1 | // SPDX-License-Identifier: GPL-2.0-only |
2 | /* |
3 | * Copyright (C) 2007-2010 Advanced Micro Devices, Inc. |
4 | * Author: Joerg Roedel <jroedel@suse.de> |
5 | * Leo Duran <leo.duran@amd.com> |
6 | */ |
7 | |
8 | #define pr_fmt(fmt) "AMD-Vi: " fmt |
9 | #define dev_fmt(fmt) pr_fmt(fmt) |
10 | |
11 | #include <linux/ratelimit.h> |
12 | #include <linux/pci.h> |
13 | #include <linux/acpi.h> |
14 | #include <linux/pci-ats.h> |
15 | #include <linux/bitmap.h> |
16 | #include <linux/slab.h> |
17 | #include <linux/debugfs.h> |
18 | #include <linux/scatterlist.h> |
19 | #include <linux/dma-map-ops.h> |
20 | #include <linux/dma-direct.h> |
21 | #include <linux/iommu-helper.h> |
22 | #include <linux/delay.h> |
23 | #include <linux/amd-iommu.h> |
24 | #include <linux/notifier.h> |
25 | #include <linux/export.h> |
26 | #include <linux/irq.h> |
27 | #include <linux/msi.h> |
28 | #include <linux/irqdomain.h> |
29 | #include <linux/percpu.h> |
30 | #include <linux/io-pgtable.h> |
31 | #include <linux/cc_platform.h> |
32 | #include <asm/irq_remapping.h> |
33 | #include <asm/io_apic.h> |
34 | #include <asm/apic.h> |
35 | #include <asm/hw_irq.h> |
36 | #include <asm/proto.h> |
37 | #include <asm/iommu.h> |
38 | #include <asm/gart.h> |
39 | #include <asm/dma.h> |
40 | #include <uapi/linux/iommufd.h> |
41 | |
42 | #include "amd_iommu.h" |
43 | #include "../dma-iommu.h" |
44 | #include "../irq_remapping.h" |
45 | |
46 | #define CMD_SET_TYPE(cmd, t) ((cmd)->data[1] |= ((t) << 28)) |
47 | |
48 | /* Reserved IOVA ranges */ |
49 | #define MSI_RANGE_START (0xfee00000) |
50 | #define MSI_RANGE_END (0xfeefffff) |
51 | #define HT_RANGE_START (0xfd00000000ULL) |
52 | #define HT_RANGE_END (0xffffffffffULL) |
53 | |
54 | #define DEFAULT_PGTABLE_LEVEL PAGE_MODE_3_LEVEL |
55 | |
56 | static DEFINE_SPINLOCK(pd_bitmap_lock); |
57 | |
58 | LIST_HEAD(ioapic_map); |
59 | LIST_HEAD(hpet_map); |
60 | LIST_HEAD(acpihid_map); |
61 | |
62 | const struct iommu_ops amd_iommu_ops; |
63 | static const struct iommu_dirty_ops amd_dirty_ops; |
64 | |
65 | int amd_iommu_max_glx_val = -1; |
66 | |
67 | /* |
68 | * general struct to manage commands sent to an IOMMU |
69 | */ |
70 | struct iommu_cmd { |
71 | u32 data[4]; |
72 | }; |
73 | |
74 | struct kmem_cache *amd_iommu_irq_cache; |
75 | |
76 | static void detach_device(struct device *dev); |
77 | |
78 | static void set_dte_entry(struct amd_iommu *iommu, |
79 | struct iommu_dev_data *dev_data); |
80 | |
81 | /**************************************************************************** |
82 | * |
83 | * Helper functions |
84 | * |
85 | ****************************************************************************/ |
86 | |
87 | static inline bool pdom_is_v2_pgtbl_mode(struct protection_domain *pdom) |
88 | { |
89 | return (pdom && (pdom->pd_mode == PD_MODE_V2)); |
90 | } |
91 | |
92 | static inline int get_acpihid_device_id(struct device *dev, |
93 | struct acpihid_map_entry **entry) |
94 | { |
95 | struct acpi_device *adev = ACPI_COMPANION(dev); |
96 | struct acpihid_map_entry *p; |
97 | |
98 | if (!adev) |
99 | return -ENODEV; |
100 | |
101 | list_for_each_entry(p, &acpihid_map, list) { |
102 | if (acpi_dev_hid_uid_match(adev, p->hid, |
103 | p->uid[0] ? p->uid : NULL)) { |
104 | if (entry) |
105 | *entry = p; |
106 | return p->devid; |
107 | } |
108 | } |
109 | return -EINVAL; |
110 | } |
111 | |
112 | static inline int get_device_sbdf_id(struct device *dev) |
113 | { |
114 | int sbdf; |
115 | |
116 | if (dev_is_pci(dev)) |
117 | sbdf = get_pci_sbdf_id(to_pci_dev(dev)); |
118 | else |
119 | sbdf = get_acpihid_device_id(dev, NULL); |
120 | |
121 | return sbdf; |
122 | } |
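/*
 * Illustrative note (not part of the original source): the sbdf value
 * packs segment, bus and devfn roughly as (seg << 16) | (bus << 8) | devfn,
 * so device 0000:03:00.1 yields id 0x0301 in segment 0;
 * PCI_SBDF_TO_DEVID() then recovers 0x0301 and PCI_SBDF_TO_SEGID()
 * recovers 0. See the AMD IOMMU headers for the exact macro definitions.
 */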
123 | |
124 | struct dev_table_entry *get_dev_table(struct amd_iommu *iommu) |
125 | { |
126 | struct dev_table_entry *dev_table; |
127 | struct amd_iommu_pci_seg *pci_seg = iommu->pci_seg; |
128 | |
129 | BUG_ON(pci_seg == NULL); |
130 | dev_table = pci_seg->dev_table; |
131 | BUG_ON(dev_table == NULL); |
132 | |
133 | return dev_table; |
134 | } |
135 | |
136 | static inline u16 get_device_segment(struct device *dev) |
137 | { |
138 | u16 seg; |
139 | |
140 | if (dev_is_pci(dev)) { |
141 | struct pci_dev *pdev = to_pci_dev(dev); |
142 | |
143 | seg = pci_domain_nr(pdev->bus); |
144 | } else { |
145 | u32 devid = get_acpihid_device_id(dev, NULL); |
146 | |
147 | seg = PCI_SBDF_TO_SEGID(devid); |
148 | } |
149 | |
150 | return seg; |
151 | } |
152 | |
153 | /* Writes the specific IOMMU for a device into the PCI segment rlookup table */ |
154 | void amd_iommu_set_rlookup_table(struct amd_iommu *iommu, u16 devid) |
155 | { |
156 | struct amd_iommu_pci_seg *pci_seg = iommu->pci_seg; |
157 | |
158 | pci_seg->rlookup_table[devid] = iommu; |
159 | } |
160 | |
161 | static struct amd_iommu *__rlookup_amd_iommu(u16 seg, u16 devid) |
162 | { |
163 | struct amd_iommu_pci_seg *pci_seg; |
164 | |
165 | for_each_pci_segment(pci_seg) { |
166 | if (pci_seg->id == seg) |
167 | return pci_seg->rlookup_table[devid]; |
168 | } |
169 | return NULL; |
170 | } |
171 | |
172 | static struct amd_iommu *rlookup_amd_iommu(struct device *dev) |
173 | { |
174 | u16 seg = get_device_segment(dev); |
175 | int devid = get_device_sbdf_id(dev); |
176 | |
177 | if (devid < 0) |
178 | return NULL; |
179 | return __rlookup_amd_iommu(seg, PCI_SBDF_TO_DEVID(devid)); |
180 | } |
181 | |
182 | static struct protection_domain *to_pdomain(struct iommu_domain *dom) |
183 | { |
184 | return container_of(dom, struct protection_domain, domain); |
185 | } |
186 | |
187 | static struct iommu_dev_data *alloc_dev_data(struct amd_iommu *iommu, u16 devid) |
188 | { |
189 | struct iommu_dev_data *dev_data; |
190 | struct amd_iommu_pci_seg *pci_seg = iommu->pci_seg; |
191 | |
192 | dev_data = kzalloc(sizeof(*dev_data), GFP_KERNEL); |
193 | if (!dev_data) |
194 | return NULL; |
195 | |
196 | spin_lock_init(&dev_data->lock); |
197 | dev_data->devid = devid; |
198 | ratelimit_default_init(&dev_data->rs); |
199 | |
200 | llist_add(&dev_data->dev_data_list, &pci_seg->dev_data_list); |
201 | return dev_data; |
202 | } |
203 | |
204 | static struct iommu_dev_data *search_dev_data(struct amd_iommu *iommu, u16 devid) |
205 | { |
206 | struct iommu_dev_data *dev_data; |
207 | struct llist_node *node; |
208 | struct amd_iommu_pci_seg *pci_seg = iommu->pci_seg; |
209 | |
210 | if (llist_empty(&pci_seg->dev_data_list)) |
211 | return NULL; |
212 | |
213 | node = pci_seg->dev_data_list.first; |
214 | llist_for_each_entry(dev_data, node, dev_data_list) { |
215 | if (dev_data->devid == devid) |
216 | return dev_data; |
217 | } |
218 | |
219 | return NULL; |
220 | } |
221 | |
222 | static int clone_alias(struct pci_dev *pdev, u16 alias, void *data) |
223 | { |
224 | struct amd_iommu *iommu; |
225 | struct dev_table_entry *dev_table; |
226 | u16 devid = pci_dev_id(pdev); |
227 | |
228 | if (devid == alias) |
229 | return 0; |
230 | |
231 | iommu = rlookup_amd_iommu(&pdev->dev); |
232 | if (!iommu) |
233 | return 0; |
234 | |
235 | amd_iommu_set_rlookup_table(iommu, alias); |
236 | dev_table = get_dev_table(iommu); |
237 | memcpy(dev_table[alias].data, |
238 | dev_table[devid].data, |
239 | sizeof(dev_table[alias].data)); |
240 | |
241 | return 0; |
242 | } |
243 | |
244 | static void clone_aliases(struct amd_iommu *iommu, struct device *dev) |
245 | { |
246 | struct pci_dev *pdev; |
247 | |
248 | if (!dev_is_pci(dev)) |
249 | return; |
250 | pdev = to_pci_dev(dev); |
251 | |
252 | /* |
253 | * The IVRS alias stored in the alias table may not be |
254 | * part of the PCI DMA aliases if its bus differs |
255 | * from the original device. |
256 | */ |
257 | clone_alias(pdev, iommu->pci_seg->alias_table[pci_dev_id(pdev)], NULL); |
258 | |
259 | pci_for_each_dma_alias(pdev, clone_alias, NULL); |
260 | } |
261 | |
262 | static void setup_aliases(struct amd_iommu *iommu, struct device *dev) |
263 | { |
264 | struct pci_dev *pdev = to_pci_dev(dev); |
265 | struct amd_iommu_pci_seg *pci_seg = iommu->pci_seg; |
266 | u16 ivrs_alias; |
267 | |
268 | /* For ACPI HID devices, there are no aliases */ |
269 | if (!dev_is_pci(dev)) |
270 | return; |
271 | |
272 | /* |
273 | * Add the IVRS alias to the pci aliases if it is on the same |
274 | * bus. The IVRS table may know about a quirk that we don't. |
275 | */ |
276 | ivrs_alias = pci_seg->alias_table[pci_dev_id(pdev)]; |
277 | if (ivrs_alias != pci_dev_id(pdev) && |
278 | PCI_BUS_NUM(ivrs_alias) == pdev->bus->number) |
279 | pci_add_dma_alias(pdev, ivrs_alias & 0xff, 1); |
280 | |
281 | clone_aliases(iommu, dev); |
282 | } |
283 | |
284 | static struct iommu_dev_data *find_dev_data(struct amd_iommu *iommu, u16 devid) |
285 | { |
286 | struct iommu_dev_data *dev_data; |
287 | |
288 | dev_data = search_dev_data(iommu, devid); |
289 | |
290 | if (dev_data == NULL) { |
291 | dev_data = alloc_dev_data(iommu, devid); |
292 | if (!dev_data) |
293 | return NULL; |
294 | |
295 | if (translation_pre_enabled(iommu)) |
296 | dev_data->defer_attach = true; |
297 | } |
298 | |
299 | return dev_data; |
300 | } |
301 | |
302 | /* |
303 | * Find or create an IOMMU group for an acpihid device. |
304 | */ |
305 | static struct iommu_group *acpihid_device_group(struct device *dev) |
306 | { |
307 | struct acpihid_map_entry *p, *entry = NULL; |
308 | int devid; |
309 | |
310 | devid = get_acpihid_device_id(dev, &entry); |
311 | if (devid < 0) |
312 | return ERR_PTR(devid); |
313 | |
314 | list_for_each_entry(p, &acpihid_map, list) { |
315 | if ((devid == p->devid) && p->group) |
316 | entry->group = p->group; |
317 | } |
318 | |
319 | if (!entry->group) |
320 | entry->group = generic_device_group(dev); |
321 | else |
322 | iommu_group_ref_get(entry->group); |
323 | |
324 | return entry->group; |
325 | } |
326 | |
327 | static inline bool pdev_pasid_supported(struct iommu_dev_data *dev_data) |
328 | { |
329 | return (dev_data->flags & AMD_IOMMU_DEVICE_FLAG_PASID_SUP); |
330 | } |
331 | |
332 | static u32 pdev_get_caps(struct pci_dev *pdev) |
333 | { |
334 | int features; |
335 | u32 flags = 0; |
336 | |
337 | if (pci_ats_supported(pdev)) |
338 | flags |= AMD_IOMMU_DEVICE_FLAG_ATS_SUP; |
339 | |
340 | if (pci_pri_supported(pdev)) |
341 | flags |= AMD_IOMMU_DEVICE_FLAG_PRI_SUP; |
342 | |
343 | features = pci_pasid_features(pdev); |
344 | if (features >= 0) { |
345 | flags |= AMD_IOMMU_DEVICE_FLAG_PASID_SUP; |
346 | |
347 | if (features & PCI_PASID_CAP_EXEC) |
348 | flags |= AMD_IOMMU_DEVICE_FLAG_EXEC_SUP; |
349 | |
350 | if (features & PCI_PASID_CAP_PRIV) |
351 | flags |= AMD_IOMMU_DEVICE_FLAG_PRIV_SUP; |
352 | } |
353 | |
354 | return flags; |
355 | } |
356 | |
357 | static inline int pdev_enable_cap_ats(struct pci_dev *pdev) |
358 | { |
359 | struct iommu_dev_data *dev_data = dev_iommu_priv_get(&pdev->dev); |
360 | int ret = -EINVAL; |
361 | |
362 | if (dev_data->ats_enabled) |
363 | return 0; |
364 | |
365 | if (amd_iommu_iotlb_sup && |
366 | (dev_data->flags & AMD_IOMMU_DEVICE_FLAG_ATS_SUP)) { |
367 | ret = pci_enable_ats(pdev, PAGE_SHIFT); |
368 | if (!ret) { |
369 | dev_data->ats_enabled = 1; |
370 | dev_data->ats_qdep = pci_ats_queue_depth(pdev); |
371 | } |
372 | } |
373 | |
374 | return ret; |
375 | } |
376 | |
377 | static inline void pdev_disable_cap_ats(struct pci_dev *pdev) |
378 | { |
379 | struct iommu_dev_data *dev_data = dev_iommu_priv_get(&pdev->dev); |
380 | |
381 | if (dev_data->ats_enabled) { |
382 | pci_disable_ats(pdev); |
383 | dev_data->ats_enabled = 0; |
384 | } |
385 | } |
386 | |
387 | int amd_iommu_pdev_enable_cap_pri(struct pci_dev *pdev) |
388 | { |
389 | struct iommu_dev_data *dev_data = dev_iommu_priv_get(&pdev->dev); |
390 | int ret = -EINVAL; |
391 | |
392 | if (dev_data->pri_enabled) |
393 | return 0; |
394 | |
395 | if (dev_data->flags & AMD_IOMMU_DEVICE_FLAG_PRI_SUP) { |
396 | /* |
397 | * First reset the PRI state of the device. |
398 | * FIXME: Hardcode number of outstanding requests for now |
399 | */ |
400 | if (!pci_reset_pri(pdev) && !pci_enable_pri(pdev, 32)) { |
401 | dev_data->pri_enabled = 1; |
402 | dev_data->pri_tlp = pci_prg_resp_pasid_required(pdev); |
403 | |
404 | ret = 0; |
405 | } |
406 | } |
407 | |
408 | return ret; |
409 | } |
410 | |
411 | void amd_iommu_pdev_disable_cap_pri(struct pci_dev *pdev) |
412 | { |
413 | struct iommu_dev_data *dev_data = dev_iommu_priv_get(&pdev->dev); |
414 | |
415 | if (dev_data->pri_enabled) { |
416 | pci_disable_pri(pdev); |
417 | dev_data->pri_enabled = 0; |
418 | } |
419 | } |
420 | |
421 | static inline int pdev_enable_cap_pasid(struct pci_dev *pdev) |
422 | { |
423 | struct iommu_dev_data *dev_data = dev_iommu_priv_get(&pdev->dev); |
424 | int ret = -EINVAL; |
425 | |
426 | if (dev_data->pasid_enabled) |
427 | return 0; |
428 | |
429 | if (dev_data->flags & AMD_IOMMU_DEVICE_FLAG_PASID_SUP) { |
430 | /* Only allow access to user-accessible pages */ |
431 | ret = pci_enable_pasid(pdev, 0); |
432 | if (!ret) |
433 | dev_data->pasid_enabled = 1; |
434 | } |
435 | |
436 | return ret; |
437 | } |
438 | |
439 | static inline void pdev_disable_cap_pasid(struct pci_dev *pdev) |
440 | { |
441 | struct iommu_dev_data *dev_data = dev_iommu_priv_get(&pdev->dev); |
442 | |
443 | if (dev_data->pasid_enabled) { |
444 | pci_disable_pasid(pdev); |
445 | dev_data->pasid_enabled = 0; |
446 | } |
447 | } |
448 | |
449 | static void pdev_enable_caps(struct pci_dev *pdev) |
450 | { |
451 | pdev_enable_cap_ats(pdev); |
452 | pdev_enable_cap_pasid(pdev); |
453 | amd_iommu_pdev_enable_cap_pri(pdev); |
454 | |
455 | } |
456 | |
457 | static void pdev_disable_caps(struct pci_dev *pdev) |
458 | { |
459 | pdev_disable_cap_ats(pdev); |
460 | pdev_disable_cap_pasid(pdev); |
461 | amd_iommu_pdev_disable_cap_pri(pdev); |
462 | } |
463 | |
464 | /* |
465 | * This function checks if the driver got a valid device from the caller to |
466 | * avoid dereferencing invalid pointers. |
467 | */ |
468 | static bool check_device(struct device *dev) |
469 | { |
470 | struct amd_iommu_pci_seg *pci_seg; |
471 | struct amd_iommu *iommu; |
472 | int devid, sbdf; |
473 | |
474 | if (!dev) |
475 | return false; |
476 | |
477 | sbdf = get_device_sbdf_id(dev); |
478 | if (sbdf < 0) |
479 | return false; |
480 | devid = PCI_SBDF_TO_DEVID(sbdf); |
481 | |
482 | iommu = rlookup_amd_iommu(dev); |
483 | if (!iommu) |
484 | return false; |
485 | |
486 | /* Out of our scope? */ |
487 | pci_seg = iommu->pci_seg; |
488 | if (devid > pci_seg->last_bdf) |
489 | return false; |
490 | |
491 | return true; |
492 | } |
493 | |
494 | static int iommu_init_device(struct amd_iommu *iommu, struct device *dev) |
495 | { |
496 | struct iommu_dev_data *dev_data; |
497 | int devid, sbdf; |
498 | |
499 | if (dev_iommu_priv_get(dev)) |
500 | return 0; |
501 | |
502 | sbdf = get_device_sbdf_id(dev); |
503 | if (sbdf < 0) |
504 | return sbdf; |
505 | |
506 | devid = PCI_SBDF_TO_DEVID(sbdf); |
507 | dev_data = find_dev_data(iommu, devid); |
508 | if (!dev_data) |
509 | return -ENOMEM; |
510 | |
511 | dev_data->dev = dev; |
512 | setup_aliases(iommu, dev); |
513 | |
514 | /* |
515 | * By default we use passthrough mode for IOMMUv2 capable devices. |
516 | * But if amd_iommu=force_isolation is set (e.g. to debug DMA to |
517 | * invalid address), we ignore the capability for the device so |
518 | * it'll be forced to go into translation mode. |
519 | */ |
520 | if ((iommu_default_passthrough() || !amd_iommu_force_isolation) && |
521 | dev_is_pci(dev) && amd_iommu_gt_ppr_supported()) { |
522 | dev_data->flags = pdev_get_caps(to_pci_dev(dev)); |
523 | } |
524 | |
525 | dev_iommu_priv_set(dev, dev_data); |
526 | |
527 | return 0; |
528 | } |
529 | |
530 | static void iommu_ignore_device(struct amd_iommu *iommu, struct device *dev) |
531 | { |
532 | struct amd_iommu_pci_seg *pci_seg = iommu->pci_seg; |
533 | struct dev_table_entry *dev_table = get_dev_table(iommu); |
534 | int devid, sbdf; |
535 | |
536 | sbdf = get_device_sbdf_id(dev); |
537 | if (sbdf < 0) |
538 | return; |
539 | |
540 | devid = PCI_SBDF_TO_DEVID(sbdf); |
541 | pci_seg->rlookup_table[devid] = NULL; |
542 | memset(&dev_table[devid], 0, sizeof(struct dev_table_entry)); |
543 | |
544 | setup_aliases(iommu, dev); |
545 | } |
546 | |
547 | static void amd_iommu_uninit_device(struct device *dev) |
548 | { |
549 | struct iommu_dev_data *dev_data; |
550 | |
551 | dev_data = dev_iommu_priv_get(dev); |
552 | if (!dev_data) |
553 | return; |
554 | |
555 | if (dev_data->domain) |
556 | detach_device(dev); |
557 | |
558 | /* |
559 | * We keep dev_data around for unplugged devices and reuse it when the |
560 | * device is re-plugged - not doing so would introduce a ton of races. |
561 | */ |
562 | } |
563 | |
564 | /**************************************************************************** |
565 | * |
566 | * Interrupt handling functions |
567 | * |
568 | ****************************************************************************/ |
569 | |
570 | static void dump_dte_entry(struct amd_iommu *iommu, u16 devid) |
571 | { |
572 | int i; |
573 | struct dev_table_entry *dev_table = get_dev_table(iommu); |
574 | |
575 | for (i = 0; i < 4; ++i) |
576 | pr_err("DTE[%d]: %016llx\n" , i, dev_table[devid].data[i]); |
577 | } |
578 | |
579 | static void dump_command(unsigned long phys_addr) |
580 | { |
581 | struct iommu_cmd *cmd = iommu_phys_to_virt(phys_addr); |
582 | int i; |
583 | |
584 | for (i = 0; i < 4; ++i) |
585 | pr_err("CMD[%d]: %08x\n" , i, cmd->data[i]); |
586 | } |
587 | |
588 | static void amd_iommu_report_rmp_hw_error(struct amd_iommu *iommu, volatile u32 *event) |
589 | { |
590 | struct iommu_dev_data *dev_data = NULL; |
591 | int devid, vmg_tag, flags; |
592 | struct pci_dev *pdev; |
593 | u64 spa; |
594 | |
595 | devid = (event[0] >> EVENT_DEVID_SHIFT) & EVENT_DEVID_MASK; |
596 | vmg_tag = (event[1]) & 0xFFFF; |
597 | flags = (event[1] >> EVENT_FLAGS_SHIFT) & EVENT_FLAGS_MASK; |
598 | spa = ((u64)event[3] << 32) | (event[2] & 0xFFFFFFF8); |
599 | |
600 | pdev = pci_get_domain_bus_and_slot(iommu->pci_seg->id, PCI_BUS_NUM(devid), |
601 | devid & 0xff); |
602 | if (pdev) |
603 | dev_data = dev_iommu_priv_get(&pdev->dev); |
604 | |
605 | if (dev_data) { |
606 | if (__ratelimit(&dev_data->rs)) { |
607 | pci_err(pdev, "Event logged [RMP_HW_ERROR vmg_tag=0x%04x, spa=0x%llx, flags=0x%04x]\n" , |
608 | vmg_tag, spa, flags); |
609 | } |
610 | } else { |
611 | pr_err_ratelimited("Event logged [RMP_HW_ERROR device=%04x:%02x:%02x.%x, vmg_tag=0x%04x, spa=0x%llx, flags=0x%04x]\n" , |
612 | iommu->pci_seg->id, PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid), |
613 | vmg_tag, spa, flags); |
614 | } |
615 | |
616 | if (pdev) |
617 | pci_dev_put(pdev); |
618 | } |
619 | |
620 | static void amd_iommu_report_rmp_fault(struct amd_iommu *iommu, volatile u32 *event) |
621 | { |
622 | struct iommu_dev_data *dev_data = NULL; |
623 | int devid, flags_rmp, vmg_tag, flags; |
624 | struct pci_dev *pdev; |
625 | u64 gpa; |
626 | |
627 | devid = (event[0] >> EVENT_DEVID_SHIFT) & EVENT_DEVID_MASK; |
628 | flags_rmp = (event[0] >> EVENT_FLAGS_SHIFT) & 0xFF; |
629 | vmg_tag = (event[1]) & 0xFFFF; |
630 | flags = (event[1] >> EVENT_FLAGS_SHIFT) & EVENT_FLAGS_MASK; |
631 | gpa = ((u64)event[3] << 32) | event[2]; |
632 | |
633 | pdev = pci_get_domain_bus_and_slot(iommu->pci_seg->id, PCI_BUS_NUM(devid), |
634 | devid & 0xff); |
635 | if (pdev) |
636 | dev_data = dev_iommu_priv_get(&pdev->dev); |
637 | |
638 | if (dev_data) { |
639 | if (__ratelimit(&dev_data->rs)) { |
640 | pci_err(pdev, "Event logged [RMP_PAGE_FAULT vmg_tag=0x%04x, gpa=0x%llx, flags_rmp=0x%04x, flags=0x%04x]\n" , |
641 | vmg_tag, gpa, flags_rmp, flags); |
642 | } |
643 | } else { |
644 | pr_err_ratelimited("Event logged [RMP_PAGE_FAULT device=%04x:%02x:%02x.%x, vmg_tag=0x%04x, gpa=0x%llx, flags_rmp=0x%04x, flags=0x%04x]\n" , |
645 | iommu->pci_seg->id, PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid), |
646 | vmg_tag, gpa, flags_rmp, flags); |
647 | } |
648 | |
649 | if (pdev) |
650 | pci_dev_put(pdev); |
651 | } |
652 | |
653 | #define IS_IOMMU_MEM_TRANSACTION(flags) \ |
654 | (((flags) & EVENT_FLAG_I) == 0) |
655 | |
656 | #define IS_WRITE_REQUEST(flags) \ |
657 | ((flags) & EVENT_FLAG_RW) |
658 | |
659 | static void amd_iommu_report_page_fault(struct amd_iommu *iommu, |
660 | u16 devid, u16 domain_id, |
661 | u64 address, int flags) |
662 | { |
663 | struct iommu_dev_data *dev_data = NULL; |
664 | struct pci_dev *pdev; |
665 | |
666 | pdev = pci_get_domain_bus_and_slot(iommu->pci_seg->id, PCI_BUS_NUM(devid), |
667 | devid & 0xff); |
668 | if (pdev) |
669 | dev_data = dev_iommu_priv_get(&pdev->dev); |
670 | |
671 | if (dev_data) { |
672 | /* |
673 | * If this is a DMA fault (for which the I(nterrupt) |
674 | * bit will be unset), allow report_iommu_fault() to |
675 | * prevent logging it. |
676 | */ |
677 | if (IS_IOMMU_MEM_TRANSACTION(flags)) { |
678 | /* Device not attached to domain properly */ |
679 | if (dev_data->domain == NULL) { |
680 | pr_err_ratelimited("Event logged [Device not attached to domain properly]\n" ); |
681 | pr_err_ratelimited(" device=%04x:%02x:%02x.%x domain=0x%04x\n" , |
682 | iommu->pci_seg->id, PCI_BUS_NUM(devid), PCI_SLOT(devid), |
683 | PCI_FUNC(devid), domain_id); |
684 | goto out; |
685 | } |
686 | |
687 | if (!report_iommu_fault(&dev_data->domain->domain, |
688 | &pdev->dev, address, |
689 | IS_WRITE_REQUEST(flags) ? |
690 | IOMMU_FAULT_WRITE : |
691 | IOMMU_FAULT_READ)) |
692 | goto out; |
693 | } |
694 | |
695 | if (__ratelimit(&dev_data->rs)) { |
696 | pci_err(pdev, "Event logged [IO_PAGE_FAULT domain=0x%04x address=0x%llx flags=0x%04x]\n" , |
697 | domain_id, address, flags); |
698 | } |
699 | } else { |
700 | pr_err_ratelimited("Event logged [IO_PAGE_FAULT device=%04x:%02x:%02x.%x domain=0x%04x address=0x%llx flags=0x%04x]\n" , |
701 | iommu->pci_seg->id, PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid), |
702 | domain_id, address, flags); |
703 | } |
704 | |
705 | out: |
706 | if (pdev) |
707 | pci_dev_put(pdev); |
708 | } |
709 | |
710 | static void iommu_print_event(struct amd_iommu *iommu, void *__evt) |
711 | { |
712 | struct device *dev = iommu->iommu.dev; |
713 | int type, devid, flags, tag; |
714 | volatile u32 *event = __evt; |
715 | int count = 0; |
716 | u64 address; |
717 | u32 pasid; |
718 | |
719 | retry: |
720 | type = (event[1] >> EVENT_TYPE_SHIFT) & EVENT_TYPE_MASK; |
721 | devid = (event[0] >> EVENT_DEVID_SHIFT) & EVENT_DEVID_MASK; |
722 | pasid = (event[0] & EVENT_DOMID_MASK_HI) | |
723 | (event[1] & EVENT_DOMID_MASK_LO); |
724 | flags = (event[1] >> EVENT_FLAGS_SHIFT) & EVENT_FLAGS_MASK; |
725 | address = (u64)(((u64)event[3]) << 32) | event[2]; |
726 | |
727 | if (type == 0) { |
728 | /* Did we hit the erratum? */ |
729 | if (++count == LOOP_TIMEOUT) { |
730 | pr_err("No event written to event log\n" ); |
731 | return; |
732 | } |
733 | udelay(1); |
734 | goto retry; |
735 | } |
736 | |
737 | if (type == EVENT_TYPE_IO_FAULT) { |
738 | amd_iommu_report_page_fault(iommu, devid, pasid, address, flags); |
739 | return; |
740 | } |
741 | |
742 | switch (type) { |
743 | case EVENT_TYPE_ILL_DEV: |
744 | dev_err(dev, "Event logged [ILLEGAL_DEV_TABLE_ENTRY device=%04x:%02x:%02x.%x pasid=0x%05x address=0x%llx flags=0x%04x]\n" , |
745 | iommu->pci_seg->id, PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid), |
746 | pasid, address, flags); |
747 | dump_dte_entry(iommu, devid); |
748 | break; |
749 | case EVENT_TYPE_DEV_TAB_ERR: |
750 | dev_err(dev, "Event logged [DEV_TAB_HARDWARE_ERROR device=%04x:%02x:%02x.%x " |
751 | "address=0x%llx flags=0x%04x]\n" , |
752 | iommu->pci_seg->id, PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid), |
753 | address, flags); |
754 | break; |
755 | case EVENT_TYPE_PAGE_TAB_ERR: |
756 | dev_err(dev, "Event logged [PAGE_TAB_HARDWARE_ERROR device=%04x:%02x:%02x.%x pasid=0x%04x address=0x%llx flags=0x%04x]\n" , |
757 | iommu->pci_seg->id, PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid), |
758 | pasid, address, flags); |
759 | break; |
760 | case EVENT_TYPE_ILL_CMD: |
761 | dev_err(dev, "Event logged [ILLEGAL_COMMAND_ERROR address=0x%llx]\n" , address); |
762 | dump_command(address); |
763 | break; |
764 | case EVENT_TYPE_CMD_HARD_ERR: |
765 | dev_err(dev, "Event logged [COMMAND_HARDWARE_ERROR address=0x%llx flags=0x%04x]\n" , |
766 | address, flags); |
767 | break; |
768 | case EVENT_TYPE_IOTLB_INV_TO: |
769 | dev_err(dev, "Event logged [IOTLB_INV_TIMEOUT device=%04x:%02x:%02x.%x address=0x%llx]\n" , |
770 | iommu->pci_seg->id, PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid), |
771 | address); |
772 | break; |
773 | case EVENT_TYPE_INV_DEV_REQ: |
774 | dev_err(dev, "Event logged [INVALID_DEVICE_REQUEST device=%04x:%02x:%02x.%x pasid=0x%05x address=0x%llx flags=0x%04x]\n" , |
775 | iommu->pci_seg->id, PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid), |
776 | pasid, address, flags); |
777 | break; |
778 | case EVENT_TYPE_RMP_FAULT: |
779 | amd_iommu_report_rmp_fault(iommu, event); |
780 | break; |
781 | case EVENT_TYPE_RMP_HW_ERR: |
782 | amd_iommu_report_rmp_hw_error(iommu, event); |
783 | break; |
784 | case EVENT_TYPE_INV_PPR_REQ: |
785 | pasid = PPR_PASID(*((u64 *)__evt)); |
786 | tag = event[1] & 0x03FF; |
787 | dev_err(dev, "Event logged [INVALID_PPR_REQUEST device=%04x:%02x:%02x.%x pasid=0x%05x address=0x%llx flags=0x%04x tag=0x%03x]\n" , |
788 | iommu->pci_seg->id, PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid), |
789 | pasid, address, flags, tag); |
790 | break; |
791 | default: |
792 | dev_err(dev, "Event logged [UNKNOWN event[0]=0x%08x event[1]=0x%08x event[2]=0x%08x event[3]=0x%08x\n" , |
793 | event[0], event[1], event[2], event[3]); |
794 | } |
795 | |
796 | /* |
797 | * To detect hardware erratum 732 we need to clear the |
798 | * entry back to zero. This issue does not exist on |
799 | * SNP-enabled systems. Also, this buffer is not writable |
800 | * on SNP-enabled systems. |
801 | */ |
802 | if (!amd_iommu_snp_en) |
803 | memset(__evt, 0, 4 * sizeof(u32)); |
804 | } |
805 | |
806 | static void iommu_poll_events(struct amd_iommu *iommu) |
807 | { |
808 | u32 head, tail; |
809 | |
810 | head = readl(iommu->mmio_base + MMIO_EVT_HEAD_OFFSET); |
811 | tail = readl(iommu->mmio_base + MMIO_EVT_TAIL_OFFSET); |
812 | |
813 | while (head != tail) { |
814 | iommu_print_event(iommu, iommu->evt_buf + head); |
815 | head = (head + EVENT_ENTRY_SIZE) % EVT_BUFFER_SIZE; |
816 | } |
817 | |
818 | writel(head, iommu->mmio_base + MMIO_EVT_HEAD_OFFSET); |
819 | } |
820 | |
821 | static void iommu_poll_ppr_log(struct amd_iommu *iommu) |
822 | { |
823 | u32 head, tail; |
824 | |
825 | if (iommu->ppr_log == NULL) |
826 | return; |
827 | |
828 | head = readl(iommu->mmio_base + MMIO_PPR_HEAD_OFFSET); |
829 | tail = readl(iommu->mmio_base + MMIO_PPR_TAIL_OFFSET); |
830 | |
831 | while (head != tail) { |
832 | volatile u64 *raw; |
833 | u64 entry[2]; |
834 | int i; |
835 | |
836 | raw = (u64 *)(iommu->ppr_log + head); |
837 | |
838 | /* |
839 | * Hardware bug: Interrupt may arrive before the entry is |
840 | * written to memory. If this happens we need to wait for the |
841 | * entry to arrive. |
842 | */ |
843 | for (i = 0; i < LOOP_TIMEOUT; ++i) { |
844 | if (PPR_REQ_TYPE(raw[0]) != 0) |
845 | break; |
846 | udelay(1); |
847 | } |
848 | |
849 | /* Avoid memcpy function-call overhead */ |
850 | entry[0] = raw[0]; |
851 | entry[1] = raw[1]; |
852 | |
853 | /* |
854 | * To detect hardware erratum 733 we need to clear the |
855 | * entry back to zero. This issue does not exist on |
856 | * SNP-enabled systems. Also, this buffer is not writable |
857 | * on SNP-enabled systems. |
858 | */ |
859 | if (!amd_iommu_snp_en) |
860 | raw[0] = raw[1] = 0UL; |
861 | |
862 | /* Update head pointer of hardware ring-buffer */ |
863 | head = (head + PPR_ENTRY_SIZE) % PPR_LOG_SIZE; |
864 | writel(head, iommu->mmio_base + MMIO_PPR_HEAD_OFFSET); |
865 | |
866 | /* TODO: PPR Handler will be added when we add IOPF support */ |
867 | |
868 | /* Refresh ring-buffer information */ |
869 | head = readl(iommu->mmio_base + MMIO_PPR_HEAD_OFFSET); |
870 | tail = readl(iommu->mmio_base + MMIO_PPR_TAIL_OFFSET); |
871 | } |
872 | } |
873 | |
874 | #ifdef CONFIG_IRQ_REMAP |
875 | static int (*iommu_ga_log_notifier)(u32); |
876 | |
877 | int amd_iommu_register_ga_log_notifier(int (*notifier)(u32)) |
878 | { |
879 | iommu_ga_log_notifier = notifier; |
880 | |
881 | return 0; |
882 | } |
883 | EXPORT_SYMBOL(amd_iommu_register_ga_log_notifier); |
884 | |
885 | static void iommu_poll_ga_log(struct amd_iommu *iommu) |
886 | { |
887 | u32 head, tail; |
888 | |
889 | if (iommu->ga_log == NULL) |
890 | return; |
891 | |
892 | head = readl(iommu->mmio_base + MMIO_GA_HEAD_OFFSET); |
893 | tail = readl(iommu->mmio_base + MMIO_GA_TAIL_OFFSET); |
894 | |
895 | while (head != tail) { |
896 | volatile u64 *raw; |
897 | u64 log_entry; |
898 | |
899 | raw = (u64 *)(iommu->ga_log + head); |
900 | |
901 | /* Avoid memcpy function-call overhead */ |
902 | log_entry = *raw; |
903 | |
904 | /* Update head pointer of hardware ring-buffer */ |
905 | head = (head + GA_ENTRY_SIZE) % GA_LOG_SIZE; |
906 | writel(head, iommu->mmio_base + MMIO_GA_HEAD_OFFSET); |
907 | |
908 | /* Handle GA entry */ |
909 | switch (GA_REQ_TYPE(log_entry)) { |
910 | case GA_GUEST_NR: |
911 | if (!iommu_ga_log_notifier) |
912 | break; |
913 | |
914 | pr_debug("%s: devid=%#x, ga_tag=%#x\n" , |
915 | __func__, GA_DEVID(log_entry), |
916 | GA_TAG(log_entry)); |
917 | |
918 | if (iommu_ga_log_notifier(GA_TAG(log_entry)) != 0) |
919 | pr_err("GA log notifier failed.\n" ); |
920 | break; |
921 | default: |
922 | break; |
923 | } |
924 | } |
925 | } |
926 | |
927 | static void |
928 | amd_iommu_set_pci_msi_domain(struct device *dev, struct amd_iommu *iommu) |
929 | { |
930 | if (!irq_remapping_enabled || !dev_is_pci(dev) || |
931 | !pci_dev_has_default_msi_parent_domain(to_pci_dev(dev))) |
932 | return; |
933 | |
934 | dev_set_msi_domain(dev, iommu->ir_domain); |
935 | } |
936 | |
937 | #else /* CONFIG_IRQ_REMAP */ |
938 | static inline void |
939 | amd_iommu_set_pci_msi_domain(struct device *dev, struct amd_iommu *iommu) { } |
940 | #endif /* !CONFIG_IRQ_REMAP */ |
941 | |
942 | static void amd_iommu_handle_irq(void *data, const char *evt_type, |
943 | u32 int_mask, u32 overflow_mask, |
944 | void (*int_handler)(struct amd_iommu *), |
945 | void (*overflow_handler)(struct amd_iommu *)) |
946 | { |
947 | struct amd_iommu *iommu = (struct amd_iommu *) data; |
948 | u32 status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET); |
949 | u32 mask = int_mask | overflow_mask; |
950 | |
951 | while (status & mask) { |
952 | /* Enable interrupt sources again */ |
953 | writel(mask, iommu->mmio_base + MMIO_STATUS_OFFSET); |
954 | |
955 | if (int_handler) { |
956 | pr_devel("Processing IOMMU (ivhd%d) %s Log\n" , |
957 | iommu->index, evt_type); |
958 | int_handler(iommu); |
959 | } |
960 | |
961 | if ((status & overflow_mask) && overflow_handler) |
962 | overflow_handler(iommu); |
963 | |
964 | /* |
965 | * Hardware bug: ERBT1312 |
966 | * When re-enabling interrupt (by writing 1 |
967 | * to clear the bit), the hardware might also try to set |
968 | * the interrupt bit in the event status register. |
969 | * In this scenario, the bit will be set and will disable |
970 | * subsequent interrupts. |
971 | * |
972 | * Workaround: The IOMMU driver should read back the |
973 | * status register and check if the interrupt bits are cleared. |
974 | * If not, the driver will need to go through the interrupt handler |
975 | * again and re-clear the bits. |
976 | */ |
977 | status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET); |
978 | } |
979 | } |
980 | |
981 | irqreturn_t amd_iommu_int_thread_evtlog(int irq, void *data) |
982 | { |
983 | amd_iommu_handle_irq(data, "Evt", MMIO_STATUS_EVT_INT_MASK, |
984 | MMIO_STATUS_EVT_OVERFLOW_MASK, |
985 | iommu_poll_events, amd_iommu_restart_event_logging); |
986 | |
987 | return IRQ_HANDLED; |
988 | } |
989 | |
990 | irqreturn_t amd_iommu_int_thread_pprlog(int irq, void *data) |
991 | { |
992 | amd_iommu_handle_irq(data, "PPR", MMIO_STATUS_PPR_INT_MASK, |
993 | MMIO_STATUS_PPR_OVERFLOW_MASK, |
994 | iommu_poll_ppr_log, amd_iommu_restart_ppr_log); |
995 | |
996 | return IRQ_HANDLED; |
997 | } |
998 | |
999 | irqreturn_t amd_iommu_int_thread_galog(int irq, void *data) |
1000 | { |
1001 | #ifdef CONFIG_IRQ_REMAP |
1002 | amd_iommu_handle_irq(data, "GA", MMIO_STATUS_GALOG_INT_MASK, |
1003 | MMIO_STATUS_GALOG_OVERFLOW_MASK, |
1004 | iommu_poll_ga_log, amd_iommu_restart_ga_log); |
1005 | #endif |
1006 | |
1007 | return IRQ_HANDLED; |
1008 | } |
1009 | |
1010 | irqreturn_t amd_iommu_int_thread(int irq, void *data) |
1011 | { |
1012 | amd_iommu_int_thread_evtlog(irq, data); |
1013 | amd_iommu_int_thread_pprlog(irq, data); |
1014 | amd_iommu_int_thread_galog(irq, data); |
1015 | |
1016 | return IRQ_HANDLED; |
1017 | } |
1018 | |
1019 | irqreturn_t amd_iommu_int_handler(int irq, void *data) |
1020 | { |
1021 | return IRQ_WAKE_THREAD; |
1022 | } |
1023 | |
1024 | /**************************************************************************** |
1025 | * |
1026 | * IOMMU command queuing functions |
1027 | * |
1028 | ****************************************************************************/ |
1029 | |
1030 | static int wait_on_sem(struct amd_iommu *iommu, u64 data) |
1031 | { |
1032 | int i = 0; |
1033 | |
1034 | while (*iommu->cmd_sem != data && i < LOOP_TIMEOUT) { |
1035 | udelay(1); |
1036 | i += 1; |
1037 | } |
1038 | |
1039 | if (i == LOOP_TIMEOUT) { |
1040 | pr_alert("Completion-Wait loop timed out\n" ); |
1041 | return -EIO; |
1042 | } |
1043 | |
1044 | return 0; |
1045 | } |
1046 | |
1047 | static void copy_cmd_to_buffer(struct amd_iommu *iommu, |
1048 | struct iommu_cmd *cmd) |
1049 | { |
1050 | u8 *target; |
1051 | u32 tail; |
1052 | |
1053 | /* Copy command to buffer */ |
1054 | tail = iommu->cmd_buf_tail; |
1055 | target = iommu->cmd_buf + tail; |
1056 | memcpy(target, cmd, sizeof(*cmd)); |
1057 | |
1058 | tail = (tail + sizeof(*cmd)) % CMD_BUFFER_SIZE; |
1059 | iommu->cmd_buf_tail = tail; |
1060 | |
1061 | /* Tell the IOMMU about it */ |
1062 | writel(tail, iommu->mmio_base + MMIO_CMD_TAIL_OFFSET); |
1063 | } |
1064 | |
1065 | static void build_completion_wait(struct iommu_cmd *cmd, |
1066 | struct amd_iommu *iommu, |
1067 | u64 data) |
1068 | { |
1069 | u64 paddr = iommu_virt_to_phys((void *)iommu->cmd_sem); |
1070 | |
1071 | memset(cmd, 0, sizeof(*cmd)); |
1072 | cmd->data[0] = lower_32_bits(paddr) | CMD_COMPL_WAIT_STORE_MASK; |
1073 | cmd->data[1] = upper_32_bits(paddr); |
1074 | cmd->data[2] = lower_32_bits(data); |
1075 | cmd->data[3] = upper_32_bits(data); |
1076 | CMD_SET_TYPE(cmd, CMD_COMPL_WAIT); |
1077 | } |
1078 | |
1079 | static void build_inv_dte(struct iommu_cmd *cmd, u16 devid) |
1080 | { |
1081 | memset(cmd, 0, sizeof(*cmd)); |
1082 | cmd->data[0] = devid; |
1083 | CMD_SET_TYPE(cmd, CMD_INV_DEV_ENTRY); |
1084 | } |
1085 | |
1086 | /* |
1087 | * Builds an invalidation address which is suitable for one page or multiple |
1088 | * pages. Sets the size bit (S) as needed if more than one page is flushed. |
1089 | */ |
1090 | static inline u64 build_inv_address(u64 address, size_t size) |
1091 | { |
1092 | u64 pages, end, msb_diff; |
1093 | |
1094 | pages = iommu_num_pages(address, size, PAGE_SIZE); |
1095 | |
1096 | if (pages == 1) |
1097 | return address & PAGE_MASK; |
1098 | |
1099 | end = address + size - 1; |
1100 | |
1101 | /* |
1102 | * msb_diff would hold the index of the most significant bit that |
1103 | * flipped between the start and end. |
1104 | */ |
1105 | msb_diff = fls64(end ^ address) - 1; |
1106 | |
1107 | /* |
1108 | * Bits 63:52 are sign extended. If for some reason bit 51 is different |
1109 | * between the start and the end, invalidate everything. |
1110 | */ |
1111 | if (unlikely(msb_diff > 51)) { |
1112 | address = CMD_INV_IOMMU_ALL_PAGES_ADDRESS; |
1113 | } else { |
1114 | /* |
1115 | * The msb-bit must be clear on the address. Just set all the |
1116 | * lower bits. |
1117 | */ |
1118 | address |= (1ull << msb_diff) - 1; |
1119 | } |
1120 | |
1121 | /* Clear bits 11:0 */ |
1122 | address &= PAGE_MASK; |
1123 | |
1124 | /* Set the size bit - we flush more than one 4kb page */ |
1125 | return address | CMD_INV_IOMMU_PAGES_SIZE_MASK; |
1126 | } |
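/*
 * Worked example (illustrative only, not part of the original source): for
 * address 0x100000 and size 0x4000 (four 4KiB pages), end is 0x103fff and
 * end ^ address is 0x3fff, so msb_diff is 13. Setting the bits below bit 13
 * and clearing bits 11:0 yields 0x101000; with the size bit set this asks
 * the IOMMU to invalidate the naturally aligned 16KiB region at 0x100000.
 */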
1127 | |
1128 | static void build_inv_iommu_pages(struct iommu_cmd *cmd, u64 address, |
1129 | size_t size, u16 domid, |
1130 | ioasid_t pasid, bool gn) |
1131 | { |
1132 | u64 inv_address = build_inv_address(address, size); |
1133 | |
1134 | memset(cmd, 0, sizeof(*cmd)); |
1135 | |
1136 | cmd->data[1] |= domid; |
1137 | cmd->data[2] = lower_32_bits(inv_address); |
1138 | cmd->data[3] = upper_32_bits(inv_address); |
1139 | /* PDE bit - we want to flush everything, not only the PTEs */ |
1140 | cmd->data[2] |= CMD_INV_IOMMU_PAGES_PDE_MASK; |
1141 | if (gn) { |
1142 | cmd->data[0] |= pasid; |
1143 | cmd->data[2] |= CMD_INV_IOMMU_PAGES_GN_MASK; |
1144 | } |
1145 | CMD_SET_TYPE(cmd, CMD_INV_IOMMU_PAGES); |
1146 | } |
1147 | |
1148 | static void build_inv_iotlb_pages(struct iommu_cmd *cmd, u16 devid, int qdep, |
1149 | u64 address, size_t size, |
1150 | ioasid_t pasid, bool gn) |
1151 | { |
1152 | u64 inv_address = build_inv_address(address, size); |
1153 | |
1154 | memset(cmd, 0, sizeof(*cmd)); |
1155 | |
1156 | cmd->data[0] = devid; |
1157 | cmd->data[0] |= (qdep & 0xff) << 24; |
1158 | cmd->data[1] = devid; |
1159 | cmd->data[2] = lower_32_bits(inv_address); |
1160 | cmd->data[3] = upper_32_bits(inv_address); |
1161 | if (gn) { |
1162 | cmd->data[0] |= ((pasid >> 8) & 0xff) << 16; |
1163 | cmd->data[1] |= (pasid & 0xff) << 16; |
1164 | cmd->data[2] |= CMD_INV_IOMMU_PAGES_GN_MASK; |
1165 | } |
1166 | |
1167 | CMD_SET_TYPE(cmd, CMD_INV_IOTLB_PAGES); |
1168 | } |
1169 | |
1170 | static void build_complete_ppr(struct iommu_cmd *cmd, u16 devid, u32 pasid, |
1171 | int status, int tag, u8 gn) |
1172 | { |
1173 | memset(cmd, 0, sizeof(*cmd)); |
1174 | |
1175 | cmd->data[0] = devid; |
1176 | if (gn) { |
1177 | cmd->data[1] = pasid; |
1178 | cmd->data[2] = CMD_INV_IOMMU_PAGES_GN_MASK; |
1179 | } |
1180 | cmd->data[3] = tag & 0x1ff; |
1181 | cmd->data[3] |= (status & PPR_STATUS_MASK) << PPR_STATUS_SHIFT; |
1182 | |
1183 | CMD_SET_TYPE(cmd, CMD_COMPLETE_PPR); |
1184 | } |
1185 | |
1186 | static void build_inv_all(struct iommu_cmd *cmd) |
1187 | { |
1188 | memset(cmd, 0, sizeof(*cmd)); |
1189 | CMD_SET_TYPE(cmd, CMD_INV_ALL); |
1190 | } |
1191 | |
1192 | static void build_inv_irt(struct iommu_cmd *cmd, u16 devid) |
1193 | { |
1194 | memset(cmd, 0, sizeof(*cmd)); |
1195 | cmd->data[0] = devid; |
1196 | CMD_SET_TYPE(cmd, CMD_INV_IRT); |
1197 | } |
1198 | |
1199 | /* |
1200 | * Writes the command to the IOMMUs command buffer and informs the |
1201 | * hardware about the new command. |
1202 | */ |
1203 | static int __iommu_queue_command_sync(struct amd_iommu *iommu, |
1204 | struct iommu_cmd *cmd, |
1205 | bool sync) |
1206 | { |
1207 | unsigned int count = 0; |
1208 | u32 left, next_tail; |
1209 | |
1210 | next_tail = (iommu->cmd_buf_tail + sizeof(*cmd)) % CMD_BUFFER_SIZE; |
1211 | again: |
1212 | left = (iommu->cmd_buf_head - next_tail) % CMD_BUFFER_SIZE; |
1213 | |
1214 | if (left <= 0x20) { |
1215 | /* Skip udelay() the first time around */ |
1216 | if (count++) { |
1217 | if (count == LOOP_TIMEOUT) { |
1218 | pr_err("Command buffer timeout\n" ); |
1219 | return -EIO; |
1220 | } |
1221 | |
1222 | udelay(1); |
1223 | } |
1224 | |
1225 | /* Update head and recheck remaining space */ |
1226 | iommu->cmd_buf_head = readl(iommu->mmio_base + |
1227 | MMIO_CMD_HEAD_OFFSET); |
1228 | |
1229 | goto again; |
1230 | } |
1231 | |
1232 | copy_cmd_to_buffer(iommu, cmd); |
1233 | |
1234 | /* Do we need to make sure all commands are processed? */ |
1235 | iommu->need_sync = sync; |
1236 | |
1237 | return 0; |
1238 | } |
1239 | |
1240 | static int iommu_queue_command_sync(struct amd_iommu *iommu, |
1241 | struct iommu_cmd *cmd, |
1242 | bool sync) |
1243 | { |
1244 | unsigned long flags; |
1245 | int ret; |
1246 | |
1247 | raw_spin_lock_irqsave(&iommu->lock, flags); |
1248 | ret = __iommu_queue_command_sync(iommu, cmd, sync); |
1249 | raw_spin_unlock_irqrestore(&iommu->lock, flags); |
1250 | |
1251 | return ret; |
1252 | } |
1253 | |
1254 | static int iommu_queue_command(struct amd_iommu *iommu, struct iommu_cmd *cmd) |
1255 | { |
1256 | return iommu_queue_command_sync(iommu, cmd, true); |
1257 | } |
1258 | |
1259 | /* |
1260 | * This function queues a completion wait command into the command |
1261 | * buffer of an IOMMU |
1262 | */ |
1263 | static int iommu_completion_wait(struct amd_iommu *iommu) |
1264 | { |
1265 | struct iommu_cmd cmd; |
1266 | unsigned long flags; |
1267 | int ret; |
1268 | u64 data; |
1269 | |
1270 | if (!iommu->need_sync) |
1271 | return 0; |
1272 | |
1273 | data = atomic64_add_return(1, &iommu->cmd_sem_val); |
1274 | build_completion_wait(&cmd, iommu, data); |
1275 | |
1276 | raw_spin_lock_irqsave(&iommu->lock, flags); |
1277 | |
1278 | ret = __iommu_queue_command_sync(iommu, &cmd, false); |
1279 | if (ret) |
1280 | goto out_unlock; |
1281 | |
1282 | ret = wait_on_sem(iommu, data); |
1283 | |
1284 | out_unlock: |
1285 | raw_spin_unlock_irqrestore(&iommu->lock, flags); |
1286 | |
1287 | return ret; |
1288 | } |
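/*
 * Note (summary of the mechanism above, for illustration): the
 * COMPLETION_WAIT command carries the cmd_sem address and a 64-bit value;
 * once every earlier command has completed, the IOMMU stores that value
 * to cmd_sem and wait_on_sem() sees it appear.
 */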
1289 | |
1290 | static int iommu_flush_dte(struct amd_iommu *iommu, u16 devid) |
1291 | { |
1292 | struct iommu_cmd cmd; |
1293 | |
1294 | build_inv_dte(&cmd, devid); |
1295 | |
1296 | return iommu_queue_command(iommu, &cmd); |
1297 | } |
1298 | |
1299 | static void amd_iommu_flush_dte_all(struct amd_iommu *iommu) |
1300 | { |
1301 | u32 devid; |
1302 | u16 last_bdf = iommu->pci_seg->last_bdf; |
1303 | |
1304 | for (devid = 0; devid <= last_bdf; ++devid) |
1305 | iommu_flush_dte(iommu, devid); |
1306 | |
1307 | iommu_completion_wait(iommu); |
1308 | } |
1309 | |
1310 | /* |
1311 | * This function uses heavy locking and may disable irqs for some time. But |
1312 | * this is no issue because it is only called during resume. |
1313 | */ |
1314 | static void amd_iommu_flush_tlb_all(struct amd_iommu *iommu) |
1315 | { |
1316 | u32 dom_id; |
1317 | u16 last_bdf = iommu->pci_seg->last_bdf; |
1318 | |
1319 | for (dom_id = 0; dom_id <= last_bdf; ++dom_id) { |
1320 | struct iommu_cmd cmd; |
1321 | build_inv_iommu_pages(&cmd, 0, CMD_INV_IOMMU_ALL_PAGES_ADDRESS, |
1322 | dom_id, IOMMU_NO_PASID, false); |
1323 | iommu_queue_command(iommu, &cmd); |
1324 | } |
1325 | |
1326 | iommu_completion_wait(iommu); |
1327 | } |
1328 | |
1329 | static void amd_iommu_flush_tlb_domid(struct amd_iommu *iommu, u32 dom_id) |
1330 | { |
1331 | struct iommu_cmd cmd; |
1332 | |
1333 | build_inv_iommu_pages(&cmd, 0, CMD_INV_IOMMU_ALL_PAGES_ADDRESS, |
1334 | dom_id, IOMMU_NO_PASID, false); |
1335 | iommu_queue_command(iommu, &cmd); |
1336 | |
1337 | iommu_completion_wait(iommu); |
1338 | } |
1339 | |
1340 | static void amd_iommu_flush_all(struct amd_iommu *iommu) |
1341 | { |
1342 | struct iommu_cmd cmd; |
1343 | |
1344 | build_inv_all(&cmd); |
1345 | |
1346 | iommu_queue_command(iommu, &cmd); |
1347 | iommu_completion_wait(iommu); |
1348 | } |
1349 | |
1350 | static void iommu_flush_irt(struct amd_iommu *iommu, u16 devid) |
1351 | { |
1352 | struct iommu_cmd cmd; |
1353 | |
1354 | build_inv_irt(&cmd, devid); |
1355 | |
1356 | iommu_queue_command(iommu, &cmd); |
1357 | } |
1358 | |
1359 | static void amd_iommu_flush_irt_all(struct amd_iommu *iommu) |
1360 | { |
1361 | u32 devid; |
1362 | u16 last_bdf = iommu->pci_seg->last_bdf; |
1363 | |
1364 | if (iommu->irtcachedis_enabled) |
1365 | return; |
1366 | |
1367 | for (devid = 0; devid <= last_bdf; devid++) |
1368 | iommu_flush_irt(iommu, devid); |
1369 | |
1370 | iommu_completion_wait(iommu); |
1371 | } |
1372 | |
1373 | void amd_iommu_flush_all_caches(struct amd_iommu *iommu) |
1374 | { |
1375 | if (check_feature(FEATURE_IA)) { |
1376 | amd_iommu_flush_all(iommu); |
1377 | } else { |
1378 | amd_iommu_flush_dte_all(iommu); |
1379 | amd_iommu_flush_irt_all(iommu); |
1380 | amd_iommu_flush_tlb_all(iommu); |
1381 | } |
1382 | } |
1383 | |
1384 | /* |
1385 | * Command send function for flushing on-device TLB |
1386 | */ |
1387 | static int device_flush_iotlb(struct iommu_dev_data *dev_data, u64 address, |
1388 | size_t size, ioasid_t pasid, bool gn) |
1389 | { |
1390 | struct amd_iommu *iommu = get_amd_iommu_from_dev_data(dev_data); |
1391 | struct iommu_cmd cmd; |
1392 | int qdep = dev_data->ats_qdep; |
1393 | |
1394 | build_inv_iotlb_pages(&cmd, dev_data->devid, qdep, address, |
1395 | size, pasid, gn); |
1396 | |
1397 | return iommu_queue_command(iommu, &cmd); |
1398 | } |
1399 | |
1400 | static int device_flush_dte_alias(struct pci_dev *pdev, u16 alias, void *data) |
1401 | { |
1402 | struct amd_iommu *iommu = data; |
1403 | |
1404 | return iommu_flush_dte(iommu, alias); |
1405 | } |
1406 | |
1407 | /* |
1408 | * Command send function for invalidating a device table entry |
1409 | */ |
1410 | static int device_flush_dte(struct iommu_dev_data *dev_data) |
1411 | { |
1412 | struct amd_iommu *iommu = get_amd_iommu_from_dev_data(dev_data); |
1413 | struct pci_dev *pdev = NULL; |
1414 | struct amd_iommu_pci_seg *pci_seg; |
1415 | u16 alias; |
1416 | int ret; |
1417 | |
1418 | if (dev_is_pci(dev_data->dev)) |
1419 | pdev = to_pci_dev(dev_data->dev); |
1420 | |
1421 | if (pdev) |
1422 | ret = pci_for_each_dma_alias(pdev, |
1423 | device_flush_dte_alias, iommu); |
1424 | else |
1425 | ret = iommu_flush_dte(iommu, dev_data->devid); |
1426 | if (ret) |
1427 | return ret; |
1428 | |
1429 | pci_seg = iommu->pci_seg; |
1430 | alias = pci_seg->alias_table[dev_data->devid]; |
1431 | if (alias != dev_data->devid) { |
1432 | ret = iommu_flush_dte(iommu, alias); |
1433 | if (ret) |
1434 | return ret; |
1435 | } |
1436 | |
1437 | if (dev_data->ats_enabled) { |
1438 | /* Invalidate the entire contents of an IOTLB */ |
1439 | ret = device_flush_iotlb(dev_data, 0, ~0UL, |
1440 | IOMMU_NO_PASID, false); |
1441 | } |
1442 | |
1443 | return ret; |
1444 | } |
1445 | |
1446 | static int domain_flush_pages_v2(struct protection_domain *pdom, |
1447 | u64 address, size_t size) |
1448 | { |
1449 | struct iommu_dev_data *dev_data; |
1450 | struct iommu_cmd cmd; |
1451 | int ret = 0; |
1452 | |
1453 | list_for_each_entry(dev_data, &pdom->dev_list, list) { |
1454 | struct amd_iommu *iommu = get_amd_iommu_from_dev(dev_data->dev); |
1455 | u16 domid = dev_data->gcr3_info.domid; |
1456 | |
1457 | build_inv_iommu_pages(&cmd, address, size, |
1458 | domid, IOMMU_NO_PASID, true); |
1459 | |
1460 | ret |= iommu_queue_command(iommu, &cmd); |
1461 | } |
1462 | |
1463 | return ret; |
1464 | } |
1465 | |
1466 | static int domain_flush_pages_v1(struct protection_domain *pdom, |
1467 | u64 address, size_t size) |
1468 | { |
1469 | struct iommu_cmd cmd; |
1470 | int ret = 0, i; |
1471 | |
1472 | build_inv_iommu_pages(&cmd, address, size, |
1473 | pdom->id, IOMMU_NO_PASID, false); |
1474 | |
1475 | for (i = 0; i < amd_iommu_get_num_iommus(); ++i) { |
1476 | if (!pdom->dev_iommu[i]) |
1477 | continue; |
1478 | |
1479 | /* |
1480 | * Devices of this domain are behind this IOMMU |
1481 | * We need a TLB flush |
1482 | */ |
1483 | ret |= iommu_queue_command(amd_iommus[i], &cmd); |
1484 | } |
1485 | |
1486 | return ret; |
1487 | } |
1488 | |
1489 | /* |
1490 | * TLB invalidation function which is called from the mapping functions. |
1491 | * It flushes range of PTEs of the domain. |
1492 | */ |
1493 | static void __domain_flush_pages(struct protection_domain *domain, |
1494 | u64 address, size_t size) |
1495 | { |
1496 | struct iommu_dev_data *dev_data; |
1497 | int ret = 0; |
1498 | ioasid_t pasid = IOMMU_NO_PASID; |
1499 | bool gn = false; |
1500 | |
1501 | if (pdom_is_v2_pgtbl_mode(domain)) { |
1502 | gn = true; |
1503 | ret = domain_flush_pages_v2(domain, address, size); |
1504 | } else { |
1505 | ret = domain_flush_pages_v1(domain, address, size); |
1506 | } |
1507 | |
1508 | list_for_each_entry(dev_data, &domain->dev_list, list) { |
1509 | |
1510 | if (!dev_data->ats_enabled) |
1511 | continue; |
1512 | |
1513 | ret |= device_flush_iotlb(dev_data, address, size, pasid, gn); |
1514 | } |
1515 | |
1516 | WARN_ON(ret); |
1517 | } |
1518 | |
1519 | void amd_iommu_domain_flush_pages(struct protection_domain *domain, |
1520 | u64 address, size_t size) |
1521 | { |
1522 | if (likely(!amd_iommu_np_cache)) { |
1523 | __domain_flush_pages(domain, address, size); |
1524 | |
1525 | /* Wait until IOMMU TLB and all device IOTLB flushes are complete */ |
1526 | amd_iommu_domain_flush_complete(domain); |
1527 | |
1528 | return; |
1529 | } |
1530 | |
1531 | /* |
1532 | * When NpCache is on, we infer that we run in a VM and use a vIOMMU. |
1533 | * In such setups it is best to avoid flushes of ranges which are not |
1534 | * naturally aligned, since it would lead to flushes of unmodified |
1535 | * PTEs. Such flushes would require the hypervisor to do more work than |
1536 | * necessary. Therefore, perform repeated flushes of aligned ranges |
1537 | * until you cover the range. Each iteration flushes the smaller |
1538 | * between the natural alignment of the address that we flush and the |
1539 | * greatest naturally aligned region that fits in the range. |
1540 | */ |
1541 | while (size != 0) { |
1542 | int addr_alignment = __ffs(address); |
1543 | int size_alignment = __fls(size); |
1544 | int min_alignment; |
1545 | size_t flush_size; |
1546 | |
1547 | /* |
1548 | * size is always non-zero, but address might be zero, causing |
1549 | * addr_alignment to be negative. As the casting of the |
1550 | * argument in __ffs(address) to long might trim the high bits |
1551 | * of the address on x86-32, cast to long when doing the check. |
1552 | */ |
1553 | if (likely((unsigned long)address != 0)) |
1554 | min_alignment = min(addr_alignment, size_alignment); |
1555 | else |
1556 | min_alignment = size_alignment; |
1557 | |
1558 | flush_size = 1ul << min_alignment; |
1559 | |
1560 | __domain_flush_pages(domain, address, flush_size); |
1561 | address += flush_size; |
1562 | size -= flush_size; |
1563 | } |
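/*
 * Worked example for the loop above (illustrative only): flushing address
 * 0x1000 with size 0x5000 becomes three naturally aligned flushes, namely
 * 0x1000 bytes at 0x1000, then 0x2000 bytes at 0x2000, then 0x2000 bytes
 * at 0x4000, instead of one unaligned 0x5000-byte flush.
 */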
1564 | |
1565 | /* Wait until IOMMU TLB and all device IOTLB flushes are complete */ |
1566 | amd_iommu_domain_flush_complete(domain); |
1567 | } |
1568 | |
1569 | /* Flush the whole IO/TLB for a given protection domain - including PDE */ |
1570 | static void amd_iommu_domain_flush_all(struct protection_domain *domain) |
1571 | { |
1572 | amd_iommu_domain_flush_pages(domain, 0, |
1573 | CMD_INV_IOMMU_ALL_PAGES_ADDRESS); |
1574 | } |
1575 | |
1576 | void amd_iommu_dev_flush_pasid_pages(struct iommu_dev_data *dev_data, |
1577 | ioasid_t pasid, u64 address, size_t size) |
1578 | { |
1579 | struct iommu_cmd cmd; |
1580 | struct amd_iommu *iommu = get_amd_iommu_from_dev(dev_data->dev); |
1581 | |
1582 | build_inv_iommu_pages(&cmd, address, size, |
1583 | dev_data->gcr3_info.domid, pasid, true); |
1584 | iommu_queue_command(iommu, &cmd); |
1585 | |
1586 | if (dev_data->ats_enabled) |
1587 | device_flush_iotlb(dev_data, address, size, pasid, true); |
1588 | |
1589 | iommu_completion_wait(iommu); |
1590 | } |
1591 | |
1592 | void amd_iommu_dev_flush_pasid_all(struct iommu_dev_data *dev_data, |
1593 | ioasid_t pasid) |
1594 | { |
1595 | amd_iommu_dev_flush_pasid_pages(dev_data, 0, |
1596 | CMD_INV_IOMMU_ALL_PAGES_ADDRESS, pasid); |
1597 | } |
1598 | |
1599 | void amd_iommu_domain_flush_complete(struct protection_domain *domain) |
1600 | { |
1601 | int i; |
1602 | |
1603 | for (i = 0; i < amd_iommu_get_num_iommus(); ++i) { |
1604 | if (domain && !domain->dev_iommu[i]) |
1605 | continue; |
1606 | |
1607 | /* |
1608 | * Devices of this domain are behind this IOMMU |
1609 | * We need to wait for completion of all commands. |
1610 | */ |
1611 | iommu_completion_wait(amd_iommus[i]); |
1612 | } |
1613 | } |
1614 | |
1615 | /* Flush the not present cache if it exists */ |
1616 | static void domain_flush_np_cache(struct protection_domain *domain, |
1617 | dma_addr_t iova, size_t size) |
1618 | { |
1619 | if (unlikely(amd_iommu_np_cache)) { |
1620 | unsigned long flags; |
1621 | |
1622 | spin_lock_irqsave(&domain->lock, flags); |
1623 | amd_iommu_domain_flush_pages(domain, iova, size); |
1624 | spin_unlock_irqrestore(&domain->lock, flags); |
1625 | } |
1626 | } |
1627 | |
1628 | |
1629 | /* |
1630 | * This function flushes the DTEs for all devices in domain |
1631 | */ |
1632 | static void domain_flush_devices(struct protection_domain *domain) |
1633 | { |
1634 | struct iommu_dev_data *dev_data; |
1635 | |
1636 | list_for_each_entry(dev_data, &domain->dev_list, list) |
1637 | device_flush_dte(dev_data); |
1638 | } |
1639 | |
1640 | static void update_device_table(struct protection_domain *domain) |
1641 | { |
1642 | struct iommu_dev_data *dev_data; |
1643 | |
1644 | list_for_each_entry(dev_data, &domain->dev_list, list) { |
1645 | struct amd_iommu *iommu = rlookup_amd_iommu(dev_data->dev); |
1646 | |
1647 | set_dte_entry(iommu, dev_data); |
1648 | clone_aliases(iommu, dev_data->dev); |
1649 | } |
1650 | } |
1651 | |
1652 | void amd_iommu_update_and_flush_device_table(struct protection_domain *domain) |
1653 | { |
1654 | update_device_table(domain); |
1655 | domain_flush_devices(domain); |
1656 | } |
1657 | |
1658 | void amd_iommu_domain_update(struct protection_domain *domain) |
1659 | { |
1660 | /* Update device table */ |
1661 | amd_iommu_update_and_flush_device_table(domain); |
1662 | |
1663 | /* Flush domain TLB(s) and wait for completion */ |
1664 | amd_iommu_domain_flush_all(domain); |
1665 | } |
1666 | |
1667 | int amd_iommu_complete_ppr(struct pci_dev *pdev, u32 pasid, |
1668 | int status, int tag) |
1669 | { |
1670 | struct iommu_dev_data *dev_data; |
1671 | struct amd_iommu *iommu; |
1672 | struct iommu_cmd cmd; |
1673 | |
1674 | dev_data = dev_iommu_priv_get(&pdev->dev); |
1675 | iommu = get_amd_iommu_from_dev(&pdev->dev); |
1676 | |
1677 | build_complete_ppr(&cmd, dev_data->devid, pasid, status, |
1678 | tag, dev_data->pri_tlp); |
1679 | |
1680 | return iommu_queue_command(iommu, &cmd); |
1681 | } |
1682 | |
1683 | /**************************************************************************** |
1684 | * |
1685 | * The next functions belong to the domain allocation. A domain is |
1686 | * allocated for every IOMMU as the default domain. If device isolation |
1687 | * is enabled, every device gets its own domain. The most important thing |
1688 | * about domains is the page table mapping the DMA address space they |
1689 | * contain. |
1690 | * |
1691 | ****************************************************************************/ |
1692 | |
1693 | static u16 domain_id_alloc(void) |
1694 | { |
1695 | unsigned long flags; |
1696 | int id; |
1697 | |
1698 | spin_lock_irqsave(&pd_bitmap_lock, flags); |
1699 | id = find_first_zero_bit(amd_iommu_pd_alloc_bitmap, MAX_DOMAIN_ID); |
1700 | BUG_ON(id == 0); |
1701 | if (id > 0 && id < MAX_DOMAIN_ID) |
1702 | __set_bit(id, amd_iommu_pd_alloc_bitmap); |
1703 | else |
1704 | id = 0; |
1705 | spin_unlock_irqrestore(&pd_bitmap_lock, flags); |
1706 | |
1707 | return id; |
1708 | } |
1709 | |
1710 | static void domain_id_free(int id) |
1711 | { |
1712 | unsigned long flags; |
1713 | |
1714 | spin_lock_irqsave(&pd_bitmap_lock, flags); |
1715 | if (id > 0 && id < MAX_DOMAIN_ID) |
1716 | __clear_bit(id, amd_iommu_pd_alloc_bitmap); |
1717 | spin_unlock_irqrestore(&pd_bitmap_lock, flags); |
1718 | } |
1719 | |
1720 | static void free_gcr3_tbl_level1(u64 *tbl) |
1721 | { |
1722 | u64 *ptr; |
1723 | int i; |
1724 | |
1725 | for (i = 0; i < 512; ++i) { |
1726 | if (!(tbl[i] & GCR3_VALID)) |
1727 | continue; |
1728 | |
1729 | ptr = iommu_phys_to_virt(tbl[i] & PAGE_MASK); |
1730 | |
1731 | free_page((unsigned long)ptr); |
1732 | } |
1733 | } |
1734 | |
1735 | static void free_gcr3_tbl_level2(u64 *tbl) |
1736 | { |
1737 | u64 *ptr; |
1738 | int i; |
1739 | |
1740 | for (i = 0; i < 512; ++i) { |
1741 | if (!(tbl[i] & GCR3_VALID)) |
1742 | continue; |
1743 | |
		ptr = iommu_phys_to_virt(tbl[i] & PAGE_MASK);

		free_gcr3_tbl_level1(ptr);
1747 | } |
1748 | } |
1749 | |
1750 | static void free_gcr3_table(struct gcr3_tbl_info *gcr3_info) |
1751 | { |
1752 | if (gcr3_info->glx == 2) |
		free_gcr3_tbl_level2(gcr3_info->gcr3_tbl);
	else if (gcr3_info->glx == 1)
		free_gcr3_tbl_level1(gcr3_info->gcr3_tbl);
1756 | else |
1757 | WARN_ON_ONCE(gcr3_info->glx != 0); |
1758 | |
1759 | gcr3_info->glx = 0; |
1760 | |
1761 | /* Free per device domain ID */ |
	domain_id_free(gcr3_info->domid);
1763 | |
1764 | free_page((unsigned long)gcr3_info->gcr3_tbl); |
1765 | gcr3_info->gcr3_tbl = NULL; |
1766 | } |
1767 | |
1768 | /* |
 * Number of GCR3 table levels required. Each level is a 4-Kbyte
 * page and can contain up to 512 entries.
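 *
 * For example: pasids == 1 gives get_count_order() == 0, i.e. a
 * single-level table (glx == 0), while pasids == (1 << 16) gives
 * get_count_order() == 16 and DIV_ROUND_UP(16, 9) - 1 == 1, i.e. a
 * two-level table (glx == 1).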
1771 | */ |
1772 | static int get_gcr3_levels(int pasids) |
1773 | { |
1774 | int levels; |
1775 | |
1776 | if (pasids == -1) |
1777 | return amd_iommu_max_glx_val; |
1778 | |
	levels = get_count_order(pasids);
1780 | |
1781 | return levels ? (DIV_ROUND_UP(levels, 9) - 1) : levels; |
1782 | } |
1783 | |
1784 | static int setup_gcr3_table(struct gcr3_tbl_info *gcr3_info, |
1785 | struct amd_iommu *iommu, int pasids) |
1786 | { |
1787 | int levels = get_gcr3_levels(pasids); |
	int nid = iommu ? dev_to_node(&iommu->dev->dev) : NUMA_NO_NODE;
1789 | |
1790 | if (levels > amd_iommu_max_glx_val) |
1791 | return -EINVAL; |
1792 | |
1793 | if (gcr3_info->gcr3_tbl) |
1794 | return -EBUSY; |
1795 | |
1796 | /* Allocate per device domain ID */ |
1797 | gcr3_info->domid = domain_id_alloc(); |
1798 | |
1799 | gcr3_info->gcr3_tbl = alloc_pgtable_page(nid, GFP_ATOMIC); |
1800 | if (gcr3_info->gcr3_tbl == NULL) { |
		domain_id_free(gcr3_info->domid);
1802 | return -ENOMEM; |
1803 | } |
1804 | |
1805 | gcr3_info->glx = levels; |
1806 | |
1807 | return 0; |
1808 | } |
1809 | |
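/*
 * Return a pointer to the GCR3 table entry for @pasid, walking (and, if
 * @alloc is true, allocating) the intermediate table levels. Each level
 * consumes 9 bits of the PASID, matching the 512-entry tables described
 * above.
 */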
1810 | static u64 *__get_gcr3_pte(struct gcr3_tbl_info *gcr3_info, |
1811 | ioasid_t pasid, bool alloc) |
1812 | { |
1813 | int index; |
1814 | u64 *pte; |
1815 | u64 *root = gcr3_info->gcr3_tbl; |
1816 | int level = gcr3_info->glx; |
1817 | |
1818 | while (true) { |
1819 | |
1820 | index = (pasid >> (9 * level)) & 0x1ff; |
1821 | pte = &root[index]; |
1822 | |
1823 | if (level == 0) |
1824 | break; |
1825 | |
1826 | if (!(*pte & GCR3_VALID)) { |
1827 | if (!alloc) |
1828 | return NULL; |
1829 | |
1830 | root = (void *)get_zeroed_page(GFP_ATOMIC); |
1831 | if (root == NULL) |
1832 | return NULL; |
1833 | |
			*pte = iommu_virt_to_phys(root) | GCR3_VALID;
1835 | } |
1836 | |
		root = iommu_phys_to_virt(*pte & PAGE_MASK);
1838 | |
1839 | level -= 1; |
1840 | } |
1841 | |
1842 | return pte; |
1843 | } |
1844 | |
1845 | static int update_gcr3(struct iommu_dev_data *dev_data, |
1846 | ioasid_t pasid, unsigned long gcr3, bool set) |
1847 | { |
1848 | struct gcr3_tbl_info *gcr3_info = &dev_data->gcr3_info; |
1849 | u64 *pte; |
1850 | |
	pte = __get_gcr3_pte(gcr3_info, pasid, true);
1852 | if (pte == NULL) |
1853 | return -ENOMEM; |
1854 | |
1855 | if (set) |
1856 | *pte = (gcr3 & PAGE_MASK) | GCR3_VALID; |
1857 | else |
1858 | *pte = 0; |
1859 | |
1860 | amd_iommu_dev_flush_pasid_all(dev_data, pasid); |
1861 | return 0; |
1862 | } |
1863 | |
1864 | int amd_iommu_set_gcr3(struct iommu_dev_data *dev_data, ioasid_t pasid, |
1865 | unsigned long gcr3) |
1866 | { |
1867 | struct gcr3_tbl_info *gcr3_info = &dev_data->gcr3_info; |
1868 | int ret; |
1869 | |
	iommu_group_mutex_assert(dev_data->dev);

	ret = update_gcr3(dev_data, pasid, gcr3, true);
1873 | if (ret) |
1874 | return ret; |
1875 | |
1876 | gcr3_info->pasid_cnt++; |
1877 | return ret; |
1878 | } |
1879 | |
1880 | int amd_iommu_clear_gcr3(struct iommu_dev_data *dev_data, ioasid_t pasid) |
1881 | { |
1882 | struct gcr3_tbl_info *gcr3_info = &dev_data->gcr3_info; |
1883 | int ret; |
1884 | |
	iommu_group_mutex_assert(dev_data->dev);

	ret = update_gcr3(dev_data, pasid, 0, false);
1888 | if (ret) |
1889 | return ret; |
1890 | |
1891 | gcr3_info->pasid_cnt--; |
1892 | return ret; |
1893 | } |
1894 | |
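/*
 * Write the Device Table Entry (DTE) for a device: the host page-table
 * root and paging mode, the domain ID, ATS/PPR/dirty-tracking flags and,
 * when a GCR3 table is present, the GCR3 root pointer and the number of
 * guest levels. If an old domain ID is being overwritten (e.g. in a
 * kdump kernel), the translation cache for that ID is flushed.
 */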
1895 | static void set_dte_entry(struct amd_iommu *iommu, |
1896 | struct iommu_dev_data *dev_data) |
1897 | { |
1898 | u64 pte_root = 0; |
1899 | u64 flags = 0; |
1900 | u32 old_domid; |
1901 | u16 devid = dev_data->devid; |
1902 | u16 domid; |
1903 | struct protection_domain *domain = dev_data->domain; |
1904 | struct dev_table_entry *dev_table = get_dev_table(iommu); |
1905 | struct gcr3_tbl_info *gcr3_info = &dev_data->gcr3_info; |
1906 | |
1907 | if (gcr3_info && gcr3_info->gcr3_tbl) |
1908 | domid = dev_data->gcr3_info.domid; |
1909 | else |
1910 | domid = domain->id; |
1911 | |
1912 | if (domain->iop.mode != PAGE_MODE_NONE) |
		pte_root = iommu_virt_to_phys(domain->iop.root);
1914 | |
1915 | pte_root |= (domain->iop.mode & DEV_ENTRY_MODE_MASK) |
1916 | << DEV_ENTRY_MODE_SHIFT; |
1917 | |
1918 | pte_root |= DTE_FLAG_IR | DTE_FLAG_IW | DTE_FLAG_V; |
1919 | |
1920 | /* |
	 * When SNP is enabled, only set the TV bit when IOMMU
	 * page translation is in use.
1923 | */ |
1924 | if (!amd_iommu_snp_en || (domid != 0)) |
1925 | pte_root |= DTE_FLAG_TV; |
1926 | |
1927 | flags = dev_table[devid].data[1]; |
1928 | |
1929 | if (dev_data->ats_enabled) |
1930 | flags |= DTE_FLAG_IOTLB; |
1931 | |
1932 | if (dev_data->ppr) |
1933 | pte_root |= 1ULL << DEV_ENTRY_PPR; |
1934 | |
1935 | if (domain->dirty_tracking) |
1936 | pte_root |= DTE_FLAG_HAD; |
1937 | |
1938 | if (gcr3_info && gcr3_info->gcr3_tbl) { |
		u64 gcr3 = iommu_virt_to_phys(gcr3_info->gcr3_tbl);
1940 | u64 glx = gcr3_info->glx; |
1941 | u64 tmp; |
1942 | |
1943 | pte_root |= DTE_FLAG_GV; |
1944 | pte_root |= (glx & DTE_GLX_MASK) << DTE_GLX_SHIFT; |
1945 | |
1946 | /* First mask out possible old values for GCR3 table */ |
1947 | tmp = DTE_GCR3_VAL_B(~0ULL) << DTE_GCR3_SHIFT_B; |
1948 | flags &= ~tmp; |
1949 | |
1950 | tmp = DTE_GCR3_VAL_C(~0ULL) << DTE_GCR3_SHIFT_C; |
1951 | flags &= ~tmp; |
1952 | |
1953 | /* Encode GCR3 table into DTE */ |
1954 | tmp = DTE_GCR3_VAL_A(gcr3) << DTE_GCR3_SHIFT_A; |
1955 | pte_root |= tmp; |
1956 | |
1957 | tmp = DTE_GCR3_VAL_B(gcr3) << DTE_GCR3_SHIFT_B; |
1958 | flags |= tmp; |
1959 | |
1960 | tmp = DTE_GCR3_VAL_C(gcr3) << DTE_GCR3_SHIFT_C; |
1961 | flags |= tmp; |
1962 | |
1963 | if (amd_iommu_gpt_level == PAGE_MODE_5_LEVEL) { |
1964 | dev_table[devid].data[2] |= |
1965 | ((u64)GUEST_PGTABLE_5_LEVEL << DTE_GPT_LEVEL_SHIFT); |
1966 | } |
1967 | |
1968 | /* GIOV is supported with V2 page table mode only */ |
		if (pdom_is_v2_pgtbl_mode(domain))
1970 | pte_root |= DTE_FLAG_GIOV; |
1971 | } |
1972 | |
1973 | flags &= ~DEV_DOMID_MASK; |
1974 | flags |= domid; |
1975 | |
1976 | old_domid = dev_table[devid].data[1] & DEV_DOMID_MASK; |
1977 | dev_table[devid].data[1] = flags; |
1978 | dev_table[devid].data[0] = pte_root; |
1979 | |
1980 | /* |
1981 | * A kdump kernel might be replacing a domain ID that was copied from |
1982 | * the previous kernel--if so, it needs to flush the translation cache |
1983 | * entries for the old domain ID that is being overwritten |
1984 | */ |
1985 | if (old_domid) { |
		amd_iommu_flush_tlb_domid(iommu, old_domid);
1987 | } |
1988 | } |
1989 | |
1990 | static void clear_dte_entry(struct amd_iommu *iommu, u16 devid) |
1991 | { |
1992 | struct dev_table_entry *dev_table = get_dev_table(iommu); |
1993 | |
1994 | /* remove entry from the device table seen by the hardware */ |
1995 | dev_table[devid].data[0] = DTE_FLAG_V; |
1996 | |
1997 | if (!amd_iommu_snp_en) |
1998 | dev_table[devid].data[0] |= DTE_FLAG_TV; |
1999 | |
2000 | dev_table[devid].data[1] &= DTE_FLAG_MASK; |
2001 | |
2002 | amd_iommu_apply_erratum_63(iommu, devid); |
2003 | } |
2004 | |
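/*
 * Attach a device to a protection domain. Called with domain->lock and
 * dev_data->lock held: link the device into the domain's device list,
 * bump the per-IOMMU reference counts, set up a single-PASID GCR3 table
 * for V2 domains and finally write and flush the DTE.
 */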
2005 | static int do_attach(struct iommu_dev_data *dev_data, |
2006 | struct protection_domain *domain) |
2007 | { |
2008 | struct amd_iommu *iommu = get_amd_iommu_from_dev_data(dev_data); |
2009 | int ret = 0; |
2010 | |
2011 | /* Update data structures */ |
2012 | dev_data->domain = domain; |
	list_add(&dev_data->list, &domain->dev_list);
2014 | |
2015 | /* Update NUMA Node ID */ |
2016 | if (domain->nid == NUMA_NO_NODE) |
		domain->nid = dev_to_node(dev_data->dev);
2018 | |
2019 | /* Do reference counting */ |
2020 | domain->dev_iommu[iommu->index] += 1; |
2021 | domain->dev_cnt += 1; |
2022 | |
2023 | /* Init GCR3 table and update device table */ |
2024 | if (domain->pd_mode == PD_MODE_V2) { |
2025 | /* By default, setup GCR3 table to support single PASID */ |
		ret = setup_gcr3_table(&dev_data->gcr3_info, iommu, 1);
2027 | if (ret) |
2028 | return ret; |
2029 | |
		ret = update_gcr3(dev_data, 0,
				  iommu_virt_to_phys(domain->iop.pgd), true);
		if (ret) {
			free_gcr3_table(&dev_data->gcr3_info);
2034 | return ret; |
2035 | } |
2036 | } |
2037 | |
2038 | /* Update device table */ |
2039 | set_dte_entry(iommu, dev_data); |
	clone_aliases(iommu, dev_data->dev);
2041 | |
2042 | device_flush_dte(dev_data); |
2043 | |
2044 | return ret; |
2045 | } |
2046 | |
2047 | static void do_detach(struct iommu_dev_data *dev_data) |
2048 | { |
2049 | struct protection_domain *domain = dev_data->domain; |
2050 | struct amd_iommu *iommu = get_amd_iommu_from_dev_data(dev_data); |
2051 | |
2052 | /* Clear GCR3 table */ |
2053 | if (domain->pd_mode == PD_MODE_V2) { |
		update_gcr3(dev_data, 0, 0, false);
		free_gcr3_table(&dev_data->gcr3_info);
2056 | } |
2057 | |
2058 | /* Update data structures */ |
2059 | dev_data->domain = NULL; |
	list_del(&dev_data->list);
	clear_dte_entry(iommu, dev_data->devid);
	clone_aliases(iommu, dev_data->dev);
2063 | |
2064 | /* Flush the DTE entry */ |
2065 | device_flush_dte(dev_data); |
2066 | |
2067 | /* Flush IOTLB and wait for the flushes to finish */ |
2068 | amd_iommu_domain_flush_all(domain); |
2069 | |
2070 | /* decrease reference counters - needs to happen after the flushes */ |
2071 | domain->dev_iommu[iommu->index] -= 1; |
2072 | domain->dev_cnt -= 1; |
2073 | } |
2074 | |
2075 | /* |
2076 | * If a device is not yet associated with a domain, this function makes the |
2077 | * device visible in the domain |
2078 | */ |
2079 | static int attach_device(struct device *dev, |
2080 | struct protection_domain *domain) |
2081 | { |
2082 | struct iommu_dev_data *dev_data; |
2083 | unsigned long flags; |
2084 | int ret = 0; |
2085 | |
2086 | spin_lock_irqsave(&domain->lock, flags); |
2087 | |
2088 | dev_data = dev_iommu_priv_get(dev); |
2089 | |
	spin_lock(&dev_data->lock);
2091 | |
2092 | if (dev_data->domain != NULL) { |
2093 | ret = -EBUSY; |
2094 | goto out; |
2095 | } |
2096 | |
2097 | if (dev_is_pci(dev)) |
2098 | pdev_enable_caps(to_pci_dev(dev)); |
2099 | |
2100 | ret = do_attach(dev_data, domain); |
2101 | |
2102 | out: |
	spin_unlock(&dev_data->lock);

	spin_unlock_irqrestore(&domain->lock, flags);
2106 | |
2107 | return ret; |
2108 | } |
2109 | |
2110 | /* |
2111 | * Removes a device from a protection domain (with devtable_lock held) |
2112 | */ |
2113 | static void detach_device(struct device *dev) |
2114 | { |
2115 | struct protection_domain *domain; |
2116 | struct iommu_dev_data *dev_data; |
2117 | unsigned long flags; |
2118 | |
2119 | dev_data = dev_iommu_priv_get(dev); |
2120 | domain = dev_data->domain; |
2121 | |
2122 | spin_lock_irqsave(&domain->lock, flags); |
2123 | |
	spin_lock(&dev_data->lock);
2125 | |
2126 | /* |
2127 | * First check if the device is still attached. It might already |
2128 | * be detached from its domain because the generic |
2129 | * iommu_detach_group code detached it and we try again here in |
2130 | * our alias handling. |
2131 | */ |
2132 | if (WARN_ON(!dev_data->domain)) |
2133 | goto out; |
2134 | |
2135 | do_detach(dev_data); |
2136 | |
2137 | if (dev_is_pci(dev)) |
2138 | pdev_disable_caps(to_pci_dev(dev)); |
2139 | |
2140 | out: |
	spin_unlock(&dev_data->lock);

	spin_unlock_irqrestore(&domain->lock, flags);
2144 | } |
2145 | |
2146 | static struct iommu_device *amd_iommu_probe_device(struct device *dev) |
2147 | { |
2148 | struct iommu_device *iommu_dev; |
2149 | struct amd_iommu *iommu; |
2150 | int ret; |
2151 | |
2152 | if (!check_device(dev)) |
		return ERR_PTR(-ENODEV);
2154 | |
2155 | iommu = rlookup_amd_iommu(dev); |
2156 | if (!iommu) |
		return ERR_PTR(-ENODEV);
2158 | |
2159 | /* Not registered yet? */ |
2160 | if (!iommu->iommu.ops) |
		return ERR_PTR(-ENODEV);
2162 | |
2163 | if (dev_iommu_priv_get(dev)) |
2164 | return &iommu->iommu; |
2165 | |
2166 | ret = iommu_init_device(iommu, dev); |
2167 | if (ret) { |
2168 | dev_err(dev, "Failed to initialize - trying to proceed anyway\n" ); |
2169 | iommu_dev = ERR_PTR(error: ret); |
2170 | iommu_ignore_device(iommu, dev); |
2171 | } else { |
2172 | amd_iommu_set_pci_msi_domain(dev, iommu); |
2173 | iommu_dev = &iommu->iommu; |
2174 | } |
2175 | |
2176 | iommu_completion_wait(iommu); |
2177 | |
2178 | return iommu_dev; |
2179 | } |
2180 | |
2181 | static void amd_iommu_probe_finalize(struct device *dev) |
2182 | { |
2183 | /* Domains are initialized for this device - have a look what we ended up with */ |
2184 | set_dma_ops(dev, NULL); |
	iommu_setup_dma_ops(dev, 0, U64_MAX);
2186 | } |
2187 | |
2188 | static void amd_iommu_release_device(struct device *dev) |
2189 | { |
2190 | struct amd_iommu *iommu; |
2191 | |
2192 | if (!check_device(dev)) |
2193 | return; |
2194 | |
2195 | iommu = rlookup_amd_iommu(dev); |
2196 | if (!iommu) |
2197 | return; |
2198 | |
2199 | amd_iommu_uninit_device(dev); |
2200 | iommu_completion_wait(iommu); |
2201 | } |
2202 | |
2203 | static struct iommu_group *amd_iommu_device_group(struct device *dev) |
2204 | { |
2205 | if (dev_is_pci(dev)) |
2206 | return pci_device_group(dev); |
2207 | |
2208 | return acpihid_device_group(dev); |
2209 | } |
2210 | |
2211 | /***************************************************************************** |
2212 | * |
2213 | * The following functions belong to the exported interface of AMD IOMMU |
2214 | * |
2215 | * This interface allows access to lower level functions of the IOMMU |
 * like protection domain handling and assignment of devices to domains
2217 | * which is not possible with the dma_ops interface. |
2218 | * |
2219 | *****************************************************************************/ |
2220 | |
2221 | static void cleanup_domain(struct protection_domain *domain) |
2222 | { |
2223 | struct iommu_dev_data *entry; |
2224 | |
2225 | lockdep_assert_held(&domain->lock); |
2226 | |
2227 | if (!domain->dev_cnt) |
2228 | return; |
2229 | |
	while (!list_empty(&domain->dev_list)) {
2231 | entry = list_first_entry(&domain->dev_list, |
2232 | struct iommu_dev_data, list); |
2233 | BUG_ON(!entry->domain); |
		do_detach(entry);
2235 | } |
2236 | WARN_ON(domain->dev_cnt != 0); |
2237 | } |
2238 | |
2239 | static void protection_domain_free(struct protection_domain *domain) |
2240 | { |
2241 | if (!domain) |
2242 | return; |
2243 | |
2244 | if (domain->iop.pgtbl_cfg.tlb) |
		free_io_pgtable_ops(&domain->iop.iop.ops);
2246 | |
2247 | if (domain->iop.root) |
2248 | free_page((unsigned long)domain->iop.root); |
2249 | |
2250 | if (domain->id) |
		domain_id_free(domain->id);

	kfree(domain);
2254 | } |
2255 | |
2256 | static int protection_domain_init_v1(struct protection_domain *domain, int mode) |
2257 | { |
2258 | u64 *pt_root = NULL; |
2259 | |
2260 | BUG_ON(mode < PAGE_MODE_NONE || mode > PAGE_MODE_6_LEVEL); |
2261 | |
2262 | if (mode != PAGE_MODE_NONE) { |
2263 | pt_root = (void *)get_zeroed_page(GFP_KERNEL); |
2264 | if (!pt_root) |
2265 | return -ENOMEM; |
2266 | } |
2267 | |
2268 | domain->pd_mode = PD_MODE_V1; |
	amd_iommu_domain_set_pgtable(domain, pt_root, mode);
2270 | |
2271 | return 0; |
2272 | } |
2273 | |
2274 | static int protection_domain_init_v2(struct protection_domain *pdom) |
2275 | { |
2276 | pdom->pd_mode = PD_MODE_V2; |
2277 | pdom->domain.pgsize_bitmap = AMD_IOMMU_PGSIZES_V2; |
2278 | |
2279 | return 0; |
2280 | } |
2281 | |
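/*
 * Allocate and initialize a protection domain: assign a domain ID and,
 * except for identity domains, set up the page table. DMA domains use
 * the page-table format selected by amd_iommu_pgtable, while unmanaged
 * domains are forced to the v1 format.
 */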
2282 | static struct protection_domain *protection_domain_alloc(unsigned int type) |
2283 | { |
2284 | struct io_pgtable_ops *pgtbl_ops; |
2285 | struct protection_domain *domain; |
2286 | int pgtable; |
2287 | int ret; |
2288 | |
	domain = kzalloc(sizeof(*domain), GFP_KERNEL);
2290 | if (!domain) |
2291 | return NULL; |
2292 | |
2293 | domain->id = domain_id_alloc(); |
2294 | if (!domain->id) |
2295 | goto out_err; |
2296 | |
2297 | spin_lock_init(&domain->lock); |
	INIT_LIST_HEAD(&domain->dev_list);
2299 | domain->nid = NUMA_NO_NODE; |
2300 | |
2301 | switch (type) { |
2302 | /* No need to allocate io pgtable ops in passthrough mode */ |
2303 | case IOMMU_DOMAIN_IDENTITY: |
2304 | return domain; |
2305 | case IOMMU_DOMAIN_DMA: |
2306 | pgtable = amd_iommu_pgtable; |
2307 | break; |
2308 | /* |
2309 | * Force IOMMU v1 page table when allocating |
2310 | * domain for pass-through devices. |
2311 | */ |
2312 | case IOMMU_DOMAIN_UNMANAGED: |
2313 | pgtable = AMD_IOMMU_V1; |
2314 | break; |
2315 | default: |
2316 | goto out_err; |
2317 | } |
2318 | |
2319 | switch (pgtable) { |
2320 | case AMD_IOMMU_V1: |
2321 | ret = protection_domain_init_v1(domain, DEFAULT_PGTABLE_LEVEL); |
2322 | break; |
2323 | case AMD_IOMMU_V2: |
		ret = protection_domain_init_v2(domain);
2325 | break; |
2326 | default: |
2327 | ret = -EINVAL; |
2328 | break; |
2329 | } |
2330 | |
2331 | if (ret) |
2332 | goto out_err; |
2333 | |
	pgtbl_ops = alloc_io_pgtable_ops(pgtable, &domain->iop.pgtbl_cfg, domain);
2335 | if (!pgtbl_ops) |
2336 | goto out_err; |
2337 | |
2338 | return domain; |
2339 | out_err: |
2340 | protection_domain_free(domain); |
2341 | return NULL; |
2342 | } |
2343 | |
2344 | static inline u64 dma_max_address(void) |
2345 | { |
2346 | if (amd_iommu_pgtable == AMD_IOMMU_V1) |
2347 | return ~0ULL; |
2348 | |
2349 | /* V2 with 4/5 level page table */ |
2350 | return ((1ULL << PM_LEVEL_SHIFT(amd_iommu_gpt_level)) - 1); |
2351 | } |
2352 | |
2353 | static bool amd_iommu_hd_support(struct amd_iommu *iommu) |
2354 | { |
2355 | return iommu && (iommu->features & FEATURE_HDSUP); |
2356 | } |
2357 | |
2358 | static struct iommu_domain *do_iommu_domain_alloc(unsigned int type, |
2359 | struct device *dev, u32 flags) |
2360 | { |
2361 | bool dirty_tracking = flags & IOMMU_HWPT_ALLOC_DIRTY_TRACKING; |
2362 | struct protection_domain *domain; |
2363 | struct amd_iommu *iommu = NULL; |
2364 | |
2365 | if (dev) |
2366 | iommu = get_amd_iommu_from_dev(dev); |
2367 | |
2368 | /* |
2369 | * Since DTE[Mode]=0 is prohibited on SNP-enabled system, |
2370 | * default to use IOMMU_DOMAIN_DMA[_FQ]. |
2371 | */ |
2372 | if (amd_iommu_snp_en && (type == IOMMU_DOMAIN_IDENTITY)) |
		return ERR_PTR(-EINVAL);
2374 | |
2375 | if (dirty_tracking && !amd_iommu_hd_support(iommu)) |
		return ERR_PTR(-EOPNOTSUPP);
2377 | |
2378 | domain = protection_domain_alloc(type); |
2379 | if (!domain) |
		return ERR_PTR(-ENOMEM);
2381 | |
2382 | domain->domain.geometry.aperture_start = 0; |
2383 | domain->domain.geometry.aperture_end = dma_max_address(); |
2384 | domain->domain.geometry.force_aperture = true; |
2385 | |
2386 | if (iommu) { |
2387 | domain->domain.type = type; |
2388 | domain->domain.pgsize_bitmap = iommu->iommu.ops->pgsize_bitmap; |
2389 | domain->domain.ops = iommu->iommu.ops->default_domain_ops; |
2390 | |
2391 | if (dirty_tracking) |
2392 | domain->domain.dirty_ops = &amd_dirty_ops; |
2393 | } |
2394 | |
2395 | return &domain->domain; |
2396 | } |
2397 | |
2398 | static struct iommu_domain *amd_iommu_domain_alloc(unsigned int type) |
2399 | { |
2400 | struct iommu_domain *domain; |
2401 | |
	domain = do_iommu_domain_alloc(type, NULL, 0);
	if (IS_ERR(domain))
2404 | return NULL; |
2405 | |
2406 | return domain; |
2407 | } |
2408 | |
2409 | static struct iommu_domain * |
2410 | amd_iommu_domain_alloc_user(struct device *dev, u32 flags, |
2411 | struct iommu_domain *parent, |
2412 | const struct iommu_user_data *user_data) |
2413 | |
2414 | { |
2415 | unsigned int type = IOMMU_DOMAIN_UNMANAGED; |
2416 | |
2417 | if ((flags & ~IOMMU_HWPT_ALLOC_DIRTY_TRACKING) || parent || user_data) |
		return ERR_PTR(-EOPNOTSUPP);
2419 | |
2420 | return do_iommu_domain_alloc(type, dev, flags); |
2421 | } |
2422 | |
2423 | static void amd_iommu_domain_free(struct iommu_domain *dom) |
2424 | { |
2425 | struct protection_domain *domain; |
2426 | unsigned long flags; |
2427 | |
2428 | if (!dom) |
2429 | return; |
2430 | |
2431 | domain = to_pdomain(dom); |
2432 | |
2433 | spin_lock_irqsave(&domain->lock, flags); |
2434 | |
2435 | cleanup_domain(domain); |
2436 | |
	spin_unlock_irqrestore(&domain->lock, flags);
2438 | |
2439 | protection_domain_free(domain); |
2440 | } |
2441 | |
2442 | static int amd_iommu_attach_device(struct iommu_domain *dom, |
2443 | struct device *dev) |
2444 | { |
2445 | struct iommu_dev_data *dev_data = dev_iommu_priv_get(dev); |
2446 | struct protection_domain *domain = to_pdomain(dom); |
2447 | struct amd_iommu *iommu = get_amd_iommu_from_dev(dev); |
2448 | int ret; |
2449 | |
2450 | /* |
	 * Skip attaching the device to the domain if the new domain is
	 * the same as the device's current domain
2453 | */ |
2454 | if (dev_data->domain == domain) |
2455 | return 0; |
2456 | |
2457 | dev_data->defer_attach = false; |
2458 | |
2459 | /* |
2460 | * Restrict to devices with compatible IOMMU hardware support |
2461 | * when enforcement of dirty tracking is enabled. |
2462 | */ |
2463 | if (dom->dirty_ops && !amd_iommu_hd_support(iommu)) |
2464 | return -EINVAL; |
2465 | |
2466 | if (dev_data->domain) |
2467 | detach_device(dev); |
2468 | |
2469 | ret = attach_device(dev, domain); |
2470 | |
2471 | #ifdef CONFIG_IRQ_REMAP |
2472 | if (AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir)) { |
2473 | if (dom->type == IOMMU_DOMAIN_UNMANAGED) |
2474 | dev_data->use_vapic = 1; |
2475 | else |
2476 | dev_data->use_vapic = 0; |
2477 | } |
2478 | #endif |
2479 | |
2480 | iommu_completion_wait(iommu); |
2481 | |
2482 | return ret; |
2483 | } |
2484 | |
2485 | static int amd_iommu_iotlb_sync_map(struct iommu_domain *dom, |
2486 | unsigned long iova, size_t size) |
2487 | { |
2488 | struct protection_domain *domain = to_pdomain(dom); |
2489 | struct io_pgtable_ops *ops = &domain->iop.iop.ops; |
2490 | |
2491 | if (ops->map_pages) |
2492 | domain_flush_np_cache(domain, iova, size); |
2493 | return 0; |
2494 | } |
2495 | |
2496 | static int amd_iommu_map_pages(struct iommu_domain *dom, unsigned long iova, |
2497 | phys_addr_t paddr, size_t pgsize, size_t pgcount, |
2498 | int iommu_prot, gfp_t gfp, size_t *mapped) |
2499 | { |
2500 | struct protection_domain *domain = to_pdomain(dom); |
2501 | struct io_pgtable_ops *ops = &domain->iop.iop.ops; |
2502 | int prot = 0; |
2503 | int ret = -EINVAL; |
2504 | |
2505 | if ((domain->pd_mode == PD_MODE_V1) && |
2506 | (domain->iop.mode == PAGE_MODE_NONE)) |
2507 | return -EINVAL; |
2508 | |
2509 | if (iommu_prot & IOMMU_READ) |
2510 | prot |= IOMMU_PROT_IR; |
2511 | if (iommu_prot & IOMMU_WRITE) |
2512 | prot |= IOMMU_PROT_IW; |
2513 | |
2514 | if (ops->map_pages) { |
2515 | ret = ops->map_pages(ops, iova, paddr, pgsize, |
2516 | pgcount, prot, gfp, mapped); |
2517 | } |
2518 | |
2519 | return ret; |
2520 | } |
2521 | |
2522 | static void amd_iommu_iotlb_gather_add_page(struct iommu_domain *domain, |
2523 | struct iommu_iotlb_gather *gather, |
2524 | unsigned long iova, size_t size) |
2525 | { |
2526 | /* |
2527 | * AMD's IOMMU can flush as many pages as necessary in a single flush. |
2528 | * Unless we run in a virtual machine, which can be inferred according |
2529 | * to whether "non-present cache" is on, it is probably best to prefer |
2530 | * (potentially) too extensive TLB flushing (i.e., more misses) over |
	 * multiple TLB flushes (i.e., more flushes). For virtual machines the
2532 | * hypervisor needs to synchronize the host IOMMU PTEs with those of |
2533 | * the guest, and the trade-off is different: unnecessary TLB flushes |
2534 | * should be avoided. |
2535 | */ |
2536 | if (amd_iommu_np_cache && |
2537 | iommu_iotlb_gather_is_disjoint(gather, iova, size)) |
		iommu_iotlb_sync(domain, gather);
2539 | |
2540 | iommu_iotlb_gather_add_range(gather, iova, size); |
2541 | } |
2542 | |
2543 | static size_t amd_iommu_unmap_pages(struct iommu_domain *dom, unsigned long iova, |
2544 | size_t pgsize, size_t pgcount, |
2545 | struct iommu_iotlb_gather *gather) |
2546 | { |
2547 | struct protection_domain *domain = to_pdomain(dom); |
2548 | struct io_pgtable_ops *ops = &domain->iop.iop.ops; |
2549 | size_t r; |
2550 | |
2551 | if ((domain->pd_mode == PD_MODE_V1) && |
2552 | (domain->iop.mode == PAGE_MODE_NONE)) |
2553 | return 0; |
2554 | |
2555 | r = (ops->unmap_pages) ? ops->unmap_pages(ops, iova, pgsize, pgcount, NULL) : 0; |
2556 | |
2557 | if (r) |
		amd_iommu_iotlb_gather_add_page(dom, gather, iova, r);
2559 | |
2560 | return r; |
2561 | } |
2562 | |
2563 | static phys_addr_t amd_iommu_iova_to_phys(struct iommu_domain *dom, |
2564 | dma_addr_t iova) |
2565 | { |
2566 | struct protection_domain *domain = to_pdomain(dom); |
2567 | struct io_pgtable_ops *ops = &domain->iop.iop.ops; |
2568 | |
2569 | return ops->iova_to_phys(ops, iova); |
2570 | } |
2571 | |
2572 | static bool amd_iommu_capable(struct device *dev, enum iommu_cap cap) |
2573 | { |
2574 | switch (cap) { |
2575 | case IOMMU_CAP_CACHE_COHERENCY: |
2576 | return true; |
2577 | case IOMMU_CAP_NOEXEC: |
2578 | return false; |
2579 | case IOMMU_CAP_PRE_BOOT_PROTECTION: |
2580 | return amdr_ivrs_remap_support; |
2581 | case IOMMU_CAP_ENFORCE_CACHE_COHERENCY: |
2582 | return true; |
2583 | case IOMMU_CAP_DEFERRED_FLUSH: |
2584 | return true; |
2585 | case IOMMU_CAP_DIRTY_TRACKING: { |
2586 | struct amd_iommu *iommu = get_amd_iommu_from_dev(dev); |
2587 | |
2588 | return amd_iommu_hd_support(iommu); |
2589 | } |
2590 | default: |
2591 | break; |
2592 | } |
2593 | |
2594 | return false; |
2595 | } |
2596 | |
2597 | static int amd_iommu_set_dirty_tracking(struct iommu_domain *domain, |
2598 | bool enable) |
2599 | { |
	struct protection_domain *pdomain = to_pdomain(domain);
2601 | struct dev_table_entry *dev_table; |
2602 | struct iommu_dev_data *dev_data; |
2603 | bool domain_flush = false; |
2604 | struct amd_iommu *iommu; |
2605 | unsigned long flags; |
2606 | u64 pte_root; |
2607 | |
2608 | spin_lock_irqsave(&pdomain->lock, flags); |
2609 | if (!(pdomain->dirty_tracking ^ enable)) { |
		spin_unlock_irqrestore(&pdomain->lock, flags);
2611 | return 0; |
2612 | } |
2613 | |
2614 | list_for_each_entry(dev_data, &pdomain->dev_list, list) { |
2615 | iommu = get_amd_iommu_from_dev_data(dev_data); |
2616 | |
2617 | dev_table = get_dev_table(iommu); |
2618 | pte_root = dev_table[dev_data->devid].data[0]; |
2619 | |
2620 | pte_root = (enable ? pte_root | DTE_FLAG_HAD : |
2621 | pte_root & ~DTE_FLAG_HAD); |
2622 | |
2623 | /* Flush device DTE */ |
2624 | dev_table[dev_data->devid].data[0] = pte_root; |
2625 | device_flush_dte(dev_data); |
2626 | domain_flush = true; |
2627 | } |
2628 | |
2629 | /* Flush IOTLB to mark IOPTE dirty on the next translation(s) */ |
2630 | if (domain_flush) |
		amd_iommu_domain_flush_all(pdomain);

	pdomain->dirty_tracking = enable;
	spin_unlock_irqrestore(&pdomain->lock, flags);
2635 | |
2636 | return 0; |
2637 | } |
2638 | |
2639 | static int amd_iommu_read_and_clear_dirty(struct iommu_domain *domain, |
2640 | unsigned long iova, size_t size, |
2641 | unsigned long flags, |
2642 | struct iommu_dirty_bitmap *dirty) |
2643 | { |
	struct protection_domain *pdomain = to_pdomain(domain);
2645 | struct io_pgtable_ops *ops = &pdomain->iop.iop.ops; |
2646 | unsigned long lflags; |
2647 | |
2648 | if (!ops || !ops->read_and_clear_dirty) |
2649 | return -EOPNOTSUPP; |
2650 | |
2651 | spin_lock_irqsave(&pdomain->lock, lflags); |
2652 | if (!pdomain->dirty_tracking && dirty->bitmap) { |
		spin_unlock_irqrestore(&pdomain->lock, lflags);
		return -EINVAL;
	}
	spin_unlock_irqrestore(&pdomain->lock, lflags);
2657 | |
2658 | return ops->read_and_clear_dirty(ops, iova, size, flags, dirty); |
2659 | } |
2660 | |
2661 | static void amd_iommu_get_resv_regions(struct device *dev, |
2662 | struct list_head *head) |
2663 | { |
2664 | struct iommu_resv_region *region; |
2665 | struct unity_map_entry *entry; |
2666 | struct amd_iommu *iommu; |
2667 | struct amd_iommu_pci_seg *pci_seg; |
2668 | int devid, sbdf; |
2669 | |
2670 | sbdf = get_device_sbdf_id(dev); |
2671 | if (sbdf < 0) |
2672 | return; |
2673 | |
2674 | devid = PCI_SBDF_TO_DEVID(sbdf); |
2675 | iommu = get_amd_iommu_from_dev(dev); |
2676 | pci_seg = iommu->pci_seg; |
2677 | |
2678 | list_for_each_entry(entry, &pci_seg->unity_map, list) { |
2679 | int type, prot = 0; |
2680 | size_t length; |
2681 | |
2682 | if (devid < entry->devid_start || devid > entry->devid_end) |
2683 | continue; |
2684 | |
2685 | type = IOMMU_RESV_DIRECT; |
2686 | length = entry->address_end - entry->address_start; |
2687 | if (entry->prot & IOMMU_PROT_IR) |
2688 | prot |= IOMMU_READ; |
2689 | if (entry->prot & IOMMU_PROT_IW) |
2690 | prot |= IOMMU_WRITE; |
2691 | if (entry->prot & IOMMU_UNITY_MAP_FLAG_EXCL_RANGE) |
2692 | /* Exclusion range */ |
2693 | type = IOMMU_RESV_RESERVED; |
2694 | |
		region = iommu_alloc_resv_region(entry->address_start,
						 length, prot, type,
						 GFP_KERNEL);
		if (!region) {
			dev_err(dev, "Out of memory allocating dm-regions\n");
			return;
		}
		list_add_tail(&region->list, head);
2703 | } |
2704 | |
	region = iommu_alloc_resv_region(MSI_RANGE_START,
					 MSI_RANGE_END - MSI_RANGE_START + 1,
					 0, IOMMU_RESV_MSI, GFP_KERNEL);
	if (!region)
		return;
	list_add_tail(&region->list, head);

	region = iommu_alloc_resv_region(HT_RANGE_START,
					 HT_RANGE_END - HT_RANGE_START + 1,
					 0, IOMMU_RESV_RESERVED, GFP_KERNEL);
	if (!region)
		return;
	list_add_tail(&region->list, head);
2718 | } |
2719 | |
2720 | bool amd_iommu_is_attach_deferred(struct device *dev) |
2721 | { |
2722 | struct iommu_dev_data *dev_data = dev_iommu_priv_get(dev); |
2723 | |
2724 | return dev_data->defer_attach; |
2725 | } |
2726 | |
2727 | static void amd_iommu_flush_iotlb_all(struct iommu_domain *domain) |
2728 | { |
	struct protection_domain *dom = to_pdomain(domain);
2730 | unsigned long flags; |
2731 | |
2732 | spin_lock_irqsave(&dom->lock, flags); |
	amd_iommu_domain_flush_all(dom);
	spin_unlock_irqrestore(&dom->lock, flags);
2735 | } |
2736 | |
2737 | static void amd_iommu_iotlb_sync(struct iommu_domain *domain, |
2738 | struct iommu_iotlb_gather *gather) |
2739 | { |
	struct protection_domain *dom = to_pdomain(domain);
2741 | unsigned long flags; |
2742 | |
2743 | spin_lock_irqsave(&dom->lock, flags); |
	amd_iommu_domain_flush_pages(dom, gather->start,
				     gather->end - gather->start + 1);
	spin_unlock_irqrestore(&dom->lock, flags);
2747 | } |
2748 | |
2749 | static int amd_iommu_def_domain_type(struct device *dev) |
2750 | { |
2751 | struct iommu_dev_data *dev_data; |
2752 | |
2753 | dev_data = dev_iommu_priv_get(dev); |
2754 | if (!dev_data) |
2755 | return 0; |
2756 | |
2757 | /* |
2758 | * Do not identity map IOMMUv2 capable devices when: |
2759 | * - memory encryption is active, because some of those devices |
2760 | * (AMD GPUs) don't have the encryption bit in their DMA-mask |
2761 | * and require remapping. |
2762 | * - SNP is enabled, because it prohibits DTE[Mode]=0. |
2763 | */ |
2764 | if (pdev_pasid_supported(dev_data) && |
	    !cc_platform_has(CC_ATTR_MEM_ENCRYPT) &&
2766 | !amd_iommu_snp_en) { |
2767 | return IOMMU_DOMAIN_IDENTITY; |
2768 | } |
2769 | |
2770 | return 0; |
2771 | } |
2772 | |
2773 | static bool amd_iommu_enforce_cache_coherency(struct iommu_domain *domain) |
2774 | { |
2775 | /* IOMMU_PTE_FC is always set */ |
2776 | return true; |
2777 | } |
2778 | |
2779 | static const struct iommu_dirty_ops amd_dirty_ops = { |
2780 | .set_dirty_tracking = amd_iommu_set_dirty_tracking, |
2781 | .read_and_clear_dirty = amd_iommu_read_and_clear_dirty, |
2782 | }; |
2783 | |
2784 | const struct iommu_ops amd_iommu_ops = { |
2785 | .capable = amd_iommu_capable, |
2786 | .domain_alloc = amd_iommu_domain_alloc, |
2787 | .domain_alloc_user = amd_iommu_domain_alloc_user, |
2788 | .probe_device = amd_iommu_probe_device, |
2789 | .release_device = amd_iommu_release_device, |
2790 | .probe_finalize = amd_iommu_probe_finalize, |
2791 | .device_group = amd_iommu_device_group, |
2792 | .get_resv_regions = amd_iommu_get_resv_regions, |
2793 | .is_attach_deferred = amd_iommu_is_attach_deferred, |
2794 | .pgsize_bitmap = AMD_IOMMU_PGSIZES, |
2795 | .def_domain_type = amd_iommu_def_domain_type, |
2796 | .default_domain_ops = &(const struct iommu_domain_ops) { |
2797 | .attach_dev = amd_iommu_attach_device, |
2798 | .map_pages = amd_iommu_map_pages, |
2799 | .unmap_pages = amd_iommu_unmap_pages, |
2800 | .iotlb_sync_map = amd_iommu_iotlb_sync_map, |
2801 | .iova_to_phys = amd_iommu_iova_to_phys, |
2802 | .flush_iotlb_all = amd_iommu_flush_iotlb_all, |
2803 | .iotlb_sync = amd_iommu_iotlb_sync, |
2804 | .free = amd_iommu_domain_free, |
2805 | .enforce_cache_coherency = amd_iommu_enforce_cache_coherency, |
2806 | } |
2807 | }; |
2808 | |
2809 | #ifdef CONFIG_IRQ_REMAP |
2810 | |
2811 | /***************************************************************************** |
2812 | * |
2813 | * Interrupt Remapping Implementation |
2814 | * |
2815 | *****************************************************************************/ |
2816 | |
2817 | static struct irq_chip amd_ir_chip; |
2818 | static DEFINE_SPINLOCK(iommu_table_lock); |
2819 | |
2820 | static void iommu_flush_irt_and_complete(struct amd_iommu *iommu, u16 devid) |
2821 | { |
2822 | int ret; |
2823 | u64 data; |
2824 | unsigned long flags; |
2825 | struct iommu_cmd cmd, cmd2; |
2826 | |
2827 | if (iommu->irtcachedis_enabled) |
2828 | return; |
2829 | |
	build_inv_irt(&cmd, devid);
	data = atomic64_add_return(1, &iommu->cmd_sem_val);
	build_completion_wait(&cmd2, iommu, data);
2833 | |
2834 | raw_spin_lock_irqsave(&iommu->lock, flags); |
	ret = __iommu_queue_command_sync(iommu, &cmd, true);
	if (ret)
		goto out;
	ret = __iommu_queue_command_sync(iommu, &cmd2, false);
2839 | if (ret) |
2840 | goto out; |
2841 | wait_on_sem(iommu, data); |
2842 | out: |
2843 | raw_spin_unlock_irqrestore(&iommu->lock, flags); |
2844 | } |
2845 | |
2846 | static void set_dte_irq_entry(struct amd_iommu *iommu, u16 devid, |
2847 | struct irq_remap_table *table) |
2848 | { |
2849 | u64 dte; |
2850 | struct dev_table_entry *dev_table = get_dev_table(iommu); |
2851 | |
2852 | dte = dev_table[devid].data[2]; |
2853 | dte &= ~DTE_IRQ_PHYS_ADDR_MASK; |
	dte |= iommu_virt_to_phys(table->table);
2855 | dte |= DTE_IRQ_REMAP_INTCTL; |
2856 | dte |= DTE_INTTABLEN; |
2857 | dte |= DTE_IRQ_REMAP_ENABLE; |
2858 | |
2859 | dev_table[devid].data[2] = dte; |
2860 | } |
2861 | |
2862 | static struct irq_remap_table *get_irq_table(struct amd_iommu *iommu, u16 devid) |
2863 | { |
2864 | struct irq_remap_table *table; |
2865 | struct amd_iommu_pci_seg *pci_seg = iommu->pci_seg; |
2866 | |
2867 | if (WARN_ONCE(!pci_seg->rlookup_table[devid], |
2868 | "%s: no iommu for devid %x:%x\n" , |
2869 | __func__, pci_seg->id, devid)) |
2870 | return NULL; |
2871 | |
2872 | table = pci_seg->irq_lookup_table[devid]; |
2873 | if (WARN_ONCE(!table, "%s: no table for devid %x:%x\n" , |
2874 | __func__, pci_seg->id, devid)) |
2875 | return NULL; |
2876 | |
2877 | return table; |
2878 | } |
2879 | |
2880 | static struct irq_remap_table *__alloc_irq_table(void) |
2881 | { |
2882 | struct irq_remap_table *table; |
2883 | |
	table = kzalloc(sizeof(*table), GFP_KERNEL);
2885 | if (!table) |
2886 | return NULL; |
2887 | |
	table->table = kmem_cache_alloc(amd_iommu_irq_cache, GFP_KERNEL);
	if (!table->table) {
		kfree(table);
2891 | return NULL; |
2892 | } |
2893 | raw_spin_lock_init(&table->lock); |
2894 | |
2895 | if (!AMD_IOMMU_GUEST_IR_GA(amd_iommu_guest_ir)) |
2896 | memset(table->table, 0, |
2897 | MAX_IRQS_PER_TABLE * sizeof(u32)); |
2898 | else |
2899 | memset(table->table, 0, |
2900 | (MAX_IRQS_PER_TABLE * (sizeof(u64) * 2))); |
2901 | return table; |
2902 | } |
2903 | |
2904 | static void set_remap_table_entry(struct amd_iommu *iommu, u16 devid, |
2905 | struct irq_remap_table *table) |
2906 | { |
2907 | struct amd_iommu_pci_seg *pci_seg = iommu->pci_seg; |
2908 | |
2909 | pci_seg->irq_lookup_table[devid] = table; |
2910 | set_dte_irq_entry(iommu, devid, table); |
2911 | iommu_flush_dte(iommu, devid); |
2912 | } |
2913 | |
2914 | static int set_remap_table_entry_alias(struct pci_dev *pdev, u16 alias, |
2915 | void *data) |
2916 | { |
2917 | struct irq_remap_table *table = data; |
2918 | struct amd_iommu_pci_seg *pci_seg; |
	struct amd_iommu *iommu = rlookup_amd_iommu(&pdev->dev);
2920 | |
2921 | if (!iommu) |
2922 | return -EINVAL; |
2923 | |
2924 | pci_seg = iommu->pci_seg; |
2925 | pci_seg->irq_lookup_table[alias] = table; |
	set_dte_irq_entry(iommu, alias, table);
	iommu_flush_dte(pci_seg->rlookup_table[alias], alias);
2928 | |
2929 | return 0; |
2930 | } |
2931 | |
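/*
 * Find or create the interrupt remapping table for a device. The table
 * is shared with PCI DMA aliases; the allocation itself happens with
 * iommu_table_lock dropped, and a concurrently installed table is
 * detected by re-checking the lookup tables after re-acquiring the lock.
 */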
2932 | static struct irq_remap_table *alloc_irq_table(struct amd_iommu *iommu, |
2933 | u16 devid, struct pci_dev *pdev) |
2934 | { |
2935 | struct irq_remap_table *table = NULL; |
2936 | struct irq_remap_table *new_table = NULL; |
2937 | struct amd_iommu_pci_seg *pci_seg; |
2938 | unsigned long flags; |
2939 | u16 alias; |
2940 | |
2941 | spin_lock_irqsave(&iommu_table_lock, flags); |
2942 | |
2943 | pci_seg = iommu->pci_seg; |
2944 | table = pci_seg->irq_lookup_table[devid]; |
2945 | if (table) |
2946 | goto out_unlock; |
2947 | |
2948 | alias = pci_seg->alias_table[devid]; |
2949 | table = pci_seg->irq_lookup_table[alias]; |
2950 | if (table) { |
2951 | set_remap_table_entry(iommu, devid, table); |
2952 | goto out_wait; |
2953 | } |
	spin_unlock_irqrestore(&iommu_table_lock, flags);
2955 | |
2956 | /* Nothing there yet, allocate new irq remapping table */ |
2957 | new_table = __alloc_irq_table(); |
2958 | if (!new_table) |
2959 | return NULL; |
2960 | |
2961 | spin_lock_irqsave(&iommu_table_lock, flags); |
2962 | |
2963 | table = pci_seg->irq_lookup_table[devid]; |
2964 | if (table) |
2965 | goto out_unlock; |
2966 | |
2967 | table = pci_seg->irq_lookup_table[alias]; |
2968 | if (table) { |
2969 | set_remap_table_entry(iommu, devid, table); |
2970 | goto out_wait; |
2971 | } |
2972 | |
2973 | table = new_table; |
2974 | new_table = NULL; |
2975 | |
2976 | if (pdev) |
		pci_for_each_dma_alias(pdev, set_remap_table_entry_alias,
				       table);
2979 | else |
2980 | set_remap_table_entry(iommu, devid, table); |
2981 | |
2982 | if (devid != alias) |
		set_remap_table_entry(iommu, alias, table);
2984 | |
2985 | out_wait: |
2986 | iommu_completion_wait(iommu); |
2987 | |
2988 | out_unlock: |
	spin_unlock_irqrestore(&iommu_table_lock, flags);

	if (new_table) {
		kmem_cache_free(amd_iommu_irq_cache, new_table->table);
		kfree(new_table);
2994 | } |
2995 | return table; |
2996 | } |
2997 | |
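/*
 * Allocate @count consecutive IRTE slots in the device's remapping
 * table. For multi-MSI (@align == true) the first index is aligned to
 * roundup_pow_of_two(count). Returns the first index, or -ENOSPC if no
 * run of free entries is found.
 */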
2998 | static int alloc_irq_index(struct amd_iommu *iommu, u16 devid, int count, |
2999 | bool align, struct pci_dev *pdev) |
3000 | { |
3001 | struct irq_remap_table *table; |
3002 | int index, c, alignment = 1; |
3003 | unsigned long flags; |
3004 | |
3005 | table = alloc_irq_table(iommu, devid, pdev); |
3006 | if (!table) |
3007 | return -ENODEV; |
3008 | |
3009 | if (align) |
3010 | alignment = roundup_pow_of_two(count); |
3011 | |
3012 | raw_spin_lock_irqsave(&table->lock, flags); |
3013 | |
3014 | /* Scan table for free entries */ |
3015 | for (index = ALIGN(table->min_index, alignment), c = 0; |
3016 | index < MAX_IRQS_PER_TABLE;) { |
3017 | if (!iommu->irte_ops->is_allocated(table, index)) { |
3018 | c += 1; |
3019 | } else { |
3020 | c = 0; |
3021 | index = ALIGN(index + 1, alignment); |
3022 | continue; |
3023 | } |
3024 | |
3025 | if (c == count) { |
3026 | for (; c != 0; --c) |
3027 | iommu->irte_ops->set_allocated(table, index - c + 1); |
3028 | |
3029 | index -= count - 1; |
3030 | goto out; |
3031 | } |
3032 | |
3033 | index++; |
3034 | } |
3035 | |
3036 | index = -ENOSPC; |
3037 | |
3038 | out: |
3039 | raw_spin_unlock_irqrestore(&table->lock, flags); |
3040 | |
3041 | return index; |
3042 | } |
3043 | |
3044 | static int __modify_irte_ga(struct amd_iommu *iommu, u16 devid, int index, |
3045 | struct irte_ga *irte) |
3046 | { |
3047 | struct irq_remap_table *table; |
3048 | struct irte_ga *entry; |
3049 | unsigned long flags; |
3050 | u128 old; |
3051 | |
3052 | table = get_irq_table(iommu, devid); |
3053 | if (!table) |
3054 | return -ENOMEM; |
3055 | |
3056 | raw_spin_lock_irqsave(&table->lock, flags); |
3057 | |
3058 | entry = (struct irte_ga *)table->table; |
3059 | entry = &entry[index]; |
3060 | |
3061 | /* |
3062 | * We use cmpxchg16 to atomically update the 128-bit IRTE, |
3063 | * and it cannot be updated by the hardware or other processors |
3064 | * behind us, so the return value of cmpxchg16 should be the |
3065 | * same as the old value. |
3066 | */ |
3067 | old = entry->irte; |
3068 | WARN_ON(!try_cmpxchg128(&entry->irte, &old, irte->irte)); |
3069 | |
3070 | raw_spin_unlock_irqrestore(&table->lock, flags); |
3071 | |
3072 | return 0; |
3073 | } |
3074 | |
3075 | static int modify_irte_ga(struct amd_iommu *iommu, u16 devid, int index, |
3076 | struct irte_ga *irte) |
3077 | { |
3078 | bool ret; |
3079 | |
3080 | ret = __modify_irte_ga(iommu, devid, index, irte); |
3081 | if (ret) |
3082 | return ret; |
3083 | |
3084 | iommu_flush_irt_and_complete(iommu, devid); |
3085 | |
3086 | return 0; |
3087 | } |
3088 | |
3089 | static int modify_irte(struct amd_iommu *iommu, |
3090 | u16 devid, int index, union irte *irte) |
3091 | { |
3092 | struct irq_remap_table *table; |
3093 | unsigned long flags; |
3094 | |
3095 | table = get_irq_table(iommu, devid); |
3096 | if (!table) |
3097 | return -ENOMEM; |
3098 | |
3099 | raw_spin_lock_irqsave(&table->lock, flags); |
3100 | table->table[index] = irte->val; |
3101 | raw_spin_unlock_irqrestore(&table->lock, flags); |
3102 | |
3103 | iommu_flush_irt_and_complete(iommu, devid); |
3104 | |
3105 | return 0; |
3106 | } |
3107 | |
3108 | static void free_irte(struct amd_iommu *iommu, u16 devid, int index) |
3109 | { |
3110 | struct irq_remap_table *table; |
3111 | unsigned long flags; |
3112 | |
3113 | table = get_irq_table(iommu, devid); |
3114 | if (!table) |
3115 | return; |
3116 | |
3117 | raw_spin_lock_irqsave(&table->lock, flags); |
3118 | iommu->irte_ops->clear_allocated(table, index); |
3119 | raw_spin_unlock_irqrestore(&table->lock, flags); |
3120 | |
3121 | iommu_flush_irt_and_complete(iommu, devid); |
3122 | } |
3123 | |
3124 | static void irte_prepare(void *entry, |
3125 | u32 delivery_mode, bool dest_mode, |
3126 | u8 vector, u32 dest_apicid, int devid) |
3127 | { |
3128 | union irte *irte = (union irte *) entry; |
3129 | |
3130 | irte->val = 0; |
3131 | irte->fields.vector = vector; |
3132 | irte->fields.int_type = delivery_mode; |
3133 | irte->fields.destination = dest_apicid; |
3134 | irte->fields.dm = dest_mode; |
3135 | irte->fields.valid = 1; |
3136 | } |
3137 | |
3138 | static void irte_ga_prepare(void *entry, |
3139 | u32 delivery_mode, bool dest_mode, |
3140 | u8 vector, u32 dest_apicid, int devid) |
3141 | { |
3142 | struct irte_ga *irte = (struct irte_ga *) entry; |
3143 | |
3144 | irte->lo.val = 0; |
3145 | irte->hi.val = 0; |
3146 | irte->lo.fields_remap.int_type = delivery_mode; |
3147 | irte->lo.fields_remap.dm = dest_mode; |
3148 | irte->hi.fields.vector = vector; |
3149 | irte->lo.fields_remap.destination = APICID_TO_IRTE_DEST_LO(dest_apicid); |
3150 | irte->hi.fields.destination = APICID_TO_IRTE_DEST_HI(dest_apicid); |
3151 | irte->lo.fields_remap.valid = 1; |
3152 | } |
3153 | |
3154 | static void irte_activate(struct amd_iommu *iommu, void *entry, u16 devid, u16 index) |
3155 | { |
3156 | union irte *irte = (union irte *) entry; |
3157 | |
3158 | irte->fields.valid = 1; |
3159 | modify_irte(iommu, devid, index, irte); |
3160 | } |
3161 | |
3162 | static void irte_ga_activate(struct amd_iommu *iommu, void *entry, u16 devid, u16 index) |
3163 | { |
3164 | struct irte_ga *irte = (struct irte_ga *) entry; |
3165 | |
3166 | irte->lo.fields_remap.valid = 1; |
3167 | modify_irte_ga(iommu, devid, index, irte); |
3168 | } |
3169 | |
3170 | static void irte_deactivate(struct amd_iommu *iommu, void *entry, u16 devid, u16 index) |
3171 | { |
3172 | union irte *irte = (union irte *) entry; |
3173 | |
3174 | irte->fields.valid = 0; |
3175 | modify_irte(iommu, devid, index, irte); |
3176 | } |
3177 | |
3178 | static void irte_ga_deactivate(struct amd_iommu *iommu, void *entry, u16 devid, u16 index) |
3179 | { |
3180 | struct irte_ga *irte = (struct irte_ga *) entry; |
3181 | |
3182 | irte->lo.fields_remap.valid = 0; |
3183 | modify_irte_ga(iommu, devid, index, irte); |
3184 | } |
3185 | |
3186 | static void irte_set_affinity(struct amd_iommu *iommu, void *entry, u16 devid, u16 index, |
3187 | u8 vector, u32 dest_apicid) |
3188 | { |
3189 | union irte *irte = (union irte *) entry; |
3190 | |
3191 | irte->fields.vector = vector; |
3192 | irte->fields.destination = dest_apicid; |
3193 | modify_irte(iommu, devid, index, irte); |
3194 | } |
3195 | |
3196 | static void irte_ga_set_affinity(struct amd_iommu *iommu, void *entry, u16 devid, u16 index, |
3197 | u8 vector, u32 dest_apicid) |
3198 | { |
3199 | struct irte_ga *irte = (struct irte_ga *) entry; |
3200 | |
3201 | if (!irte->lo.fields_remap.guest_mode) { |
3202 | irte->hi.fields.vector = vector; |
3203 | irte->lo.fields_remap.destination = |
3204 | APICID_TO_IRTE_DEST_LO(dest_apicid); |
3205 | irte->hi.fields.destination = |
3206 | APICID_TO_IRTE_DEST_HI(dest_apicid); |
3207 | modify_irte_ga(iommu, devid, index, irte); |
3208 | } |
3209 | } |
3210 | |
3211 | #define IRTE_ALLOCATED (~1U) |
3212 | static void irte_set_allocated(struct irq_remap_table *table, int index) |
3213 | { |
3214 | table->table[index] = IRTE_ALLOCATED; |
3215 | } |
3216 | |
3217 | static void irte_ga_set_allocated(struct irq_remap_table *table, int index) |
3218 | { |
3219 | struct irte_ga *ptr = (struct irte_ga *)table->table; |
3220 | struct irte_ga *irte = &ptr[index]; |
3221 | |
3222 | memset(&irte->lo.val, 0, sizeof(u64)); |
3223 | memset(&irte->hi.val, 0, sizeof(u64)); |
3224 | irte->hi.fields.vector = 0xff; |
3225 | } |
3226 | |
3227 | static bool irte_is_allocated(struct irq_remap_table *table, int index) |
3228 | { |
3229 | union irte *ptr = (union irte *)table->table; |
3230 | union irte *irte = &ptr[index]; |
3231 | |
3232 | return irte->val != 0; |
3233 | } |
3234 | |
3235 | static bool irte_ga_is_allocated(struct irq_remap_table *table, int index) |
3236 | { |
3237 | struct irte_ga *ptr = (struct irte_ga *)table->table; |
3238 | struct irte_ga *irte = &ptr[index]; |
3239 | |
3240 | return irte->hi.fields.vector != 0; |
3241 | } |
3242 | |
3243 | static void irte_clear_allocated(struct irq_remap_table *table, int index) |
3244 | { |
3245 | table->table[index] = 0; |
3246 | } |
3247 | |
3248 | static void irte_ga_clear_allocated(struct irq_remap_table *table, int index) |
3249 | { |
3250 | struct irte_ga *ptr = (struct irte_ga *)table->table; |
3251 | struct irte_ga *irte = &ptr[index]; |
3252 | |
3253 | memset(&irte->lo.val, 0, sizeof(u64)); |
3254 | memset(&irte->hi.val, 0, sizeof(u64)); |
3255 | } |
3256 | |
3257 | static int get_devid(struct irq_alloc_info *info) |
3258 | { |
3259 | switch (info->type) { |
3260 | case X86_IRQ_ALLOC_TYPE_IOAPIC: |
		return get_ioapic_devid(info->devid);
	case X86_IRQ_ALLOC_TYPE_HPET:
		return get_hpet_devid(info->devid);
3264 | case X86_IRQ_ALLOC_TYPE_PCI_MSI: |
3265 | case X86_IRQ_ALLOC_TYPE_PCI_MSIX: |
3266 | return get_device_sbdf_id(msi_desc_to_dev(info->desc)); |
3267 | default: |
3268 | WARN_ON_ONCE(1); |
3269 | return -1; |
3270 | } |
3271 | } |
3272 | |
3273 | struct irq_remap_ops amd_iommu_irq_ops = { |
3274 | .prepare = amd_iommu_prepare, |
3275 | .enable = amd_iommu_enable, |
3276 | .disable = amd_iommu_disable, |
3277 | .reenable = amd_iommu_reenable, |
3278 | .enable_faulting = amd_iommu_enable_faulting, |
3279 | }; |
3280 | |
3281 | static void fill_msi_msg(struct msi_msg *msg, u32 index) |
3282 | { |
3283 | msg->data = index; |
3284 | msg->address_lo = 0; |
3285 | msg->arch_addr_lo.base_address = X86_MSI_BASE_ADDRESS_LOW; |
3286 | msg->address_hi = X86_MSI_BASE_ADDRESS_HIGH; |
3287 | } |
3288 | |
3289 | static void irq_remapping_prepare_irte(struct amd_ir_data *data, |
3290 | struct irq_cfg *irq_cfg, |
3291 | struct irq_alloc_info *info, |
3292 | int devid, int index, int sub_handle) |
3293 | { |
3294 | struct irq_2_irte *irte_info = &data->irq_2_irte; |
3295 | struct amd_iommu *iommu = data->iommu; |
3296 | |
3297 | if (!iommu) |
3298 | return; |
3299 | |
3300 | data->irq_2_irte.devid = devid; |
3301 | data->irq_2_irte.index = index + sub_handle; |
3302 | iommu->irte_ops->prepare(data->entry, APIC_DELIVERY_MODE_FIXED, |
3303 | apic->dest_mode_logical, irq_cfg->vector, |
3304 | irq_cfg->dest_apicid, devid); |
3305 | |
3306 | switch (info->type) { |
3307 | case X86_IRQ_ALLOC_TYPE_IOAPIC: |
3308 | case X86_IRQ_ALLOC_TYPE_HPET: |
3309 | case X86_IRQ_ALLOC_TYPE_PCI_MSI: |
3310 | case X86_IRQ_ALLOC_TYPE_PCI_MSIX: |
		fill_msi_msg(&data->msi_entry, irte_info->index);
3312 | break; |
3313 | |
3314 | default: |
3315 | BUG_ON(1); |
3316 | break; |
3317 | } |
3318 | } |
3319 | |
3320 | struct amd_irte_ops irte_32_ops = { |
3321 | .prepare = irte_prepare, |
3322 | .activate = irte_activate, |
3323 | .deactivate = irte_deactivate, |
3324 | .set_affinity = irte_set_affinity, |
3325 | .set_allocated = irte_set_allocated, |
3326 | .is_allocated = irte_is_allocated, |
3327 | .clear_allocated = irte_clear_allocated, |
3328 | }; |
3329 | |
3330 | struct amd_irte_ops irte_128_ops = { |
3331 | .prepare = irte_ga_prepare, |
3332 | .activate = irte_ga_activate, |
3333 | .deactivate = irte_ga_deactivate, |
3334 | .set_affinity = irte_ga_set_affinity, |
3335 | .set_allocated = irte_ga_set_allocated, |
3336 | .is_allocated = irte_ga_is_allocated, |
3337 | .clear_allocated = irte_ga_clear_allocated, |
3338 | }; |
3339 | |
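/*
 * Allocate @nr_irqs remapped interrupts for a device. IOAPIC pins map
 * 1:1 onto the first 32 entries of a freshly allocated table; MSI and
 * MSI-X allocations search for a free (and, for multi-MSI, aligned)
 * block of IRTEs via alloc_irq_index().
 */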
3340 | static int irq_remapping_alloc(struct irq_domain *domain, unsigned int virq, |
3341 | unsigned int nr_irqs, void *arg) |
3342 | { |
3343 | struct irq_alloc_info *info = arg; |
3344 | struct irq_data *irq_data; |
3345 | struct amd_ir_data *data = NULL; |
3346 | struct amd_iommu *iommu; |
3347 | struct irq_cfg *cfg; |
3348 | int i, ret, devid, seg, sbdf; |
3349 | int index; |
3350 | |
3351 | if (!info) |
3352 | return -EINVAL; |
3353 | if (nr_irqs > 1 && info->type != X86_IRQ_ALLOC_TYPE_PCI_MSI) |
3354 | return -EINVAL; |
3355 | |
3356 | sbdf = get_devid(info); |
3357 | if (sbdf < 0) |
3358 | return -EINVAL; |
3359 | |
3360 | seg = PCI_SBDF_TO_SEGID(sbdf); |
3361 | devid = PCI_SBDF_TO_DEVID(sbdf); |
3362 | iommu = __rlookup_amd_iommu(seg, devid); |
3363 | if (!iommu) |
3364 | return -EINVAL; |
3365 | |
	ret = irq_domain_alloc_irqs_parent(domain, virq, nr_irqs, arg);
3367 | if (ret < 0) |
3368 | return ret; |
3369 | |
3370 | if (info->type == X86_IRQ_ALLOC_TYPE_IOAPIC) { |
3371 | struct irq_remap_table *table; |
3372 | |
3373 | table = alloc_irq_table(iommu, devid, NULL); |
3374 | if (table) { |
3375 | if (!table->min_index) { |
3376 | /* |
3377 | * Keep the first 32 indexes free for IOAPIC |
3378 | * interrupts. |
3379 | */ |
3380 | table->min_index = 32; |
3381 | for (i = 0; i < 32; ++i) |
3382 | iommu->irte_ops->set_allocated(table, i); |
3383 | } |
3384 | WARN_ON(table->min_index != 32); |
3385 | index = info->ioapic.pin; |
3386 | } else { |
3387 | index = -ENOMEM; |
3388 | } |
3389 | } else if (info->type == X86_IRQ_ALLOC_TYPE_PCI_MSI || |
3390 | info->type == X86_IRQ_ALLOC_TYPE_PCI_MSIX) { |
3391 | bool align = (info->type == X86_IRQ_ALLOC_TYPE_PCI_MSI); |
3392 | |
		index = alloc_irq_index(iommu, devid, nr_irqs, align,
					msi_desc_to_pci_dev(info->desc));
	} else {
		index = alloc_irq_index(iommu, devid, nr_irqs, false, NULL);
3397 | } |
3398 | |
3399 | if (index < 0) { |
3400 | pr_warn("Failed to allocate IRTE\n" ); |
3401 | ret = index; |
3402 | goto out_free_parent; |
3403 | } |
3404 | |
3405 | for (i = 0; i < nr_irqs; i++) { |
		irq_data = irq_domain_get_irq_data(domain, virq + i);
3407 | cfg = irq_data ? irqd_cfg(irq_data) : NULL; |
3408 | if (!cfg) { |
3409 | ret = -EINVAL; |
3410 | goto out_free_data; |
3411 | } |
3412 | |
3413 | ret = -ENOMEM; |
		data = kzalloc(sizeof(*data), GFP_KERNEL);
3415 | if (!data) |
3416 | goto out_free_data; |
3417 | |
3418 | if (!AMD_IOMMU_GUEST_IR_GA(amd_iommu_guest_ir)) |
			data->entry = kzalloc(sizeof(union irte), GFP_KERNEL);
		else
			data->entry = kzalloc(sizeof(struct irte_ga),
					      GFP_KERNEL);
		if (!data->entry) {
			kfree(data);
3425 | goto out_free_data; |
3426 | } |
3427 | |
3428 | data->iommu = iommu; |
3429 | irq_data->hwirq = (devid << 16) + i; |
3430 | irq_data->chip_data = data; |
3431 | irq_data->chip = &amd_ir_chip; |
		irq_remapping_prepare_irte(data, cfg, info, devid, index, i);
		irq_set_status_flags(virq + i, IRQ_MOVE_PCNTXT);
3434 | } |
3435 | |
3436 | return 0; |
3437 | |
3438 | out_free_data: |
3439 | for (i--; i >= 0; i--) { |
		irq_data = irq_domain_get_irq_data(domain, virq + i);
		if (irq_data)
			kfree(irq_data->chip_data);
	}
	for (i = 0; i < nr_irqs; i++)
		free_irte(iommu, devid, index + i);
3446 | out_free_parent: |
3447 | irq_domain_free_irqs_common(domain, virq, nr_irqs); |
3448 | return ret; |
3449 | } |
3450 | |
3451 | static void irq_remapping_free(struct irq_domain *domain, unsigned int virq, |
3452 | unsigned int nr_irqs) |
3453 | { |
3454 | struct irq_2_irte *irte_info; |
3455 | struct irq_data *irq_data; |
3456 | struct amd_ir_data *data; |
3457 | int i; |
3458 | |
3459 | for (i = 0; i < nr_irqs; i++) { |
		irq_data = irq_domain_get_irq_data(domain, virq + i);
		if (irq_data && irq_data->chip_data) {
			data = irq_data->chip_data;
			irte_info = &data->irq_2_irte;
			free_irte(data->iommu, irte_info->devid, irte_info->index);
			kfree(data->entry);
			kfree(data);
3467 | } |
3468 | } |
3469 | irq_domain_free_irqs_common(domain, virq, nr_irqs); |
3470 | } |
3471 | |
3472 | static void amd_ir_update_irte(struct irq_data *irqd, struct amd_iommu *iommu, |
3473 | struct amd_ir_data *ir_data, |
3474 | struct irq_2_irte *irte_info, |
3475 | struct irq_cfg *cfg); |
3476 | |
3477 | static int irq_remapping_activate(struct irq_domain *domain, |
3478 | struct irq_data *irq_data, bool reserve) |
3479 | { |
3480 | struct amd_ir_data *data = irq_data->chip_data; |
3481 | struct irq_2_irte *irte_info = &data->irq_2_irte; |
3482 | struct amd_iommu *iommu = data->iommu; |
3483 | struct irq_cfg *cfg = irqd_cfg(irq_data); |
3484 | |
3485 | if (!iommu) |
3486 | return 0; |
3487 | |
3488 | iommu->irte_ops->activate(iommu, data->entry, irte_info->devid, |
3489 | irte_info->index); |
	amd_ir_update_irte(irq_data, iommu, data, irte_info, cfg);
3491 | return 0; |
3492 | } |
3493 | |
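/* Clear the valid bit in the IRTE when the interrupt is torn down. */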
static void irq_remapping_deactivate(struct irq_domain *domain,
				     struct irq_data *irq_data)
{
	struct amd_ir_data *data = irq_data->chip_data;
	struct irq_2_irte *irte_info = &data->irq_2_irte;
	struct amd_iommu *iommu = data->iommu;

	if (iommu)
		iommu->irte_ops->deactivate(iommu, data->entry, irte_info->devid,
					    irte_info->index);
}

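/*
 * Match IOAPIC and HPET fwspecs against this IOMMU so that the IRQ core
 * selects the remapping domain that actually covers the device.
 */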
static int irq_remapping_select(struct irq_domain *d, struct irq_fwspec *fwspec,
				enum irq_domain_bus_token bus_token)
{
	struct amd_iommu *iommu;
	int devid = -1;

	if (!amd_iommu_irq_remap)
		return 0;

	if (x86_fwspec_is_ioapic(fwspec))
		devid = get_ioapic_devid(fwspec->param[0]);
	else if (x86_fwspec_is_hpet(fwspec))
		devid = get_hpet_devid(fwspec->param[0]);

	if (devid < 0)
		return 0;
	iommu = __rlookup_amd_iommu((devid >> 16), (devid & 0xffff));

	return iommu && iommu->ir_domain == d;
}

static const struct irq_domain_ops amd_ir_domain_ops = {
	.select = irq_remapping_select,
	.alloc = irq_remapping_alloc,
	.free = irq_remapping_free,
	.activate = irq_remapping_activate,
	.deactivate = irq_remapping_deactivate,
};

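/*
 * Switch a 128-bit IRTE into guest (virtual APIC) mode so the interrupt
 * is delivered through the guest vAPIC backing page described by
 * ga_root_ptr, ga_vector and ga_tag. Used by KVM's AVIC support.
 */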
int amd_iommu_activate_guest_mode(void *data)
{
	struct amd_ir_data *ir_data = (struct amd_ir_data *)data;
	struct irte_ga *entry = (struct irte_ga *) ir_data->entry;
	u64 valid;

	if (!AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir) || !entry)
		return 0;

	valid = entry->lo.fields_vapic.valid;

	entry->lo.val = 0;
	entry->hi.val = 0;

	entry->lo.fields_vapic.valid = valid;
	entry->lo.fields_vapic.guest_mode = 1;
	entry->lo.fields_vapic.ga_log_intr = 1;
	entry->hi.fields.ga_root_ptr = ir_data->ga_root_ptr;
	entry->hi.fields.vector = ir_data->ga_vector;
	entry->lo.fields_vapic.ga_tag = ir_data->ga_tag;

	return modify_irte_ga(ir_data->iommu, ir_data->irq_2_irte.devid,
			      ir_data->irq_2_irte.index, entry);
}
EXPORT_SYMBOL(amd_iommu_activate_guest_mode);

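/*
 * Switch a guest-mode IRTE back to ordinary remapped delivery, restoring
 * the host destination and vector from the cached irq_cfg.
 */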
int amd_iommu_deactivate_guest_mode(void *data)
{
	struct amd_ir_data *ir_data = (struct amd_ir_data *)data;
	struct irte_ga *entry = (struct irte_ga *) ir_data->entry;
	struct irq_cfg *cfg = ir_data->cfg;
	u64 valid;

	if (!AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir) ||
	    !entry || !entry->lo.fields_vapic.guest_mode)
		return 0;

	valid = entry->lo.fields_remap.valid;

	entry->lo.val = 0;
	entry->hi.val = 0;

	entry->lo.fields_remap.valid = valid;
	entry->lo.fields_remap.dm = apic->dest_mode_logical;
	entry->lo.fields_remap.int_type = APIC_DELIVERY_MODE_FIXED;
	entry->hi.fields.vector = cfg->vector;
	entry->lo.fields_remap.destination =
				APICID_TO_IRTE_DEST_LO(cfg->dest_apicid);
	entry->hi.fields.destination =
				APICID_TO_IRTE_DEST_HI(cfg->dest_apicid);

	return modify_irte_ga(ir_data->iommu, ir_data->irq_2_irte.devid,
			      ir_data->irq_2_irte.index, entry);
}
EXPORT_SYMBOL(amd_iommu_deactivate_guest_mode);

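/*
 * irq_set_vcpu_affinity() callback: switch the IRTE between guest mode
 * and legacy remapping depending on whether the interrupt is currently
 * assigned to a vCPU (pi_data->is_guest_mode).
 */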
static int amd_ir_set_vcpu_affinity(struct irq_data *data, void *vcpu_info)
{
	int ret;
	struct amd_iommu_pi_data *pi_data = vcpu_info;
	struct vcpu_data *vcpu_pi_info = pi_data->vcpu_data;
	struct amd_ir_data *ir_data = data->chip_data;
	struct irq_2_irte *irte_info = &ir_data->irq_2_irte;
	struct iommu_dev_data *dev_data;

	if (ir_data->iommu == NULL)
		return -EINVAL;

	dev_data = search_dev_data(ir_data->iommu, irte_info->devid);

	/* Note:
	 * This device has never been set up for guest mode.
	 * We should not modify the IRTE in that case.
	 */
	if (!dev_data || !dev_data->use_vapic)
		return 0;

	ir_data->cfg = irqd_cfg(data);
	pi_data->ir_data = ir_data;

	/* Note:
	 * SVM tries to set up for VAPIC mode, but we are in
	 * legacy mode. So, we force legacy mode instead.
	 */
	if (!AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir)) {
		pr_debug("%s: Fall back to using legacy interrupt remapping\n",
			 __func__);
		pi_data->is_guest_mode = false;
	}

	pi_data->prev_ga_tag = ir_data->cached_ga_tag;
	if (pi_data->is_guest_mode) {
		ir_data->ga_root_ptr = (pi_data->base >> 12);
		ir_data->ga_vector = vcpu_pi_info->vector;
		ir_data->ga_tag = pi_data->ga_tag;
		ret = amd_iommu_activate_guest_mode(ir_data);
		if (!ret)
			ir_data->cached_ga_tag = pi_data->ga_tag;
	} else {
		ret = amd_iommu_deactivate_guest_mode(ir_data);

		/*
		 * This communicates the ga_tag back to the caller
		 * so that it can do all the necessary clean up.
		 */
		if (!ret)
			ir_data->cached_ga_tag = 0;
	}

	return ret;
}

static void amd_ir_update_irte(struct irq_data *irqd, struct amd_iommu *iommu,
			       struct amd_ir_data *ir_data,
			       struct irq_2_irte *irte_info,
			       struct irq_cfg *cfg)
{
	/*
	 * Atomically update the IRTE with the new destination and vector,
	 * then flush the interrupt entry cache.
	 */
	iommu->irte_ops->set_affinity(iommu, ir_data->entry, irte_info->devid,
				      irte_info->index, cfg->vector,
				      cfg->dest_apicid);
}

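/*
 * Apply an affinity change in the parent (vector) domain first, then
 * mirror the new destination and vector into the IRTE so the device
 * targets the newly chosen CPU.
 */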
static int amd_ir_set_affinity(struct irq_data *data,
			       const struct cpumask *mask, bool force)
{
	struct amd_ir_data *ir_data = data->chip_data;
	struct irq_2_irte *irte_info = &ir_data->irq_2_irte;
	struct irq_cfg *cfg = irqd_cfg(data);
	struct irq_data *parent = data->parent_data;
	struct amd_iommu *iommu = ir_data->iommu;
	int ret;

	if (!iommu)
		return -ENODEV;

	ret = parent->chip->irq_set_affinity(parent, mask, force);
	if (ret < 0 || ret == IRQ_SET_MASK_OK_DONE)
		return ret;

	amd_ir_update_irte(data, iommu, ir_data, irte_info, cfg);
	/*
	 * After this point, all the interrupts will start arriving
	 * at the new destination. So, time to clean up the previous
	 * vector allocation.
	 */
	vector_schedule_cleanup(cfg);

	return IRQ_SET_MASK_OK_DONE;
}

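/* The MSI message was already built when the IRTE was prepared. */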
static void ir_compose_msi_msg(struct irq_data *irq_data, struct msi_msg *msg)
{
	struct amd_ir_data *ir_data = irq_data->chip_data;

	*msg = ir_data->msi_entry;
}

static struct irq_chip amd_ir_chip = {
	.name = "AMD-IR",
	.irq_ack = apic_ack_irq,
	.irq_set_affinity = amd_ir_set_affinity,
	.irq_set_vcpu_affinity = amd_ir_set_vcpu_affinity,
	.irq_compose_msi_msg = ir_compose_msi_msg,
};

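/*
 * MSI parent capabilities. The "vIR-" variant is selected when
 * amd_iommu_np_cache is set, which here is taken as an indication that
 * the IOMMU is emulated/virtualized; it does not advertise PCI IMS.
 */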
static const struct msi_parent_ops amdvi_msi_parent_ops = {
	.supported_flags = X86_VECTOR_MSI_FLAGS_SUPPORTED |
			   MSI_FLAG_MULTI_PCI_MSI |
			   MSI_FLAG_PCI_IMS,
	.prefix = "IR-",
	.init_dev_msi_info = msi_parent_init_dev_msi_info,
};

static const struct msi_parent_ops virt_amdvi_msi_parent_ops = {
	.supported_flags = X86_VECTOR_MSI_FLAGS_SUPPORTED |
			   MSI_FLAG_MULTI_PCI_MSI,
	.prefix = "vIR-",
	.init_dev_msi_info = msi_parent_init_dev_msi_info,
};

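/*
 * Create the per-IOMMU interrupt remapping domain on top of the x86
 * vector domain and mark it as an isolated MSI parent.
 */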
int amd_iommu_create_irq_domain(struct amd_iommu *iommu)
{
	struct fwnode_handle *fn;

	fn = irq_domain_alloc_named_id_fwnode("AMD-IR", iommu->index);
	if (!fn)
		return -ENOMEM;
	iommu->ir_domain = irq_domain_create_hierarchy(arch_get_ir_parent_domain(), 0, 0,
						       fn, &amd_ir_domain_ops, iommu);
	if (!iommu->ir_domain) {
		irq_domain_free_fwnode(fn);
		return -ENOMEM;
	}

	irq_domain_update_bus_token(iommu->ir_domain, DOMAIN_BUS_AMDVI);
	iommu->ir_domain->flags |= IRQ_DOMAIN_FLAG_MSI_PARENT |
				   IRQ_DOMAIN_FLAG_ISOLATED_MSI;

	if (amd_iommu_np_cache)
		iommu->ir_domain->msi_parent_ops = &virt_amdvi_msi_parent_ops;
	else
		iommu->ir_domain->msi_parent_ops = &amdvi_msi_parent_ops;

	return 0;
}

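/*
 * Update the destination CPU and running state of a guest-mode IRTE in
 * place, without rebuilding the entry. Typically called when a vCPU is
 * scheduled in or out.
 */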
int amd_iommu_update_ga(int cpu, bool is_run, void *data)
{
	struct amd_ir_data *ir_data = (struct amd_ir_data *)data;
	struct irte_ga *entry = (struct irte_ga *) ir_data->entry;

	if (!AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir) ||
	    !entry || !entry->lo.fields_vapic.guest_mode)
		return 0;

	if (!ir_data->iommu)
		return -ENODEV;

	if (cpu >= 0) {
		entry->lo.fields_vapic.destination =
					APICID_TO_IRTE_DEST_LO(cpu);
		entry->hi.fields.destination =
					APICID_TO_IRTE_DEST_HI(cpu);
	}
	entry->lo.fields_vapic.is_run = is_run;

	return __modify_irte_ga(ir_data->iommu, ir_data->irq_2_irte.devid,
				ir_data->irq_2_irte.index, entry);
}
EXPORT_SYMBOL(amd_iommu_update_ga);
#endif
