1 | // SPDX-License-Identifier: GPL-2.0 |
2 | |
3 | #define pr_fmt(fmt) "PCI: " fmt |
4 | |
5 | #include <linux/pci.h> |
6 | #include <linux/acpi.h> |
7 | #include <linux/init.h> |
8 | #include <linux/irq.h> |
9 | #include <linux/dmi.h> |
10 | #include <linux/slab.h> |
11 | #include <linux/pci-acpi.h> |
12 | #include <asm/numa.h> |
13 | #include <asm/pci_x86.h> |
14 | |
15 | struct pci_root_info { |
16 | struct acpi_pci_root_info common; |
17 | struct pci_sysdata sd; |
18 | #ifdef CONFIG_PCI_MMCONFIG |
19 | bool mcfg_added; |
20 | u8 start_bus; |
21 | u8 end_bus; |
22 | #endif |
23 | }; |
24 | |
25 | bool pci_use_e820 = true; |
26 | static bool pci_use_crs = true; |
27 | static bool pci_ignore_seg; |
28 | |
29 | static int __init set_use_crs(const struct dmi_system_id *id) |
30 | { |
31 | pci_use_crs = true; |
32 | return 0; |
33 | } |
34 | |
35 | static int __init set_nouse_crs(const struct dmi_system_id *id) |
36 | { |
37 | pci_use_crs = false; |
38 | return 0; |
39 | } |
40 | |
41 | static int __init set_ignore_seg(const struct dmi_system_id *id) |
42 | { |
43 | pr_info("%s detected: ignoring ACPI _SEG\n" , id->ident); |
44 | pci_ignore_seg = true; |
45 | return 0; |
46 | } |
47 | |
48 | static int __init set_no_e820(const struct dmi_system_id *id) |
49 | { |
50 | pr_info("%s detected: not clipping E820 regions from _CRS\n" , |
51 | id->ident); |
52 | pci_use_e820 = false; |
53 | return 0; |
54 | } |
55 | |
56 | static const struct dmi_system_id pci_crs_quirks[] __initconst = { |
57 | /* http://bugzilla.kernel.org/show_bug.cgi?id=14183 */ |
58 | { |
59 | .callback = set_use_crs, |
60 | .ident = "IBM System x3800" , |
61 | .matches = { |
62 | DMI_MATCH(DMI_SYS_VENDOR, "IBM" ), |
63 | DMI_MATCH(DMI_PRODUCT_NAME, "x3800" ), |
64 | }, |
65 | }, |
66 | /* https://bugzilla.kernel.org/show_bug.cgi?id=16007 */ |
67 | /* 2006 AMD HT/VIA system with two host bridges */ |
68 | { |
69 | .callback = set_use_crs, |
70 | .ident = "ASRock ALiveSATA2-GLAN" , |
71 | .matches = { |
72 | DMI_MATCH(DMI_PRODUCT_NAME, "ALiveSATA2-GLAN" ), |
73 | }, |
74 | }, |
75 | /* https://bugzilla.kernel.org/show_bug.cgi?id=30552 */ |
76 | /* 2006 AMD HT/VIA system with two host bridges */ |
77 | { |
78 | .callback = set_use_crs, |
79 | .ident = "ASUS M2V-MX SE" , |
80 | .matches = { |
81 | DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC." ), |
82 | DMI_MATCH(DMI_BOARD_NAME, "M2V-MX SE" ), |
83 | DMI_MATCH(DMI_BIOS_VENDOR, "American Megatrends Inc." ), |
84 | }, |
85 | }, |
86 | /* https://bugzilla.kernel.org/show_bug.cgi?id=42619 */ |
87 | { |
88 | .callback = set_use_crs, |
89 | .ident = "MSI MS-7253" , |
90 | .matches = { |
91 | DMI_MATCH(DMI_BOARD_VENDOR, "MICRO-STAR INTERNATIONAL CO., LTD" ), |
92 | DMI_MATCH(DMI_BOARD_NAME, "MS-7253" ), |
93 | DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies, LTD" ), |
94 | }, |
95 | }, |
96 | /* https://bugs.launchpad.net/ubuntu/+source/alsa-driver/+bug/931368 */ |
97 | /* https://bugs.launchpad.net/ubuntu/+source/alsa-driver/+bug/1033299 */ |
98 | { |
99 | .callback = set_use_crs, |
100 | .ident = "Foxconn K8M890-8237A" , |
101 | .matches = { |
102 | DMI_MATCH(DMI_BOARD_VENDOR, "Foxconn" ), |
103 | DMI_MATCH(DMI_BOARD_NAME, "K8M890-8237A" ), |
104 | DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies, LTD" ), |
105 | }, |
106 | }, |
107 | |
108 | /* Now for the blacklist.. */ |
109 | |
110 | /* https://bugzilla.redhat.com/show_bug.cgi?id=769657 */ |
111 | { |
112 | .callback = set_nouse_crs, |
113 | .ident = "Dell Studio 1557" , |
114 | .matches = { |
115 | DMI_MATCH(DMI_BOARD_VENDOR, "Dell Inc." ), |
116 | DMI_MATCH(DMI_PRODUCT_NAME, "Studio 1557" ), |
117 | DMI_MATCH(DMI_BIOS_VERSION, "A09" ), |
118 | }, |
119 | }, |
120 | /* https://bugzilla.redhat.com/show_bug.cgi?id=769657 */ |
121 | { |
122 | .callback = set_nouse_crs, |
123 | .ident = "Thinkpad SL510" , |
124 | .matches = { |
125 | DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO" ), |
126 | DMI_MATCH(DMI_BOARD_NAME, "2847DFG" ), |
127 | DMI_MATCH(DMI_BIOS_VERSION, "6JET85WW (1.43 )" ), |
128 | }, |
129 | }, |
130 | /* https://bugzilla.kernel.org/show_bug.cgi?id=42606 */ |
131 | { |
132 | .callback = set_nouse_crs, |
133 | .ident = "Supermicro X8DTH" , |
134 | .matches = { |
135 | DMI_MATCH(DMI_SYS_VENDOR, "Supermicro" ), |
136 | DMI_MATCH(DMI_PRODUCT_NAME, "X8DTH-i/6/iF/6F" ), |
137 | DMI_MATCH(DMI_BIOS_VERSION, "2.0a" ), |
138 | }, |
139 | }, |
140 | |
141 | /* https://bugzilla.kernel.org/show_bug.cgi?id=15362 */ |
142 | { |
143 | .callback = set_ignore_seg, |
144 | .ident = "HP xw9300" , |
145 | .matches = { |
146 | DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard" ), |
147 | DMI_MATCH(DMI_PRODUCT_NAME, "HP xw9300 Workstation" ), |
148 | }, |
149 | }, |
150 | |
151 | /* |
152 | * Many Lenovo models with "IIL" in their DMI_PRODUCT_VERSION have |
153 | * an E820 reserved region that covers the entire 32-bit host |
154 | * bridge memory window from _CRS. Using the E820 region to clip |
155 | * _CRS means no space is available for hot-added or uninitialized |
156 | * PCI devices. This typically breaks I2C controllers for touchpads |
157 | * and hot-added Thunderbolt devices. See the commit log for |
158 | * models known to require this quirk and related bug reports. |
159 | */ |
160 | { |
161 | .callback = set_no_e820, |
162 | .ident = "Lenovo *IIL* product version" , |
163 | .matches = { |
164 | DMI_MATCH(DMI_SYS_VENDOR, "LENOVO" ), |
165 | DMI_MATCH(DMI_PRODUCT_VERSION, "IIL" ), |
166 | }, |
167 | }, |
168 | |
169 | /* |
170 | * The Acer Spin 5 (SP513-54N) has the same E820 reservation covering |
171 | * the entire _CRS 32-bit window issue as the Lenovo *IIL* models. |
172 | * See https://bugs.launchpad.net/bugs/1884232 |
173 | */ |
174 | { |
175 | .callback = set_no_e820, |
176 | .ident = "Acer Spin 5 (SP513-54N)" , |
177 | .matches = { |
178 | DMI_MATCH(DMI_SYS_VENDOR, "Acer" ), |
179 | DMI_MATCH(DMI_PRODUCT_NAME, "Spin SP513-54N" ), |
180 | }, |
181 | }, |
182 | |
183 | /* |
184 | * Clevo X170KM-G barebones have the same E820 reservation covering |
185 | * the entire _CRS 32-bit window issue as the Lenovo *IIL* models. |
186 | * See https://bugzilla.kernel.org/show_bug.cgi?id=214259 |
187 | */ |
188 | { |
189 | .callback = set_no_e820, |
190 | .ident = "Clevo X170KM-G Barebone" , |
191 | .matches = { |
192 | DMI_MATCH(DMI_BOARD_NAME, "X170KM-G" ), |
193 | }, |
194 | }, |
195 | {} |
196 | }; |
197 | |
198 | void __init pci_acpi_crs_quirks(void) |
199 | { |
200 | int year = dmi_get_bios_year(); |
201 | |
202 | if (year >= 0 && year < 2008 && iomem_resource.end <= 0xffffffff) |
203 | pci_use_crs = false; |
204 | |
205 | /* |
206 | * Some firmware includes unusable space (host bridge registers, |
207 | * hidden PCI device BARs, etc) in PCI host bridge _CRS. This is a |
208 | * firmware defect, and 4dc2287c1805 ("x86: avoid E820 regions when |
209 | * allocating address space") has clipped out the unusable space in |
210 | * the past. |
211 | * |
212 | * But other firmware supplies E820 reserved regions that cover |
213 | * entire _CRS windows, so clipping throws away the entire window, |
214 | * leaving none for hot-added or uninitialized devices. These E820 |
215 | * entries are probably *not* a firmware defect, so disable the |
216 | * clipping by default for post-2022 machines. |
217 | * |
218 | * We already have quirks to disable clipping for pre-2023 |
219 | * machines, and we'll likely need quirks to *enable* clipping for |
220 | * post-2022 machines that incorrectly include unusable space in |
221 | * _CRS. |
222 | */ |
223 | if (year >= 2023) |
224 | pci_use_e820 = false; |
225 | |
226 | dmi_check_system(list: pci_crs_quirks); |
227 | |
228 | /* |
229 | * If the user specifies "pci=use_crs" or "pci=nocrs" explicitly, that |
230 | * takes precedence over anything we figured out above. |
231 | */ |
232 | if (pci_probe & PCI_ROOT_NO_CRS) |
233 | pci_use_crs = false; |
234 | else if (pci_probe & PCI_USE__CRS) |
235 | pci_use_crs = true; |
236 | |
237 | pr_info("%s host bridge windows from ACPI; if necessary, use \"pci=%s\" and report a bug\n" , |
238 | pci_use_crs ? "Using" : "Ignoring" , |
239 | pci_use_crs ? "nocrs" : "use_crs" ); |
240 | |
241 | /* "pci=use_e820"/"pci=no_e820" on the kernel cmdline takes precedence */ |
242 | if (pci_probe & PCI_NO_E820) |
243 | pci_use_e820 = false; |
244 | else if (pci_probe & PCI_USE_E820) |
245 | pci_use_e820 = true; |
246 | |
247 | pr_info("%s E820 reservations for host bridge windows\n" , |
248 | pci_use_e820 ? "Using" : "Ignoring" ); |
249 | if (pci_probe & (PCI_NO_E820 | PCI_USE_E820)) |
250 | pr_info("Please notify linux-pci@vger.kernel.org so future kernels can do this automatically\n" ); |
251 | } |
252 | |
253 | #ifdef CONFIG_PCI_MMCONFIG |
254 | static int check_segment(u16 seg, struct device *dev, char *estr) |
255 | { |
256 | if (seg) { |
257 | dev_err(dev, "%s can't access configuration space under this host bridge\n" , |
258 | estr); |
259 | return -EIO; |
260 | } |
261 | |
262 | /* |
263 | * Failure in adding MMCFG information is not fatal, |
264 | * just can't access extended configuration space of |
265 | * devices under this host bridge. |
266 | */ |
267 | dev_warn(dev, "%s can't access extended configuration space under this bridge\n" , |
268 | estr); |
269 | |
270 | return 0; |
271 | } |
272 | |
273 | static int setup_mcfg_map(struct acpi_pci_root_info *ci) |
274 | { |
275 | int result, seg; |
276 | struct pci_root_info *info; |
277 | struct acpi_pci_root *root = ci->root; |
278 | struct device *dev = &ci->bridge->dev; |
279 | |
280 | info = container_of(ci, struct pci_root_info, common); |
281 | info->start_bus = (u8)root->secondary.start; |
282 | info->end_bus = (u8)root->secondary.end; |
283 | info->mcfg_added = false; |
284 | seg = info->sd.domain; |
285 | |
286 | dev_dbg(dev, "%s(%04x %pR ECAM %pa)\n" , __func__, seg, |
287 | &root->secondary, &root->mcfg_addr); |
288 | |
289 | /* return success if MMCFG is not in use */ |
290 | if (raw_pci_ext_ops && raw_pci_ext_ops != &pci_mmcfg) |
291 | return 0; |
292 | |
293 | if (!(pci_probe & PCI_PROBE_MMCONF)) |
294 | return check_segment(seg, dev, estr: "MMCONFIG is disabled," ); |
295 | |
296 | result = pci_mmconfig_insert(dev, seg, start: info->start_bus, end: info->end_bus, |
297 | addr: root->mcfg_addr); |
298 | if (result == 0) { |
299 | /* enable MMCFG if it hasn't been enabled yet */ |
300 | if (raw_pci_ext_ops == NULL) |
301 | raw_pci_ext_ops = &pci_mmcfg; |
302 | info->mcfg_added = true; |
303 | } else if (result != -EEXIST) |
304 | return check_segment(seg, dev, |
305 | estr: "fail to add MMCONFIG information," ); |
306 | |
307 | return 0; |
308 | } |
309 | |
310 | static void teardown_mcfg_map(struct acpi_pci_root_info *ci) |
311 | { |
312 | struct pci_root_info *info; |
313 | |
314 | info = container_of(ci, struct pci_root_info, common); |
315 | if (info->mcfg_added) { |
316 | pci_mmconfig_delete(seg: info->sd.domain, |
317 | start: info->start_bus, end: info->end_bus); |
318 | info->mcfg_added = false; |
319 | } |
320 | } |
321 | #else |
/* CONFIG_PCI_MMCONFIG is disabled: nothing to set up, always succeeds. */
static int setup_mcfg_map(struct acpi_pci_root_info *ci)
{
	return 0;
}
326 | |
/* CONFIG_PCI_MMCONFIG is disabled: nothing to tear down. */
static void teardown_mcfg_map(struct acpi_pci_root_info *ci)
{
}
330 | #endif |
331 | |
332 | static int pci_acpi_root_get_node(struct acpi_pci_root *root) |
333 | { |
334 | int busnum = root->secondary.start; |
335 | struct acpi_device *device = root->device; |
336 | int node = acpi_get_node(handle: device->handle); |
337 | |
338 | if (node == NUMA_NO_NODE) { |
339 | node = x86_pci_root_bus_node(bus: busnum); |
340 | if (node != 0 && node != NUMA_NO_NODE) |
341 | dev_info(&device->dev, FW_BUG "no _PXM; falling back to node %d from hardware (may be inconsistent with ACPI node numbers)\n" , |
342 | node); |
343 | } |
344 | if (node != NUMA_NO_NODE && !node_online(node)) |
345 | node = NUMA_NO_NODE; |
346 | |
347 | return node; |
348 | } |
349 | |
/*
 * .init_info hook of acpi_pci_root_ops: the only x86-specific setup is
 * the MMCONFIG mapping, so the result of setup_mcfg_map() is returned.
 */
static int pci_acpi_root_init_info(struct acpi_pci_root_info *ci)
{
	int ret = setup_mcfg_map(ci);

	return ret;
}
354 | |
355 | static void pci_acpi_root_release_info(struct acpi_pci_root_info *ci) |
356 | { |
357 | teardown_mcfg_map(ci); |
358 | kfree(container_of(ci, struct pci_root_info, common)); |
359 | } |
360 | |
361 | /* |
362 | * An IO port or MMIO resource assigned to a PCI host bridge may be |
363 | * consumed by the host bridge itself or available to its child |
364 | * bus/devices. The ACPI specification defines a bit (Producer/Consumer) |
365 | * to tell whether the resource is consumed by the host bridge itself, |
366 | * but firmware hasn't used that bit consistently, so we can't rely on it. |
367 | * |
368 | * On x86 and IA64 platforms, all IO port and MMIO resources are assumed |
369 | * to be available to child bus/devices except one special case: |
370 | * IO port [0xCF8-0xCFF] is consumed by the host bridge itself |
371 | * to access PCI configuration space. |
372 | * |
373 | * So explicitly filter out PCI CFG IO ports[0xCF8-0xCFF]. |
374 | */ |
375 | static bool resource_is_pcicfg_ioport(struct resource *res) |
376 | { |
377 | return (res->flags & IORESOURCE_IO) && |
378 | res->start == 0xCF8 && res->end == 0xCFF; |
379 | } |
380 | |
381 | static int pci_acpi_root_prepare_resources(struct acpi_pci_root_info *ci) |
382 | { |
383 | struct acpi_device *device = ci->bridge; |
384 | int busnum = ci->root->secondary.start; |
385 | struct resource_entry *entry, *tmp; |
386 | int status; |
387 | |
388 | status = acpi_pci_probe_root_resources(info: ci); |
389 | |
390 | if (pci_use_crs) { |
391 | resource_list_for_each_entry_safe(entry, tmp, &ci->resources) |
392 | if (resource_is_pcicfg_ioport(res: entry->res)) |
393 | resource_list_destroy_entry(entry); |
394 | return status; |
395 | } |
396 | |
397 | resource_list_for_each_entry_safe(entry, tmp, &ci->resources) { |
398 | dev_printk(KERN_DEBUG, &device->dev, |
399 | "host bridge window %pR (ignored)\n" , entry->res); |
400 | resource_list_destroy_entry(entry); |
401 | } |
402 | x86_pci_root_bus_resources(bus: busnum, resources: &ci->resources); |
403 | |
404 | return 0; |
405 | } |
406 | |
407 | static struct acpi_pci_root_ops acpi_pci_root_ops = { |
408 | .pci_ops = &pci_root_ops, |
409 | .init_info = pci_acpi_root_init_info, |
410 | .release_info = pci_acpi_root_release_info, |
411 | .prepare_resources = pci_acpi_root_prepare_resources, |
412 | }; |
413 | |
414 | struct pci_bus *pci_acpi_scan_root(struct acpi_pci_root *root) |
415 | { |
416 | int domain = root->segment; |
417 | int busnum = root->secondary.start; |
418 | int node = pci_acpi_root_get_node(root); |
419 | struct pci_bus *bus; |
420 | |
421 | if (pci_ignore_seg) |
422 | root->segment = domain = 0; |
423 | |
424 | if (domain && !pci_domains_supported) { |
425 | pr_warn("pci_bus %04x:%02x: ignored (multiple domains not supported)\n" , |
426 | domain, busnum); |
427 | return NULL; |
428 | } |
429 | |
430 | bus = pci_find_bus(domain, busnr: busnum); |
431 | if (bus) { |
432 | /* |
433 | * If the desired bus has been scanned already, replace |
434 | * its bus->sysdata. |
435 | */ |
436 | struct pci_sysdata sd = { |
437 | .domain = domain, |
438 | .node = node, |
439 | .companion = root->device |
440 | }; |
441 | |
442 | memcpy(bus->sysdata, &sd, sizeof(sd)); |
443 | } else { |
444 | struct pci_root_info *info; |
445 | |
446 | info = kzalloc(size: sizeof(*info), GFP_KERNEL); |
447 | if (!info) |
448 | dev_err(&root->device->dev, |
449 | "pci_bus %04x:%02x: ignored (out of memory)\n" , |
450 | domain, busnum); |
451 | else { |
452 | info->sd.domain = domain; |
453 | info->sd.node = node; |
454 | info->sd.companion = root->device; |
455 | bus = acpi_pci_root_create(root, ops: &acpi_pci_root_ops, |
456 | info: &info->common, sd: &info->sd); |
457 | } |
458 | } |
459 | |
460 | /* After the PCI-E bus has been walked and all devices discovered, |
461 | * configure any settings of the fabric that might be necessary. |
462 | */ |
463 | if (bus) { |
464 | struct pci_bus *child; |
465 | list_for_each_entry(child, &bus->children, node) |
466 | pcie_bus_configure_settings(bus: child); |
467 | } |
468 | |
469 | return bus; |
470 | } |
471 | |
472 | int pcibios_root_bridge_prepare(struct pci_host_bridge *bridge) |
473 | { |
474 | /* |
475 | * We pass NULL as parent to pci_create_root_bus(), so if it is not NULL |
476 | * here, pci_create_root_bus() has been called by someone else and |
477 | * sysdata is likely to be different from what we expect. Let it go in |
478 | * that case. |
479 | */ |
480 | if (!bridge->dev.parent) { |
481 | struct pci_sysdata *sd = bridge->bus->sysdata; |
482 | ACPI_COMPANION_SET(&bridge->dev, sd->companion); |
483 | } |
484 | return 0; |
485 | } |
486 | |
487 | int __init pci_acpi_init(void) |
488 | { |
489 | struct pci_dev *dev = NULL; |
490 | |
491 | if (acpi_noirq) |
492 | return -ENODEV; |
493 | |
494 | pr_info("Using ACPI for IRQ routing\n" ); |
495 | acpi_irq_penalty_init(); |
496 | pcibios_enable_irq = acpi_pci_irq_enable; |
497 | pcibios_disable_irq = acpi_pci_irq_disable; |
498 | x86_init.pci.init_irq = x86_init_noop; |
499 | |
500 | if (pci_routeirq) { |
501 | /* |
502 | * PCI IRQ routing is set up by pci_enable_device(), but we |
503 | * also do it here in case there are still broken drivers that |
504 | * don't use pci_enable_device(). |
505 | */ |
506 | pr_info("Routing PCI interrupts for all devices because \"pci=routeirq\" specified\n" ); |
507 | for_each_pci_dev(dev) |
508 | acpi_pci_irq_enable(dev); |
509 | } |
510 | |
511 | return 0; |
512 | } |
513 | |