1 | // SPDX-License-Identifier: GPL-2.0-only |
2 | /* |
3 | * NFIT - Machine Check Handler |
4 | * |
5 | * Copyright(c) 2013-2016 Intel Corporation. All rights reserved. |
6 | */ |
7 | #include <linux/notifier.h> |
8 | #include <linux/acpi.h> |
9 | #include <linux/nd.h> |
10 | #include <asm/mce.h> |
11 | #include "nfit.h" |
12 | |
13 | static int nfit_handle_mce(struct notifier_block *nb, unsigned long val, |
14 | void *data) |
15 | { |
16 | struct mce *mce = (struct mce *)data; |
17 | struct acpi_nfit_desc *acpi_desc; |
18 | struct nfit_spa *nfit_spa; |
19 | |
20 | /* We only care about uncorrectable memory errors */ |
21 | if (!mce_is_memory_error(m: mce) || mce_is_correctable(m: mce)) |
22 | return NOTIFY_DONE; |
23 | |
24 | /* Verify the address reported in the MCE is valid. */ |
25 | if (!mce_usable_address(m: mce)) |
26 | return NOTIFY_DONE; |
27 | |
28 | /* |
29 | * mce->addr contains the physical addr accessed that caused the |
30 | * machine check. We need to walk through the list of NFITs, and see |
31 | * if any of them matches that address, and only then start a scrub. |
32 | */ |
33 | mutex_lock(&acpi_desc_lock); |
34 | list_for_each_entry(acpi_desc, &acpi_descs, list) { |
35 | unsigned int align = 1UL << MCI_MISC_ADDR_LSB(mce->misc); |
36 | struct device *dev = acpi_desc->dev; |
37 | int found_match = 0; |
38 | |
39 | mutex_lock(&acpi_desc->init_mutex); |
40 | list_for_each_entry(nfit_spa, &acpi_desc->spas, list) { |
41 | struct acpi_nfit_system_address *spa = nfit_spa->spa; |
42 | |
43 | if (nfit_spa_type(spa) != NFIT_SPA_PM) |
44 | continue; |
45 | /* find the spa that covers the mce addr */ |
46 | if (spa->address > mce->addr) |
47 | continue; |
48 | if ((spa->address + spa->length - 1) < mce->addr) |
49 | continue; |
50 | found_match = 1; |
51 | dev_dbg(dev, "addr in SPA %d (0x%llx, 0x%llx)\n" , |
52 | spa->range_index, spa->address, spa->length); |
53 | /* |
54 | * We can break at the first match because we're going |
55 | * to rescan all the SPA ranges. There shouldn't be any |
56 | * aliasing anyway. |
57 | */ |
58 | break; |
59 | } |
60 | mutex_unlock(lock: &acpi_desc->init_mutex); |
61 | |
62 | if (!found_match) |
63 | continue; |
64 | |
65 | /* If this fails due to an -ENOMEM, there is little we can do */ |
66 | nvdimm_bus_add_badrange(nvdimm_bus: acpi_desc->nvdimm_bus, |
67 | ALIGN_DOWN(mce->addr, align), length: align); |
68 | nvdimm_region_notify(nd_region: nfit_spa->nd_region, |
69 | event: NVDIMM_REVALIDATE_POISON); |
70 | |
71 | if (acpi_desc->scrub_mode == HW_ERROR_SCRUB_ON) { |
72 | /* |
73 | * We can ignore an -EBUSY here because if an ARS is |
74 | * already in progress, just let that be the last |
75 | * authoritative one |
76 | */ |
77 | acpi_nfit_ars_rescan(acpi_desc, req_type: 0); |
78 | } |
79 | mce->kflags |= MCE_HANDLED_NFIT; |
80 | break; |
81 | } |
82 | |
83 | mutex_unlock(lock: &acpi_desc_lock); |
84 | return NOTIFY_DONE; |
85 | } |
86 | |
87 | static struct notifier_block nfit_mce_dec = { |
88 | .notifier_call = nfit_handle_mce, |
89 | .priority = MCE_PRIO_NFIT, |
90 | }; |
91 | |
92 | void nfit_mce_register(void) |
93 | { |
94 | mce_register_decode_chain(nb: &nfit_mce_dec); |
95 | } |
96 | |
97 | void nfit_mce_unregister(void) |
98 | { |
99 | mce_unregister_decode_chain(nb: &nfit_mce_dec); |
100 | } |
101 | |