1 | // SPDX-License-Identifier: GPL-2.0 |
2 | /* |
3 | * Bluefield-specific EDAC driver. |
4 | * |
5 | * Copyright (c) 2019 Mellanox Technologies. |
6 | */ |
7 | |
8 | #include <linux/acpi.h> |
9 | #include <linux/arm-smccc.h> |
10 | #include <linux/bitfield.h> |
11 | #include <linux/edac.h> |
12 | #include <linux/io.h> |
13 | #include <linux/module.h> |
14 | #include <linux/platform_device.h> |
15 | |
16 | #include "edac_module.h" |
17 | |
18 | #define DRIVER_NAME "bluefield-edac" |
19 | |
20 | /* |
21 | * Mellanox BlueField EMI (External Memory Interface) register definitions. |
22 | */ |
23 | |
24 | #define MLXBF_ECC_CNT 0x340 |
25 | #define MLXBF_ECC_CNT__SERR_CNT GENMASK(15, 0) |
26 | #define MLXBF_ECC_CNT__DERR_CNT GENMASK(31, 16) |
27 | |
28 | #define MLXBF_ECC_ERR 0x348 |
29 | #define MLXBF_ECC_ERR__SECC BIT(0) |
30 | #define MLXBF_ECC_ERR__DECC BIT(16) |
31 | |
32 | #define MLXBF_ECC_LATCH_SEL 0x354 |
33 | #define MLXBF_ECC_LATCH_SEL__START BIT(24) |
34 | |
35 | #define MLXBF_ERR_ADDR_0 0x358 |
36 | |
37 | #define MLXBF_ERR_ADDR_1 0x37c |
38 | |
39 | #define MLXBF_SYNDROM 0x35c |
40 | #define MLXBF_SYNDROM__DERR BIT(0) |
41 | #define MLXBF_SYNDROM__SERR BIT(1) |
42 | #define MLXBF_SYNDROM__SYN GENMASK(25, 16) |
43 | |
44 | #define MLXBF_ADD_INFO 0x364 |
45 | #define MLXBF_ADD_INFO__ERR_PRANK GENMASK(9, 8) |
46 | |
47 | #define MLXBF_EDAC_MAX_DIMM_PER_MC 2 |
48 | #define MLXBF_EDAC_ERROR_GRAIN 8 |
49 | |
50 | /* |
51 | * Request MLNX_SIP_GET_DIMM_INFO |
52 | * |
53 | * Retrieve information about DIMM on a certain slot. |
54 | * |
55 | * Call register usage: |
56 | * a0: MLNX_SIP_GET_DIMM_INFO |
57 | * a1: (Memory controller index) << 16 | (Dimm index in memory controller) |
58 | * a2-7: not used. |
59 | * |
60 | * Return status: |
61 | * a0: MLXBF_DIMM_INFO defined below describing the DIMM. |
62 | * a1-3: not used. |
63 | */ |
64 | #define MLNX_SIP_GET_DIMM_INFO 0x82000008 |
65 | |
66 | /* Format for the SMC response about the memory information */ |
67 | #define MLXBF_DIMM_INFO__SIZE_GB GENMASK_ULL(15, 0) |
68 | #define MLXBF_DIMM_INFO__IS_RDIMM BIT(16) |
69 | #define MLXBF_DIMM_INFO__IS_LRDIMM BIT(17) |
70 | #define MLXBF_DIMM_INFO__IS_NVDIMM BIT(18) |
71 | #define MLXBF_DIMM_INFO__RANKS GENMASK_ULL(23, 21) |
72 | #define MLXBF_DIMM_INFO__PACKAGE_X GENMASK_ULL(31, 24) |
73 | |
/* Per-memory-controller driver state, stored in mci->pvt_info. */
struct bluefield_edac_priv {
	/* Rank count per DIMM slot, taken from the DIMM info SMC response. */
	int dimm_ranks[MLXBF_EDAC_MAX_DIMM_PER_MC];
	/* Mapped base of the EMI (External Memory Interface) registers. */
	void __iomem *emi_base;
	/* Number of DIMM slots, read from the "dimm_per_mc" property. */
	int dimm_per_mc;
};
79 | |
80 | static u64 smc_call1(u64 smc_op, u64 smc_arg) |
81 | { |
82 | struct arm_smccc_res res; |
83 | |
84 | arm_smccc_smc(smc_op, smc_arg, 0, 0, 0, 0, 0, 0, &res); |
85 | |
86 | return res.a0; |
87 | } |
88 | |
89 | /* |
90 | * Gather the ECC information from the External Memory Interface registers |
91 | * and report it to the edac handler. |
92 | */ |
93 | static void bluefield_gather_report_ecc(struct mem_ctl_info *mci, |
94 | int error_cnt, |
95 | int is_single_ecc) |
96 | { |
97 | struct bluefield_edac_priv *priv = mci->pvt_info; |
98 | u32 dram_additional_info, err_prank, edea0, edea1; |
99 | u32 ecc_latch_select, dram_syndrom, serr, derr, syndrom; |
100 | enum hw_event_mc_err_type ecc_type; |
101 | u64 ecc_dimm_addr; |
102 | int ecc_dimm; |
103 | |
104 | ecc_type = is_single_ecc ? HW_EVENT_ERR_CORRECTED : |
105 | HW_EVENT_ERR_UNCORRECTED; |
106 | |
107 | /* |
108 | * Tell the External Memory Interface to populate the relevant |
109 | * registers with information about the last ECC error occurrence. |
110 | */ |
111 | ecc_latch_select = MLXBF_ECC_LATCH_SEL__START; |
112 | writel(val: ecc_latch_select, addr: priv->emi_base + MLXBF_ECC_LATCH_SEL); |
113 | |
114 | /* |
115 | * Verify that the ECC reported info in the registers is of the |
116 | * same type as the one asked to report. If not, just report the |
117 | * error without the detailed information. |
118 | */ |
119 | dram_syndrom = readl(addr: priv->emi_base + MLXBF_SYNDROM); |
120 | serr = FIELD_GET(MLXBF_SYNDROM__SERR, dram_syndrom); |
121 | derr = FIELD_GET(MLXBF_SYNDROM__DERR, dram_syndrom); |
122 | syndrom = FIELD_GET(MLXBF_SYNDROM__SYN, dram_syndrom); |
123 | |
124 | if ((is_single_ecc && !serr) || (!is_single_ecc && !derr)) { |
125 | edac_mc_handle_error(type: ecc_type, mci, error_count: error_cnt, page_frame_number: 0, offset_in_page: 0, syndrome: 0, |
126 | top_layer: 0, mid_layer: 0, low_layer: -1, msg: mci->ctl_name, other_detail: "" ); |
127 | return; |
128 | } |
129 | |
130 | dram_additional_info = readl(addr: priv->emi_base + MLXBF_ADD_INFO); |
131 | err_prank = FIELD_GET(MLXBF_ADD_INFO__ERR_PRANK, dram_additional_info); |
132 | |
133 | ecc_dimm = (err_prank >= 2 && priv->dimm_ranks[0] <= 2) ? 1 : 0; |
134 | |
135 | edea0 = readl(addr: priv->emi_base + MLXBF_ERR_ADDR_0); |
136 | edea1 = readl(addr: priv->emi_base + MLXBF_ERR_ADDR_1); |
137 | |
138 | ecc_dimm_addr = ((u64)edea1 << 32) | edea0; |
139 | |
140 | edac_mc_handle_error(type: ecc_type, mci, error_count: error_cnt, |
141 | PFN_DOWN(ecc_dimm_addr), |
142 | offset_in_page(ecc_dimm_addr), |
143 | syndrome: syndrom, top_layer: ecc_dimm, mid_layer: 0, low_layer: 0, msg: mci->ctl_name, other_detail: "" ); |
144 | } |
145 | |
146 | static void bluefield_edac_check(struct mem_ctl_info *mci) |
147 | { |
148 | struct bluefield_edac_priv *priv = mci->pvt_info; |
149 | u32 ecc_count, single_error_count, double_error_count, ecc_error = 0; |
150 | |
151 | /* |
152 | * The memory controller might not be initialized by the firmware |
153 | * when there isn't memory, which may lead to bad register readings. |
154 | */ |
155 | if (mci->edac_cap == EDAC_FLAG_NONE) |
156 | return; |
157 | |
158 | ecc_count = readl(addr: priv->emi_base + MLXBF_ECC_CNT); |
159 | single_error_count = FIELD_GET(MLXBF_ECC_CNT__SERR_CNT, ecc_count); |
160 | double_error_count = FIELD_GET(MLXBF_ECC_CNT__DERR_CNT, ecc_count); |
161 | |
162 | if (single_error_count) { |
163 | ecc_error |= MLXBF_ECC_ERR__SECC; |
164 | |
165 | bluefield_gather_report_ecc(mci, error_cnt: single_error_count, is_single_ecc: 1); |
166 | } |
167 | |
168 | if (double_error_count) { |
169 | ecc_error |= MLXBF_ECC_ERR__DECC; |
170 | |
171 | bluefield_gather_report_ecc(mci, error_cnt: double_error_count, is_single_ecc: 0); |
172 | } |
173 | |
174 | /* Write to clear reported errors. */ |
175 | if (ecc_count) |
176 | writel(val: ecc_error, addr: priv->emi_base + MLXBF_ECC_ERR); |
177 | } |
178 | |
179 | /* Initialize the DIMMs information for the given memory controller. */ |
180 | static void bluefield_edac_init_dimms(struct mem_ctl_info *mci) |
181 | { |
182 | struct bluefield_edac_priv *priv = mci->pvt_info; |
183 | int mem_ctrl_idx = mci->mc_idx; |
184 | struct dimm_info *dimm; |
185 | u64 smc_info, smc_arg; |
186 | int is_empty = 1, i; |
187 | |
188 | for (i = 0; i < priv->dimm_per_mc; i++) { |
189 | dimm = mci->dimms[i]; |
190 | |
191 | smc_arg = mem_ctrl_idx << 16 | i; |
192 | smc_info = smc_call1(MLNX_SIP_GET_DIMM_INFO, smc_arg); |
193 | |
194 | if (!FIELD_GET(MLXBF_DIMM_INFO__SIZE_GB, smc_info)) { |
195 | dimm->mtype = MEM_EMPTY; |
196 | continue; |
197 | } |
198 | |
199 | is_empty = 0; |
200 | |
201 | dimm->edac_mode = EDAC_SECDED; |
202 | |
203 | if (FIELD_GET(MLXBF_DIMM_INFO__IS_NVDIMM, smc_info)) |
204 | dimm->mtype = MEM_NVDIMM; |
205 | else if (FIELD_GET(MLXBF_DIMM_INFO__IS_LRDIMM, smc_info)) |
206 | dimm->mtype = MEM_LRDDR4; |
207 | else if (FIELD_GET(MLXBF_DIMM_INFO__IS_RDIMM, smc_info)) |
208 | dimm->mtype = MEM_RDDR4; |
209 | else |
210 | dimm->mtype = MEM_DDR4; |
211 | |
212 | dimm->nr_pages = |
213 | FIELD_GET(MLXBF_DIMM_INFO__SIZE_GB, smc_info) * |
214 | (SZ_1G / PAGE_SIZE); |
215 | dimm->grain = MLXBF_EDAC_ERROR_GRAIN; |
216 | |
217 | /* Mem controller for BlueField only supports x4, x8 and x16 */ |
218 | switch (FIELD_GET(MLXBF_DIMM_INFO__PACKAGE_X, smc_info)) { |
219 | case 4: |
220 | dimm->dtype = DEV_X4; |
221 | break; |
222 | case 8: |
223 | dimm->dtype = DEV_X8; |
224 | break; |
225 | case 16: |
226 | dimm->dtype = DEV_X16; |
227 | break; |
228 | default: |
229 | dimm->dtype = DEV_UNKNOWN; |
230 | } |
231 | |
232 | priv->dimm_ranks[i] = |
233 | FIELD_GET(MLXBF_DIMM_INFO__RANKS, smc_info); |
234 | } |
235 | |
236 | if (is_empty) |
237 | mci->edac_cap = EDAC_FLAG_NONE; |
238 | else |
239 | mci->edac_cap = EDAC_FLAG_SECDED; |
240 | } |
241 | |
242 | static int bluefield_edac_mc_probe(struct platform_device *pdev) |
243 | { |
244 | struct bluefield_edac_priv *priv; |
245 | struct device *dev = &pdev->dev; |
246 | struct edac_mc_layer layers[1]; |
247 | struct mem_ctl_info *mci; |
248 | struct resource *emi_res; |
249 | unsigned int mc_idx, dimm_count; |
250 | int rc, ret; |
251 | |
252 | /* Read the MSS (Memory SubSystem) index from ACPI table. */ |
253 | if (device_property_read_u32(dev, propname: "mss_number" , val: &mc_idx)) { |
254 | dev_warn(dev, "bf_edac: MSS number unknown\n" ); |
255 | return -EINVAL; |
256 | } |
257 | |
258 | /* Read the DIMMs per MC from ACPI table. */ |
259 | if (device_property_read_u32(dev, propname: "dimm_per_mc" , val: &dimm_count)) { |
260 | dev_warn(dev, "bf_edac: DIMMs per MC unknown\n" ); |
261 | return -EINVAL; |
262 | } |
263 | |
264 | if (dimm_count > MLXBF_EDAC_MAX_DIMM_PER_MC) { |
265 | dev_warn(dev, "bf_edac: DIMMs per MC not valid\n" ); |
266 | return -EINVAL; |
267 | } |
268 | |
269 | emi_res = platform_get_resource(pdev, IORESOURCE_MEM, 0); |
270 | if (!emi_res) |
271 | return -EINVAL; |
272 | |
273 | layers[0].type = EDAC_MC_LAYER_SLOT; |
274 | layers[0].size = dimm_count; |
275 | layers[0].is_virt_csrow = true; |
276 | |
277 | mci = edac_mc_alloc(mc_num: mc_idx, ARRAY_SIZE(layers), layers, sz_pvt: sizeof(*priv)); |
278 | if (!mci) |
279 | return -ENOMEM; |
280 | |
281 | priv = mci->pvt_info; |
282 | |
283 | priv->dimm_per_mc = dimm_count; |
284 | priv->emi_base = devm_ioremap_resource(dev, res: emi_res); |
285 | if (IS_ERR(ptr: priv->emi_base)) { |
286 | dev_err(dev, "failed to map EMI IO resource\n" ); |
287 | ret = PTR_ERR(ptr: priv->emi_base); |
288 | goto err; |
289 | } |
290 | |
291 | mci->pdev = dev; |
292 | mci->mtype_cap = MEM_FLAG_DDR4 | MEM_FLAG_RDDR4 | |
293 | MEM_FLAG_LRDDR4 | MEM_FLAG_NVDIMM; |
294 | mci->edac_ctl_cap = EDAC_FLAG_SECDED; |
295 | |
296 | mci->mod_name = DRIVER_NAME; |
297 | mci->ctl_name = "BlueField_Memory_Controller" ; |
298 | mci->dev_name = dev_name(dev); |
299 | mci->edac_check = bluefield_edac_check; |
300 | |
301 | /* Initialize mci with the actual populated DIMM information. */ |
302 | bluefield_edac_init_dimms(mci); |
303 | |
304 | platform_set_drvdata(pdev, data: mci); |
305 | |
306 | /* Register with EDAC core */ |
307 | rc = edac_mc_add_mc(mci); |
308 | if (rc) { |
309 | dev_err(dev, "failed to register with EDAC core\n" ); |
310 | ret = rc; |
311 | goto err; |
312 | } |
313 | |
314 | /* Only POLL mode supported so far. */ |
315 | edac_op_state = EDAC_OPSTATE_POLL; |
316 | |
317 | return 0; |
318 | |
319 | err: |
320 | edac_mc_free(mci); |
321 | |
322 | return ret; |
323 | |
324 | } |
325 | |
326 | static void bluefield_edac_mc_remove(struct platform_device *pdev) |
327 | { |
328 | struct mem_ctl_info *mci = platform_get_drvdata(pdev); |
329 | |
330 | edac_mc_del_mc(dev: &pdev->dev); |
331 | edac_mc_free(mci); |
332 | } |
333 | |
/* ACPI IDs this driver binds to; the empty entry terminates the table. */
static const struct acpi_device_id bluefield_mc_acpi_ids[] = {
	{"MLNXBF08", 0},
	{}
};
338 | |
339 | MODULE_DEVICE_TABLE(acpi, bluefield_mc_acpi_ids); |
340 | |
/*
 * Platform driver definition: devices are matched through
 * bluefield_mc_acpi_ids and handled by the probe/remove callbacks below.
 */
static struct platform_driver bluefield_edac_mc_driver = {
	.driver = {
		.name = DRIVER_NAME,
		.acpi_match_table = bluefield_mc_acpi_ids,
	},
	.probe = bluefield_edac_mc_probe,
	.remove_new = bluefield_edac_mc_remove,
};
349 | |
350 | module_platform_driver(bluefield_edac_mc_driver); |
351 | |
352 | MODULE_DESCRIPTION("Mellanox BlueField memory edac driver" ); |
353 | MODULE_AUTHOR("Mellanox Technologies" ); |
354 | MODULE_LICENSE("GPL v2" ); |
355 | |