1 | // SPDX-License-Identifier: GPL-2.0 |
2 | /* |
3 | * Driver for Intel client SoC with integrated memory controller using IBECC |
4 | * |
5 | * Copyright (C) 2020 Intel Corporation |
6 | * |
7 | * The In-Band ECC (IBECC) IP provides ECC protection to all or specific |
8 | * regions of the physical memory space. It's used for memory controllers |
9 | * that don't support the out-of-band ECC which often needs an additional |
10 | * storage device to each channel for storing ECC data. |
11 | */ |
12 | |
13 | #include <linux/module.h> |
14 | #include <linux/init.h> |
15 | #include <linux/pci.h> |
16 | #include <linux/slab.h> |
17 | #include <linux/irq_work.h> |
18 | #include <linux/llist.h> |
19 | #include <linux/genalloc.h> |
20 | #include <linux/edac.h> |
21 | #include <linux/bits.h> |
22 | #include <linux/io.h> |
23 | #include <asm/mach_traps.h> |
24 | #include <asm/nmi.h> |
25 | #include <asm/mce.h> |
26 | |
27 | #include "edac_mc.h" |
28 | #include "edac_module.h" |
29 | |
30 | #define IGEN6_REVISION "v2.5.1" |
31 | |
32 | #define EDAC_MOD_STR "igen6_edac" |
33 | #define IGEN6_NMI_NAME "igen6_ibecc" |
34 | |
/*
 * Debug macros
 *
 * Both wrappers pass the fixed string "igen6" as the tag argument of the
 * underlying EDAC print helper so callers only supply level and message.
 */

/* Driver-wide message; forwards to edac_printk(). */
#define igen6_printk(level, fmt, arg...)		\
	edac_printk(level, "igen6", fmt, ##arg)

/* Message tied to one memory controller; forwards to edac_mc_chipset_printk(). */
#define igen6_mc_printk(mci, level, fmt, arg...)	\
	edac_mc_chipset_printk(mci, level, "igen6", fmt, ##arg)
41 | |
/*
 * Extract bits lo..hi (both inclusive) of v and shift them down so that
 * bit lo of v ends up in bit 0 of the result. The mask is 64 bits wide.
 */
#define GET_BITFIELD(v, lo, hi) (((v) & GENMASK_ULL(hi, lo)) >> (lo))
43 | |
/* Upper bounds on the memory topology; used to size per-IMC/per-channel arrays */
#define NUM_IMC		2 /* Max memory controllers */
#define NUM_CHANNELS	2 /* Max channels */
#define NUM_DIMMS	2 /* Max DIMMs per channel */

/* The 4 GiB boundary (bit 32), used by the address remapping helpers */
#define _4GB		BIT_ULL(32)
49 | |
/*
 * Register offsets and bitfield accessors.
 *
 * Offsets built from IBECC_BASE or IMC_BASE expand to an expression that
 * dereferences the global res_cfg pointer, so they are only usable once
 * res_cfg has been set. They are added to a struct igen6_imc ->window
 * mapping by the readl()/readq() callers in this file.
 */

/* Size of physical memory */
#define TOM_OFFSET		0xa0
/* Top of low usable DRAM */
#define TOLUD_OFFSET		0xbc
/* Capability register C */
#define CAPID_C_OFFSET		0xec
/* IBECC capability bit; its polarity is interpreted per platform by the *_ibecc_available() helpers */
#define CAPID_C_IBECC		BIT(15)

/* Capability register E */
#define CAPID_E_OFFSET		0xf0
/* IBECC capability bits; a clear bit is treated as "available" by the helpers that test them */
#define CAPID_E_IBECC		BIT(12)
#define CAPID_E_IBECC_BIT18	BIT(18)

/* Error Status (16-bit PCI config register, CE/UE bits are cleared by writing 1s) */
#define ERRSTS_OFFSET		0xc8
#define ERRSTS_CE		BIT_ULL(6)
#define ERRSTS_UE		BIT_ULL(7)

/* Error Command (16-bit PCI config register; same bit positions as ERRSTS) */
#define ERRCMD_OFFSET		0xca
#define ERRCMD_CE		BIT_ULL(6)
#define ERRCMD_UE		BIT_ULL(7)

/* IBECC MMIO base address */
#define IBECC_BASE		(res_cfg->ibecc_base)
/* The activate register sits at the very start of the IBECC block */
#define IBECC_ACTIVATE_OFFSET	IBECC_BASE
#define IBECC_ACTIVATE_EN	BIT(0)

/* IBECC error log (64-bit register) */
#define ECC_ERROR_LOG_OFFSET	(IBECC_BASE + res_cfg->ibecc_error_log_offset)
#define ECC_ERROR_LOG_CE	BIT_ULL(62)
#define ECC_ERROR_LOG_UE	BIT_ULL(63)
/* Left shift applied to ECC_ERROR_LOG_ADDR() by the default address extraction */
#define ECC_ERROR_LOG_ADDR_SHIFT	5
/* Error address field, bits 5..38 (right-aligned) */
#define ECC_ERROR_LOG_ADDR(v)	GET_BITFIELD(v, 5, 38)
/* Wider error address field, bits 5..45 (right-aligned) */
#define ECC_ERROR_LOG_ADDR45(v)	GET_BITFIELD(v, 5, 45)
/* Error syndrome field, bits 46..61 */
#define ECC_ERROR_LOG_SYND(v)	GET_BITFIELD(v, 46, 61)

/* Host MMIO base address */
#define MCHBAR_OFFSET		0x48
#define MCHBAR_EN		BIT_ULL(0)
/* Bits 16..38 of the register value, kept at their original bit position */
#define MCHBAR_BASE(v)		(GET_BITFIELD(v, 16, 38) << 16)
#define MCHBAR_SIZE		0x10000

/* Parameters for the channel decode stage */
#define IMC_BASE			(res_cfg->imc_base)
#define MAD_INTER_CHANNEL_OFFSET	IMC_BASE
#define MAD_INTER_CHANNEL_DDR_TYPE(v)	GET_BITFIELD(v, 0, 2)
#define MAD_INTER_CHANNEL_ECHM(v)	GET_BITFIELD(v, 3, 3)
#define MAD_INTER_CHANNEL_CH_L_MAP(v)	GET_BITFIELD(v, 4, 4)
/* Field value is scaled by 2^29 (512 MiB units) to yield a byte count */
#define MAD_INTER_CHANNEL_CH_S_SIZE(v)	((u64)GET_BITFIELD(v, 12, 19) << 29)

/* Parameters for DRAM decode stage */
#define MAD_INTRA_CH0_OFFSET		(IMC_BASE + 4)
#define MAD_INTRA_CH_DIMM_L_MAP(v)	GET_BITFIELD(v, 0, 0)

/* DIMM characteristics */
#define MAD_DIMM_CH0_OFFSET		(IMC_BASE + 0xc)
/* Size fields are scaled by 2^29 (512 MiB units) to yield a byte count */
#define MAD_DIMM_CH_DIMM_L_SIZE(v)	((u64)GET_BITFIELD(v, 0, 6) << 29)
/* Width codes of the L and S DIMM; decoded by get_width() */
#define MAD_DIMM_CH_DLW(v)		GET_BITFIELD(v, 7, 8)
#define MAD_DIMM_CH_DIMM_S_SIZE(v)	((u64)GET_BITFIELD(v, 16, 22) << 29)
#define MAD_DIMM_CH_DSW(v)		GET_BITFIELD(v, 24, 25)

/* Hash for memory controller selection */
#define MAD_MC_HASH_OFFSET		(IMC_BASE + 0x1b8)
/* NOTE(review): "MAC" looks like a typo for "MAD"; the name is kept because callers use it */
#define MAC_MC_HASH_LSB(v)		GET_BITFIELD(v, 1, 3)

/* Hash for channel selection */
#define CHANNEL_HASH_OFFSET		(IMC_BASE + 0x24)
/* Hash for enhanced channel selection */
#define CHANNEL_EHASH_OFFSET		(IMC_BASE + 0x28)
/* Address-bit mask for the hash: register bits 6..19 kept at bit positions 6..19 */
#define CHANNEL_HASH_MASK(v)		(GET_BITFIELD(v, 6, 19) << 6)
/* Interleave bit selector; callers add 6 to get the address bit number */
#define CHANNEL_HASH_LSB_MASK_BIT(v)	GET_BITFIELD(v, 24, 26)
/* Non-zero when hashed channel selection is used (see decode_addr()) */
#define CHANNEL_HASH_MODE(v)		GET_BITFIELD(v, 28, 28)

/* Parameters for memory slice decode stage */
#define MEM_SLICE_HASH_MASK(v)		(GET_BITFIELD(v, 6, 19) << 6)
#define MEM_SLICE_HASH_LSB_MASK_BIT(v)	GET_BITFIELD(v, 24, 26)
127 | |
/*
 * Per-platform resource description. One static instance exists per
 * supported SoC family; the PCI ID table stores a pointer to the matching
 * instance in driver_data. res_cfg points at the instance in use and is
 * dereferenced by the IBECC_BASE/IMC_BASE based register macros.
 */
static struct res_config {
	/* NOTE(review): presumably selects machine-check based notification; its use is outside this view */
	bool machine_check;
	/* Number of memory controllers to scan (loop bound in ecclog_handler()) */
	int num_imc;
	/* Offset of the IMC register block (see IMC_BASE) */
	u32 imc_base;
	/* NOTE(review): cmf_base, cmf_size and ms_hash_offset are only set for tgl_cfg here; consumers are outside this view */
	u32 cmf_base;
	u32 cmf_size;
	u32 ms_hash_offset;
	/* Offset of the IBECC register block (see IBECC_BASE) */
	u32 ibecc_base;
	/* Offset of the error log register relative to ibecc_base */
	u32 ibecc_error_log_offset;
	/* Platform-specific check whether IBECC is available on @pdev */
	bool (*ibecc_available)(struct pci_dev *pdev);
	/* Extract error address logged in IBECC; optional, a default extraction is used when NULL */
	u64 (*err_addr)(u64 ecclog);
	/* Convert error address logged in IBECC to system physical address */
	u64 (*err_addr_to_sys_addr)(u64 eaddr, int mc);
	/* Convert error address logged in IBECC to integrated memory controller address */
	u64 (*err_addr_to_imc_addr)(u64 eaddr, int mc);
} *res_cfg;
145 | |
/* State kept for one integrated memory controller (IMC). */
struct igen6_imc {
	/* Index of this controller within igen6_pvt->imc[] */
	int mc;
	/* EDAC memory controller object that errors are reported against */
	struct mem_ctl_info *mci;
	/* PCI device used for ERRSTS/ERRCMD config space accesses */
	struct pci_dev *pdev;
	struct device dev;
	/* MMIO mapping that the register offset macros are added to */
	void __iomem *window;
	u64 size;
	/* Channel decode parameters, taken from the MAD_INTER_CHANNEL register */
	u64 ch_s_size;
	int ch_l_map;
	/* Per-channel DIMM decode parameters, indexed by channel */
	u64 dimm_s_size[NUM_CHANNELS];
	u64 dimm_l_size[NUM_CHANNELS];
	int dimm_l_map[NUM_CHANNELS];
};
159 | |
/* Driver-global private data: all memory controllers plus memory-slice decode parameters. */
static struct igen6_pvt {
	struct igen6_imc imc[NUM_IMC];
	/* Memory slice hash register value (fields read via MEM_SLICE_HASH_*()) */
	u64 ms_hash;
	/* Memory slice size threshold used by the TGL/ADL address conversions */
	u64 ms_s_size;
	int ms_l_map;
} *igen6_pvt;
166 | |
/*
 * The top of low usable DRAM. Addresses below this value are passed
 * through unchanged by the address remapping helpers.
 */
static u32 igen6_tolud;
/*
 * The size of physical memory. Also the exclusive upper bound that
 * igen6_decode() accepts for an IMC address.
 */
static u64 igen6_tom;
171 | |
/*
 * Working record for decoding one error address. The caller fills in mc,
 * imc_addr and sys_addr; igen6_decode() fills in the channel and
 * sub-channel fields.
 */
struct decoded_addr {
	/* Memory controller index */
	int mc;
	/* Address as seen by the integrated memory controller */
	u64 imc_addr;
	/* System physical address */
	u64 sys_addr;
	/* Result of the channel decode stage */
	int channel_idx;
	u64 channel_addr;
	/* Result of the sub-channel/DIMM decode stage */
	int sub_channel_idx;
	u64 sub_channel_addr;
};
181 | |
/* One captured IBECC error log, queued on ecclog_llist until the worker handles it. */
struct ecclog_node {
	struct llist_node llnode;
	/* Index of the memory controller the log was read from */
	int mc;
	/* Raw value of the ECC_ERROR_LOG register */
	u64 ecclog;
};
187 | |
/*
 * In the NMI handler, the driver uses the lock-less memory allocator
 * to allocate memory to store the IBECC error logs and links the logs
 * to the lock-less list. Delay printk() and the work of error reporting
 * to EDAC core in a worker.
 *
 * Flow: ecclog_handler() allocates a node from ecclog_pool, adds it to
 * ecclog_llist and queues ecclog_irq_work; the irq_work callback schedules
 * ecclog_work, whose callback drains the list and frees the nodes.
 */
#define ECCLOG_POOL_SIZE	PAGE_SIZE
/* Pending error logs, produced by ecclog_gen_pool_add() and drained by ecclog_work_cb() */
static LLIST_HEAD(ecclog_llist);
/* Allocator for struct ecclog_node, backed by ecclog_buf */
static struct gen_pool *ecclog_pool;
/* Static backing storage handed to the pool in ecclog_gen_pool_create() */
static char ecclog_buf[ECCLOG_POOL_SIZE];
static struct irq_work ecclog_irq_work;
static struct work_struct ecclog_work;
200 | |
/*
 * PCI device IDs of the supported compute dies. Each ID is paired with its
 * platform configuration in igen6_pci_tbl[].
 */

/* Compute die IDs for Elkhart Lake with IBECC */
#define DID_EHL_SKU5	0x4514
#define DID_EHL_SKU6	0x4528
#define DID_EHL_SKU7	0x452a
#define DID_EHL_SKU8	0x4516
#define DID_EHL_SKU9	0x452c
#define DID_EHL_SKU10	0x452e
#define DID_EHL_SKU11	0x4532
#define DID_EHL_SKU12	0x4518
#define DID_EHL_SKU13	0x451a
#define DID_EHL_SKU14	0x4534
#define DID_EHL_SKU15	0x4536

/* Compute die IDs for ICL-NNPI with IBECC */
#define DID_ICL_SKU8	0x4581
#define DID_ICL_SKU10	0x4585
#define DID_ICL_SKU11	0x4589
#define DID_ICL_SKU12	0x458d

/* Compute die IDs for Tiger Lake with IBECC */
#define DID_TGL_SKU	0x9a14

/* Compute die IDs for Alder Lake with IBECC */
#define DID_ADL_SKU1	0x4601
#define DID_ADL_SKU2	0x4602
#define DID_ADL_SKU3	0x4621
#define DID_ADL_SKU4	0x4641

/* Compute die IDs for Alder Lake-N with IBECC */
#define DID_ADL_N_SKU1	0x4614
#define DID_ADL_N_SKU2	0x4617
#define DID_ADL_N_SKU3	0x461b
#define DID_ADL_N_SKU4	0x461c
#define DID_ADL_N_SKU5	0x4673
#define DID_ADL_N_SKU6	0x4674
#define DID_ADL_N_SKU7	0x4675
#define DID_ADL_N_SKU8	0x4677
#define DID_ADL_N_SKU9	0x4678
#define DID_ADL_N_SKU10	0x4679
#define DID_ADL_N_SKU11	0x467c
#define DID_ADL_N_SKU12	0x4632

/* Compute die IDs for Raptor Lake-P with IBECC */
#define DID_RPL_P_SKU1	0xa706
#define DID_RPL_P_SKU2	0xa707
#define DID_RPL_P_SKU3	0xa708
#define DID_RPL_P_SKU4	0xa716
#define DID_RPL_P_SKU5	0xa718

/* Compute die IDs for Meteor Lake-PS with IBECC */
#define DID_MTL_PS_SKU1	0x7d21
#define DID_MTL_PS_SKU2	0x7d22
#define DID_MTL_PS_SKU3	0x7d23
#define DID_MTL_PS_SKU4	0x7d24

/* Compute die IDs for Meteor Lake-P with IBECC */
#define DID_MTL_P_SKU1	0x7d01
#define DID_MTL_P_SKU2	0x7d02
#define DID_MTL_P_SKU3	0x7d14
260 | |
261 | static int get_mchbar(struct pci_dev *pdev, u64 *mchbar) |
262 | { |
263 | union { |
264 | u64 v; |
265 | struct { |
266 | u32 v_lo; |
267 | u32 v_hi; |
268 | }; |
269 | } u; |
270 | |
271 | if (pci_read_config_dword(dev: pdev, MCHBAR_OFFSET, val: &u.v_lo)) { |
272 | igen6_printk(KERN_ERR, "Failed to read lower MCHBAR\n" ); |
273 | return -ENODEV; |
274 | } |
275 | |
276 | if (pci_read_config_dword(dev: pdev, MCHBAR_OFFSET + 4, val: &u.v_hi)) { |
277 | igen6_printk(KERN_ERR, "Failed to read upper MCHBAR\n" ); |
278 | return -ENODEV; |
279 | } |
280 | |
281 | if (!(u.v & MCHBAR_EN)) { |
282 | igen6_printk(KERN_ERR, "MCHBAR is disabled\n" ); |
283 | return -ENODEV; |
284 | } |
285 | |
286 | *mchbar = MCHBAR_BASE(u.v); |
287 | |
288 | return 0; |
289 | } |
290 | |
291 | static bool ehl_ibecc_available(struct pci_dev *pdev) |
292 | { |
293 | u32 v; |
294 | |
295 | if (pci_read_config_dword(dev: pdev, CAPID_C_OFFSET, val: &v)) |
296 | return false; |
297 | |
298 | return !!(CAPID_C_IBECC & v); |
299 | } |
300 | |
301 | static u64 ehl_err_addr_to_sys_addr(u64 eaddr, int mc) |
302 | { |
303 | return eaddr; |
304 | } |
305 | |
306 | static u64 ehl_err_addr_to_imc_addr(u64 eaddr, int mc) |
307 | { |
308 | if (eaddr < igen6_tolud) |
309 | return eaddr; |
310 | |
311 | if (igen6_tom <= _4GB) |
312 | return eaddr + igen6_tolud - _4GB; |
313 | |
314 | if (eaddr < _4GB) |
315 | return eaddr + igen6_tolud - igen6_tom; |
316 | |
317 | return eaddr; |
318 | } |
319 | |
320 | static bool icl_ibecc_available(struct pci_dev *pdev) |
321 | { |
322 | u32 v; |
323 | |
324 | if (pci_read_config_dword(dev: pdev, CAPID_C_OFFSET, val: &v)) |
325 | return false; |
326 | |
327 | return !(CAPID_C_IBECC & v) && |
328 | (boot_cpu_data.x86_stepping >= 1); |
329 | } |
330 | |
331 | static bool tgl_ibecc_available(struct pci_dev *pdev) |
332 | { |
333 | u32 v; |
334 | |
335 | if (pci_read_config_dword(dev: pdev, CAPID_E_OFFSET, val: &v)) |
336 | return false; |
337 | |
338 | return !(CAPID_E_IBECC & v); |
339 | } |
340 | |
341 | static bool mtl_p_ibecc_available(struct pci_dev *pdev) |
342 | { |
343 | u32 v; |
344 | |
345 | if (pci_read_config_dword(dev: pdev, CAPID_E_OFFSET, val: &v)) |
346 | return false; |
347 | |
348 | return !(CAPID_E_IBECC_BIT18 & v); |
349 | } |
350 | |
351 | static bool mtl_ps_ibecc_available(struct pci_dev *pdev) |
352 | { |
353 | #define MCHBAR_MEMSS_IBECCDIS 0x13c00 |
354 | void __iomem *window; |
355 | u64 mchbar; |
356 | u32 val; |
357 | |
358 | if (get_mchbar(pdev, mchbar: &mchbar)) |
359 | return false; |
360 | |
361 | window = ioremap(offset: mchbar, MCHBAR_SIZE * 2); |
362 | if (!window) { |
363 | igen6_printk(KERN_ERR, "Failed to ioremap 0x%llx\n" , mchbar); |
364 | return false; |
365 | } |
366 | |
367 | val = readl(addr: window + MCHBAR_MEMSS_IBECCDIS); |
368 | iounmap(addr: window); |
369 | |
370 | /* Bit6: 1 - IBECC is disabled, 0 - IBECC isn't disabled */ |
371 | return !GET_BITFIELD(val, 6, 6); |
372 | } |
373 | |
374 | static u64 mem_addr_to_sys_addr(u64 maddr) |
375 | { |
376 | if (maddr < igen6_tolud) |
377 | return maddr; |
378 | |
379 | if (igen6_tom <= _4GB) |
380 | return maddr - igen6_tolud + _4GB; |
381 | |
382 | if (maddr < _4GB) |
383 | return maddr - igen6_tolud + igen6_tom; |
384 | |
385 | return maddr; |
386 | } |
387 | |
388 | static u64 mem_slice_hash(u64 addr, u64 mask, u64 hash_init, int intlv_bit) |
389 | { |
390 | u64 hash_addr = addr & mask, hash = hash_init; |
391 | u64 intlv = (addr >> intlv_bit) & 1; |
392 | int i; |
393 | |
394 | for (i = 6; i < 20; i++) |
395 | hash ^= (hash_addr >> i) & 1; |
396 | |
397 | return hash ^ intlv; |
398 | } |
399 | |
400 | static u64 tgl_err_addr_to_mem_addr(u64 eaddr, int mc) |
401 | { |
402 | u64 maddr, hash, mask, ms_s_size; |
403 | int intlv_bit; |
404 | u32 ms_hash; |
405 | |
406 | ms_s_size = igen6_pvt->ms_s_size; |
407 | if (eaddr >= ms_s_size) |
408 | return eaddr + ms_s_size; |
409 | |
410 | ms_hash = igen6_pvt->ms_hash; |
411 | |
412 | mask = MEM_SLICE_HASH_MASK(ms_hash); |
413 | intlv_bit = MEM_SLICE_HASH_LSB_MASK_BIT(ms_hash) + 6; |
414 | |
415 | maddr = GET_BITFIELD(eaddr, intlv_bit, 63) << (intlv_bit + 1) | |
416 | GET_BITFIELD(eaddr, 0, intlv_bit - 1); |
417 | |
418 | hash = mem_slice_hash(addr: maddr, mask, hash_init: mc, intlv_bit); |
419 | |
420 | return maddr | (hash << intlv_bit); |
421 | } |
422 | |
423 | static u64 tgl_err_addr_to_sys_addr(u64 eaddr, int mc) |
424 | { |
425 | u64 maddr = tgl_err_addr_to_mem_addr(eaddr, mc); |
426 | |
427 | return mem_addr_to_sys_addr(maddr); |
428 | } |
429 | |
430 | static u64 tgl_err_addr_to_imc_addr(u64 eaddr, int mc) |
431 | { |
432 | return eaddr; |
433 | } |
434 | |
435 | static u64 adl_err_addr_to_sys_addr(u64 eaddr, int mc) |
436 | { |
437 | return mem_addr_to_sys_addr(maddr: eaddr); |
438 | } |
439 | |
440 | static u64 adl_err_addr_to_imc_addr(u64 eaddr, int mc) |
441 | { |
442 | u64 imc_addr, ms_s_size = igen6_pvt->ms_s_size; |
443 | struct igen6_imc *imc = &igen6_pvt->imc[mc]; |
444 | int intlv_bit; |
445 | u32 mc_hash; |
446 | |
447 | if (eaddr >= 2 * ms_s_size) |
448 | return eaddr - ms_s_size; |
449 | |
450 | mc_hash = readl(addr: imc->window + MAD_MC_HASH_OFFSET); |
451 | |
452 | intlv_bit = MAC_MC_HASH_LSB(mc_hash) + 6; |
453 | |
454 | imc_addr = GET_BITFIELD(eaddr, intlv_bit + 1, 63) << intlv_bit | |
455 | GET_BITFIELD(eaddr, 0, intlv_bit - 1); |
456 | |
457 | return imc_addr; |
458 | } |
459 | |
460 | static u64 rpl_p_err_addr(u64 ecclog) |
461 | { |
462 | return ECC_ERROR_LOG_ADDR45(ecclog); |
463 | } |
464 | |
/*
 * Elkhart Lake: one memory controller. Fields not listed (machine_check,
 * err_addr, cmf_*, ms_hash_offset) are zero-initialized.
 */
static struct res_config ehl_cfg = {
	.num_imc		= 1,
	.imc_base		= 0x5000,
	.ibecc_base		= 0xdc00,
	.ibecc_available	= ehl_ibecc_available,
	.ibecc_error_log_offset	= 0x170,
	.err_addr_to_sys_addr	= ehl_err_addr_to_sys_addr,
	.err_addr_to_imc_addr	= ehl_err_addr_to_imc_addr,
};
474 | |
/*
 * ICL-NNPI: one memory controller; shares the Elkhart Lake address
 * conversions but has its own IBECC base and availability check.
 */
static struct res_config icl_cfg = {
	.num_imc		= 1,
	.imc_base		= 0x5000,
	.ibecc_base		= 0xd800,
	.ibecc_error_log_offset	= 0x170,
	.ibecc_available	= icl_ibecc_available,
	.err_addr_to_sys_addr	= ehl_err_addr_to_sys_addr,
	.err_addr_to_imc_addr	= ehl_err_addr_to_imc_addr,
};
484 | |
/*
 * Tiger Lake: two memory controllers, machine_check set. The only
 * configuration here that sets the cmf_* and ms_hash_offset fields.
 */
static struct res_config tgl_cfg = {
	.machine_check		= true,
	.num_imc		= 2,
	.imc_base		= 0x5000,
	.cmf_base		= 0x11000,
	.cmf_size		= 0x800,
	.ms_hash_offset		= 0xac,
	.ibecc_base		= 0xd400,
	.ibecc_error_log_offset	= 0x170,
	.ibecc_available	= tgl_ibecc_available,
	.err_addr_to_sys_addr	= tgl_err_addr_to_sys_addr,
	.err_addr_to_imc_addr	= tgl_err_addr_to_imc_addr,
};
498 | |
/* Alder Lake: two memory controllers, machine_check set, error log at IBECC offset 0x68. */
static struct res_config adl_cfg = {
	.machine_check		= true,
	.num_imc		= 2,
	.imc_base		= 0xd800,
	.ibecc_base		= 0xd400,
	.ibecc_error_log_offset	= 0x68,
	.ibecc_available	= tgl_ibecc_available,
	.err_addr_to_sys_addr	= adl_err_addr_to_sys_addr,
	.err_addr_to_imc_addr	= adl_err_addr_to_imc_addr,
};
509 | |
/* Alder Lake-N: same as adl_cfg except for a single memory controller. */
static struct res_config adl_n_cfg = {
	.machine_check		= true,
	.num_imc		= 1,
	.imc_base		= 0xd800,
	.ibecc_base		= 0xd400,
	.ibecc_error_log_offset	= 0x68,
	.ibecc_available	= tgl_ibecc_available,
	.err_addr_to_sys_addr	= adl_err_addr_to_sys_addr,
	.err_addr_to_imc_addr	= adl_err_addr_to_imc_addr,
};
520 | |
/*
 * Raptor Lake-P: same as adl_cfg plus an err_addr override that extracts
 * the wider (bits 5..45) address field from the error log.
 */
static struct res_config rpl_p_cfg = {
	.machine_check		= true,
	.num_imc		= 2,
	.imc_base		= 0xd800,
	.ibecc_base		= 0xd400,
	.ibecc_error_log_offset	= 0x68,
	.ibecc_available	= tgl_ibecc_available,
	.err_addr		= rpl_p_err_addr,
	.err_addr_to_sys_addr	= adl_err_addr_to_sys_addr,
	.err_addr_to_imc_addr	= adl_err_addr_to_imc_addr,
};
532 | |
/*
 * Meteor Lake-PS: two memory controllers, Alder Lake address conversions,
 * availability determined from an MMIO register (mtl_ps_ibecc_available()).
 */
static struct res_config mtl_ps_cfg = {
	.machine_check		= true,
	.num_imc		= 2,
	.imc_base		= 0xd800,
	.ibecc_base		= 0xd400,
	.ibecc_error_log_offset	= 0x170,
	.ibecc_available	= mtl_ps_ibecc_available,
	.err_addr_to_sys_addr	= adl_err_addr_to_sys_addr,
	.err_addr_to_imc_addr	= adl_err_addr_to_imc_addr,
};
543 | |
/*
 * Meteor Lake-P: same as mtl_ps_cfg except that availability is read from
 * CAPID_E bit 18 (mtl_p_ibecc_available()).
 */
static struct res_config mtl_p_cfg = {
	.machine_check		= true,
	.num_imc		= 2,
	.imc_base		= 0xd800,
	.ibecc_base		= 0xd400,
	.ibecc_error_log_offset	= 0x170,
	.ibecc_available	= mtl_p_ibecc_available,
	.err_addr_to_sys_addr	= adl_err_addr_to_sys_addr,
	.err_addr_to_imc_addr	= adl_err_addr_to_imc_addr,
};
554 | |
/*
 * Supported devices. The second initializer of each entry stores the
 * address of the matching struct res_config (cast to kernel_ulong_t).
 * The empty entry terminates the table.
 */
static const struct pci_device_id igen6_pci_tbl[] = {
	/* Elkhart Lake */
	{ PCI_VDEVICE(INTEL, DID_EHL_SKU5), (kernel_ulong_t)&ehl_cfg },
	{ PCI_VDEVICE(INTEL, DID_EHL_SKU6), (kernel_ulong_t)&ehl_cfg },
	{ PCI_VDEVICE(INTEL, DID_EHL_SKU7), (kernel_ulong_t)&ehl_cfg },
	{ PCI_VDEVICE(INTEL, DID_EHL_SKU8), (kernel_ulong_t)&ehl_cfg },
	{ PCI_VDEVICE(INTEL, DID_EHL_SKU9), (kernel_ulong_t)&ehl_cfg },
	{ PCI_VDEVICE(INTEL, DID_EHL_SKU10), (kernel_ulong_t)&ehl_cfg },
	{ PCI_VDEVICE(INTEL, DID_EHL_SKU11), (kernel_ulong_t)&ehl_cfg },
	{ PCI_VDEVICE(INTEL, DID_EHL_SKU12), (kernel_ulong_t)&ehl_cfg },
	{ PCI_VDEVICE(INTEL, DID_EHL_SKU13), (kernel_ulong_t)&ehl_cfg },
	{ PCI_VDEVICE(INTEL, DID_EHL_SKU14), (kernel_ulong_t)&ehl_cfg },
	{ PCI_VDEVICE(INTEL, DID_EHL_SKU15), (kernel_ulong_t)&ehl_cfg },
	/* ICL-NNPI */
	{ PCI_VDEVICE(INTEL, DID_ICL_SKU8), (kernel_ulong_t)&icl_cfg },
	{ PCI_VDEVICE(INTEL, DID_ICL_SKU10), (kernel_ulong_t)&icl_cfg },
	{ PCI_VDEVICE(INTEL, DID_ICL_SKU11), (kernel_ulong_t)&icl_cfg },
	{ PCI_VDEVICE(INTEL, DID_ICL_SKU12), (kernel_ulong_t)&icl_cfg },
	/* Tiger Lake */
	{ PCI_VDEVICE(INTEL, DID_TGL_SKU), (kernel_ulong_t)&tgl_cfg },
	/* Alder Lake */
	{ PCI_VDEVICE(INTEL, DID_ADL_SKU1), (kernel_ulong_t)&adl_cfg },
	{ PCI_VDEVICE(INTEL, DID_ADL_SKU2), (kernel_ulong_t)&adl_cfg },
	{ PCI_VDEVICE(INTEL, DID_ADL_SKU3), (kernel_ulong_t)&adl_cfg },
	{ PCI_VDEVICE(INTEL, DID_ADL_SKU4), (kernel_ulong_t)&adl_cfg },
	/* Alder Lake-N */
	{ PCI_VDEVICE(INTEL, DID_ADL_N_SKU1), (kernel_ulong_t)&adl_n_cfg },
	{ PCI_VDEVICE(INTEL, DID_ADL_N_SKU2), (kernel_ulong_t)&adl_n_cfg },
	{ PCI_VDEVICE(INTEL, DID_ADL_N_SKU3), (kernel_ulong_t)&adl_n_cfg },
	{ PCI_VDEVICE(INTEL, DID_ADL_N_SKU4), (kernel_ulong_t)&adl_n_cfg },
	{ PCI_VDEVICE(INTEL, DID_ADL_N_SKU5), (kernel_ulong_t)&adl_n_cfg },
	{ PCI_VDEVICE(INTEL, DID_ADL_N_SKU6), (kernel_ulong_t)&adl_n_cfg },
	{ PCI_VDEVICE(INTEL, DID_ADL_N_SKU7), (kernel_ulong_t)&adl_n_cfg },
	{ PCI_VDEVICE(INTEL, DID_ADL_N_SKU8), (kernel_ulong_t)&adl_n_cfg },
	{ PCI_VDEVICE(INTEL, DID_ADL_N_SKU9), (kernel_ulong_t)&adl_n_cfg },
	{ PCI_VDEVICE(INTEL, DID_ADL_N_SKU10), (kernel_ulong_t)&adl_n_cfg },
	{ PCI_VDEVICE(INTEL, DID_ADL_N_SKU11), (kernel_ulong_t)&adl_n_cfg },
	{ PCI_VDEVICE(INTEL, DID_ADL_N_SKU12), (kernel_ulong_t)&adl_n_cfg },
	/* Raptor Lake-P */
	{ PCI_VDEVICE(INTEL, DID_RPL_P_SKU1), (kernel_ulong_t)&rpl_p_cfg },
	{ PCI_VDEVICE(INTEL, DID_RPL_P_SKU2), (kernel_ulong_t)&rpl_p_cfg },
	{ PCI_VDEVICE(INTEL, DID_RPL_P_SKU3), (kernel_ulong_t)&rpl_p_cfg },
	{ PCI_VDEVICE(INTEL, DID_RPL_P_SKU4), (kernel_ulong_t)&rpl_p_cfg },
	{ PCI_VDEVICE(INTEL, DID_RPL_P_SKU5), (kernel_ulong_t)&rpl_p_cfg },
	/* Meteor Lake-PS */
	{ PCI_VDEVICE(INTEL, DID_MTL_PS_SKU1), (kernel_ulong_t)&mtl_ps_cfg },
	{ PCI_VDEVICE(INTEL, DID_MTL_PS_SKU2), (kernel_ulong_t)&mtl_ps_cfg },
	{ PCI_VDEVICE(INTEL, DID_MTL_PS_SKU3), (kernel_ulong_t)&mtl_ps_cfg },
	{ PCI_VDEVICE(INTEL, DID_MTL_PS_SKU4), (kernel_ulong_t)&mtl_ps_cfg },
	/* Meteor Lake-P */
	{ PCI_VDEVICE(INTEL, DID_MTL_P_SKU1), (kernel_ulong_t)&mtl_p_cfg },
	{ PCI_VDEVICE(INTEL, DID_MTL_P_SKU2), (kernel_ulong_t)&mtl_p_cfg },
	{ PCI_VDEVICE(INTEL, DID_MTL_P_SKU3), (kernel_ulong_t)&mtl_p_cfg },
	{ },
};
MODULE_DEVICE_TABLE(pci, igen6_pci_tbl);
603 | |
604 | static enum dev_type get_width(int dimm_l, u32 mad_dimm) |
605 | { |
606 | u32 w = dimm_l ? MAD_DIMM_CH_DLW(mad_dimm) : |
607 | MAD_DIMM_CH_DSW(mad_dimm); |
608 | |
609 | switch (w) { |
610 | case 0: |
611 | return DEV_X8; |
612 | case 1: |
613 | return DEV_X16; |
614 | case 2: |
615 | return DEV_X32; |
616 | default: |
617 | return DEV_UNKNOWN; |
618 | } |
619 | } |
620 | |
621 | static enum mem_type get_memory_type(u32 mad_inter) |
622 | { |
623 | u32 t = MAD_INTER_CHANNEL_DDR_TYPE(mad_inter); |
624 | |
625 | switch (t) { |
626 | case 0: |
627 | return MEM_DDR4; |
628 | case 1: |
629 | return MEM_DDR3; |
630 | case 2: |
631 | return MEM_LPDDR3; |
632 | case 3: |
633 | return MEM_LPDDR4; |
634 | case 4: |
635 | return MEM_WIO2; |
636 | default: |
637 | return MEM_UNKNOWN; |
638 | } |
639 | } |
640 | |
641 | static int decode_chan_idx(u64 addr, u64 mask, int intlv_bit) |
642 | { |
643 | u64 hash_addr = addr & mask, hash = 0; |
644 | u64 intlv = (addr >> intlv_bit) & 1; |
645 | int i; |
646 | |
647 | for (i = 6; i < 20; i++) |
648 | hash ^= (hash_addr >> i) & 1; |
649 | |
650 | return (int)hash ^ intlv; |
651 | } |
652 | |
653 | static u64 decode_channel_addr(u64 addr, int intlv_bit) |
654 | { |
655 | u64 channel_addr; |
656 | |
657 | /* Remove the interleave bit and shift upper part down to fill gap */ |
658 | channel_addr = GET_BITFIELD(addr, intlv_bit + 1, 63) << intlv_bit; |
659 | channel_addr |= GET_BITFIELD(addr, 0, intlv_bit - 1); |
660 | |
661 | return channel_addr; |
662 | } |
663 | |
664 | static void decode_addr(u64 addr, u32 hash, u64 s_size, int l_map, |
665 | int *idx, u64 *sub_addr) |
666 | { |
667 | int intlv_bit = CHANNEL_HASH_LSB_MASK_BIT(hash) + 6; |
668 | |
669 | if (addr > 2 * s_size) { |
670 | *sub_addr = addr - s_size; |
671 | *idx = l_map; |
672 | return; |
673 | } |
674 | |
675 | if (CHANNEL_HASH_MODE(hash)) { |
676 | *sub_addr = decode_channel_addr(addr, intlv_bit); |
677 | *idx = decode_chan_idx(addr, CHANNEL_HASH_MASK(hash), intlv_bit); |
678 | } else { |
679 | *sub_addr = decode_channel_addr(addr, intlv_bit: 6); |
680 | *idx = GET_BITFIELD(addr, 6, 6); |
681 | } |
682 | } |
683 | |
684 | static int igen6_decode(struct decoded_addr *res) |
685 | { |
686 | struct igen6_imc *imc = &igen6_pvt->imc[res->mc]; |
687 | u64 addr = res->imc_addr, sub_addr, s_size; |
688 | int idx, l_map; |
689 | u32 hash; |
690 | |
691 | if (addr >= igen6_tom) { |
692 | edac_dbg(0, "Address 0x%llx out of range\n" , addr); |
693 | return -EINVAL; |
694 | } |
695 | |
696 | /* Decode channel */ |
697 | hash = readl(addr: imc->window + CHANNEL_HASH_OFFSET); |
698 | s_size = imc->ch_s_size; |
699 | l_map = imc->ch_l_map; |
700 | decode_addr(addr, hash, s_size, l_map, idx: &idx, sub_addr: &sub_addr); |
701 | res->channel_idx = idx; |
702 | res->channel_addr = sub_addr; |
703 | |
704 | /* Decode sub-channel/DIMM */ |
705 | hash = readl(addr: imc->window + CHANNEL_EHASH_OFFSET); |
706 | s_size = imc->dimm_s_size[idx]; |
707 | l_map = imc->dimm_l_map[idx]; |
708 | decode_addr(addr: res->channel_addr, hash, s_size, l_map, idx: &idx, sub_addr: &sub_addr); |
709 | res->sub_channel_idx = idx; |
710 | res->sub_channel_addr = sub_addr; |
711 | |
712 | return 0; |
713 | } |
714 | |
715 | static void igen6_output_error(struct decoded_addr *res, |
716 | struct mem_ctl_info *mci, u64 ecclog) |
717 | { |
718 | enum hw_event_mc_err_type type = ecclog & ECC_ERROR_LOG_UE ? |
719 | HW_EVENT_ERR_UNCORRECTED : |
720 | HW_EVENT_ERR_CORRECTED; |
721 | |
722 | edac_mc_handle_error(type, mci, error_count: 1, |
723 | page_frame_number: res->sys_addr >> PAGE_SHIFT, |
724 | offset_in_page: res->sys_addr & ~PAGE_MASK, |
725 | ECC_ERROR_LOG_SYND(ecclog), |
726 | top_layer: res->channel_idx, mid_layer: res->sub_channel_idx, |
727 | low_layer: -1, msg: "" , other_detail: "" ); |
728 | } |
729 | |
730 | static struct gen_pool *ecclog_gen_pool_create(void) |
731 | { |
732 | struct gen_pool *pool; |
733 | |
734 | pool = gen_pool_create(ilog2(sizeof(struct ecclog_node)), -1); |
735 | if (!pool) |
736 | return NULL; |
737 | |
738 | if (gen_pool_add(pool, addr: (unsigned long)ecclog_buf, ECCLOG_POOL_SIZE, nid: -1)) { |
739 | gen_pool_destroy(pool); |
740 | return NULL; |
741 | } |
742 | |
743 | return pool; |
744 | } |
745 | |
746 | static int ecclog_gen_pool_add(int mc, u64 ecclog) |
747 | { |
748 | struct ecclog_node *node; |
749 | |
750 | node = (void *)gen_pool_alloc(pool: ecclog_pool, size: sizeof(*node)); |
751 | if (!node) |
752 | return -ENOMEM; |
753 | |
754 | node->mc = mc; |
755 | node->ecclog = ecclog; |
756 | llist_add(new: &node->llnode, head: &ecclog_llist); |
757 | |
758 | return 0; |
759 | } |
760 | |
761 | /* |
762 | * Either the memory-mapped I/O status register ECC_ERROR_LOG or the PCI |
763 | * configuration space status register ERRSTS can indicate whether a |
764 | * correctable error or an uncorrectable error occurred. We only use the |
765 | * ECC_ERROR_LOG register to check error type, but need to clear both |
766 | * registers to enable future error events. |
767 | */ |
768 | static u64 ecclog_read_and_clear(struct igen6_imc *imc) |
769 | { |
770 | u64 ecclog = readq(addr: imc->window + ECC_ERROR_LOG_OFFSET); |
771 | |
772 | if (ecclog & (ECC_ERROR_LOG_CE | ECC_ERROR_LOG_UE)) { |
773 | /* Clear CE/UE bits by writing 1s */ |
774 | writeq(val: ecclog, addr: imc->window + ECC_ERROR_LOG_OFFSET); |
775 | return ecclog; |
776 | } |
777 | |
778 | return 0; |
779 | } |
780 | |
781 | static void errsts_clear(struct igen6_imc *imc) |
782 | { |
783 | u16 errsts; |
784 | |
785 | if (pci_read_config_word(dev: imc->pdev, ERRSTS_OFFSET, val: &errsts)) { |
786 | igen6_printk(KERN_ERR, "Failed to read ERRSTS\n" ); |
787 | return; |
788 | } |
789 | |
790 | /* Clear CE/UE bits by writing 1s */ |
791 | if (errsts & (ERRSTS_CE | ERRSTS_UE)) |
792 | pci_write_config_word(dev: imc->pdev, ERRSTS_OFFSET, val: errsts); |
793 | } |
794 | |
795 | static int errcmd_enable_error_reporting(bool enable) |
796 | { |
797 | struct igen6_imc *imc = &igen6_pvt->imc[0]; |
798 | u16 errcmd; |
799 | int rc; |
800 | |
801 | rc = pci_read_config_word(dev: imc->pdev, ERRCMD_OFFSET, val: &errcmd); |
802 | if (rc) |
803 | return rc; |
804 | |
805 | if (enable) |
806 | errcmd |= ERRCMD_CE | ERRSTS_UE; |
807 | else |
808 | errcmd &= ~(ERRCMD_CE | ERRSTS_UE); |
809 | |
810 | rc = pci_write_config_word(dev: imc->pdev, ERRCMD_OFFSET, val: errcmd); |
811 | if (rc) |
812 | return rc; |
813 | |
814 | return 0; |
815 | } |
816 | |
817 | static int ecclog_handler(void) |
818 | { |
819 | struct igen6_imc *imc; |
820 | int i, n = 0; |
821 | u64 ecclog; |
822 | |
823 | for (i = 0; i < res_cfg->num_imc; i++) { |
824 | imc = &igen6_pvt->imc[i]; |
825 | |
826 | /* errsts_clear() isn't NMI-safe. Delay it in the IRQ context */ |
827 | |
828 | ecclog = ecclog_read_and_clear(imc); |
829 | if (!ecclog) |
830 | continue; |
831 | |
832 | if (!ecclog_gen_pool_add(mc: i, ecclog)) |
833 | irq_work_queue(work: &ecclog_irq_work); |
834 | |
835 | n++; |
836 | } |
837 | |
838 | return n; |
839 | } |
840 | |
841 | static void ecclog_work_cb(struct work_struct *work) |
842 | { |
843 | struct ecclog_node *node, *tmp; |
844 | struct mem_ctl_info *mci; |
845 | struct llist_node *head; |
846 | struct decoded_addr res; |
847 | u64 eaddr; |
848 | |
849 | head = llist_del_all(head: &ecclog_llist); |
850 | if (!head) |
851 | return; |
852 | |
853 | llist_for_each_entry_safe(node, tmp, head, llnode) { |
854 | memset(&res, 0, sizeof(res)); |
855 | if (res_cfg->err_addr) |
856 | eaddr = res_cfg->err_addr(node->ecclog); |
857 | else |
858 | eaddr = ECC_ERROR_LOG_ADDR(node->ecclog) << |
859 | ECC_ERROR_LOG_ADDR_SHIFT; |
860 | res.mc = node->mc; |
861 | res.sys_addr = res_cfg->err_addr_to_sys_addr(eaddr, res.mc); |
862 | res.imc_addr = res_cfg->err_addr_to_imc_addr(eaddr, res.mc); |
863 | |
864 | mci = igen6_pvt->imc[res.mc].mci; |
865 | |
866 | edac_dbg(2, "MC %d, ecclog = 0x%llx\n" , node->mc, node->ecclog); |
867 | igen6_mc_printk(mci, KERN_DEBUG, "HANDLING IBECC MEMORY ERROR\n" ); |
868 | igen6_mc_printk(mci, KERN_DEBUG, "ADDR 0x%llx " , res.sys_addr); |
869 | |
870 | if (!igen6_decode(res: &res)) |
871 | igen6_output_error(res: &res, mci, ecclog: node->ecclog); |
872 | |
873 | gen_pool_free(pool: ecclog_pool, addr: (unsigned long)node, size: sizeof(*node)); |
874 | } |
875 | } |
876 | |
877 | static void ecclog_irq_work_cb(struct irq_work *irq_work) |
878 | { |
879 | int i; |
880 | |
881 | for (i = 0; i < res_cfg->num_imc; i++) |
882 | errsts_clear(imc: &igen6_pvt->imc[i]); |
883 | |
884 | if (!llist_empty(head: &ecclog_llist)) |
885 | schedule_work(work: &ecclog_work); |
886 | } |
887 | |
888 | static int ecclog_nmi_handler(unsigned int cmd, struct pt_regs *regs) |
889 | { |
890 | unsigned char reason; |
891 | |
892 | if (!ecclog_handler()) |
893 | return NMI_DONE; |
894 | |
895 | /* |
896 | * Both In-Band ECC correctable error and uncorrectable error are |
897 | * reported by SERR# NMI. The NMI generic code (see pci_serr_error()) |
898 | * doesn't clear the bit NMI_REASON_CLEAR_SERR (in port 0x61) to |
899 | * re-enable the SERR# NMI after NMI handling. So clear this bit here |
900 | * to re-enable SERR# NMI for receiving future In-Band ECC errors. |
901 | */ |
902 | reason = x86_platform.get_nmi_reason() & NMI_REASON_CLEAR_MASK; |
903 | reason |= NMI_REASON_CLEAR_SERR; |
904 | outb(value: reason, NMI_REASON_PORT); |
905 | reason &= ~NMI_REASON_CLEAR_SERR; |
906 | outb(value: reason, NMI_REASON_PORT); |
907 | |
908 | return NMI_HANDLED; |
909 | } |
910 | |
911 | static int ecclog_mce_handler(struct notifier_block *nb, unsigned long val, |
912 | void *data) |
913 | { |
914 | struct mce *mce = (struct mce *)data; |
915 | char *type; |
916 | |
917 | if (mce->kflags & MCE_HANDLED_CEC) |
918 | return NOTIFY_DONE; |
919 | |
920 | /* |
921 | * Ignore unless this is a memory related error. |
922 | * We don't check the bit MCI_STATUS_ADDRV of MCi_STATUS here, |
923 | * since this bit isn't set on some CPU (e.g., Tiger Lake UP3). |
924 | */ |
925 | if ((mce->status & 0xefff) >> 7 != 1) |
926 | return NOTIFY_DONE; |
927 | |
928 | if (mce->mcgstatus & MCG_STATUS_MCIP) |
929 | type = "Exception" ; |
930 | else |
931 | type = "Event" ; |
932 | |
933 | edac_dbg(0, "CPU %d: Machine Check %s: 0x%llx Bank %d: 0x%llx\n" , |
934 | mce->extcpu, type, mce->mcgstatus, |
935 | mce->bank, mce->status); |
936 | edac_dbg(0, "TSC 0x%llx\n" , mce->tsc); |
937 | edac_dbg(0, "ADDR 0x%llx\n" , mce->addr); |
938 | edac_dbg(0, "MISC 0x%llx\n" , mce->misc); |
939 | edac_dbg(0, "PROCESSOR %u:0x%x TIME %llu SOCKET %u APIC 0x%x\n" , |
940 | mce->cpuvendor, mce->cpuid, mce->time, |
941 | mce->socketid, mce->apicid); |
942 | /* |
943 | * We just use the Machine Check for the memory error notification. |
944 | * Each memory controller is associated with an IBECC instance. |
945 | * Directly read and clear the error information(error address and |
946 | * error type) on all the IBECC instances so that we know on which |
947 | * memory controller the memory error(s) occurred. |
948 | */ |
949 | if (!ecclog_handler()) |
950 | return NOTIFY_DONE; |
951 | |
952 | mce->kflags |= MCE_HANDLED_EDAC; |
953 | |
954 | return NOTIFY_DONE; |
955 | } |
956 | |
957 | static struct notifier_block ecclog_mce_dec = { |
958 | .notifier_call = ecclog_mce_handler, |
959 | .priority = MCE_PRIO_EDAC, |
960 | }; |
961 | |
962 | static bool igen6_check_ecc(struct igen6_imc *imc) |
963 | { |
964 | u32 activate = readl(addr: imc->window + IBECC_ACTIVATE_OFFSET); |
965 | |
966 | return !!(activate & IBECC_ACTIVATE_EN); |
967 | } |
968 | |
969 | static int igen6_get_dimm_config(struct mem_ctl_info *mci) |
970 | { |
971 | struct igen6_imc *imc = mci->pvt_info; |
972 | u32 mad_inter, mad_intra, mad_dimm; |
973 | int i, j, ndimms, mc = imc->mc; |
974 | struct dimm_info *dimm; |
975 | enum mem_type mtype; |
976 | enum dev_type dtype; |
977 | u64 dsize; |
978 | bool ecc; |
979 | |
980 | edac_dbg(2, "\n" ); |
981 | |
982 | mad_inter = readl(addr: imc->window + MAD_INTER_CHANNEL_OFFSET); |
983 | mtype = get_memory_type(mad_inter); |
984 | ecc = igen6_check_ecc(imc); |
985 | imc->ch_s_size = MAD_INTER_CHANNEL_CH_S_SIZE(mad_inter); |
986 | imc->ch_l_map = MAD_INTER_CHANNEL_CH_L_MAP(mad_inter); |
987 | |
988 | for (i = 0; i < NUM_CHANNELS; i++) { |
989 | mad_intra = readl(addr: imc->window + MAD_INTRA_CH0_OFFSET + i * 4); |
990 | mad_dimm = readl(addr: imc->window + MAD_DIMM_CH0_OFFSET + i * 4); |
991 | |
992 | imc->dimm_l_size[i] = MAD_DIMM_CH_DIMM_L_SIZE(mad_dimm); |
993 | imc->dimm_s_size[i] = MAD_DIMM_CH_DIMM_S_SIZE(mad_dimm); |
994 | imc->dimm_l_map[i] = MAD_INTRA_CH_DIMM_L_MAP(mad_intra); |
995 | imc->size += imc->dimm_s_size[i]; |
996 | imc->size += imc->dimm_l_size[i]; |
997 | ndimms = 0; |
998 | |
999 | for (j = 0; j < NUM_DIMMS; j++) { |
1000 | dimm = edac_get_dimm(mci, layer0: i, layer1: j, layer2: 0); |
1001 | |
1002 | if (j ^ imc->dimm_l_map[i]) { |
1003 | dtype = get_width(dimm_l: 0, mad_dimm); |
1004 | dsize = imc->dimm_s_size[i]; |
1005 | } else { |
1006 | dtype = get_width(dimm_l: 1, mad_dimm); |
1007 | dsize = imc->dimm_l_size[i]; |
1008 | } |
1009 | |
1010 | if (!dsize) |
1011 | continue; |
1012 | |
1013 | dimm->grain = 64; |
1014 | dimm->mtype = mtype; |
1015 | dimm->dtype = dtype; |
1016 | dimm->nr_pages = MiB_TO_PAGES(dsize >> 20); |
1017 | dimm->edac_mode = EDAC_SECDED; |
1018 | snprintf(buf: dimm->label, size: sizeof(dimm->label), |
1019 | fmt: "MC#%d_Chan#%d_DIMM#%d" , mc, i, j); |
1020 | edac_dbg(0, "MC %d, Channel %d, DIMM %d, Size %llu MiB (%u pages)\n" , |
1021 | mc, i, j, dsize >> 20, dimm->nr_pages); |
1022 | |
1023 | ndimms++; |
1024 | } |
1025 | |
1026 | if (ndimms && !ecc) { |
1027 | igen6_printk(KERN_ERR, "MC%d In-Band ECC is disabled\n" , mc); |
1028 | return -ENODEV; |
1029 | } |
1030 | } |
1031 | |
1032 | edac_dbg(0, "MC %d, total size %llu MiB\n" , mc, imc->size >> 20); |
1033 | |
1034 | return 0; |
1035 | } |
1036 | |
1037 | #ifdef CONFIG_EDAC_DEBUG |
1038 | /* Top of upper usable DRAM */ |
1039 | static u64 igen6_touud; |
1040 | #define TOUUD_OFFSET 0xa8 |
1041 | |
1042 | static void igen6_reg_dump(struct igen6_imc *imc) |
1043 | { |
1044 | int i; |
1045 | |
1046 | edac_dbg(2, "CHANNEL_HASH : 0x%x\n" , |
1047 | readl(imc->window + CHANNEL_HASH_OFFSET)); |
1048 | edac_dbg(2, "CHANNEL_EHASH : 0x%x\n" , |
1049 | readl(imc->window + CHANNEL_EHASH_OFFSET)); |
1050 | edac_dbg(2, "MAD_INTER_CHANNEL: 0x%x\n" , |
1051 | readl(imc->window + MAD_INTER_CHANNEL_OFFSET)); |
1052 | edac_dbg(2, "ECC_ERROR_LOG : 0x%llx\n" , |
1053 | readq(imc->window + ECC_ERROR_LOG_OFFSET)); |
1054 | |
1055 | for (i = 0; i < NUM_CHANNELS; i++) { |
1056 | edac_dbg(2, "MAD_INTRA_CH%d : 0x%x\n" , i, |
1057 | readl(imc->window + MAD_INTRA_CH0_OFFSET + i * 4)); |
1058 | edac_dbg(2, "MAD_DIMM_CH%d : 0x%x\n" , i, |
1059 | readl(imc->window + MAD_DIMM_CH0_OFFSET + i * 4)); |
1060 | } |
1061 | edac_dbg(2, "TOLUD : 0x%x" , igen6_tolud); |
1062 | edac_dbg(2, "TOUUD : 0x%llx" , igen6_touud); |
1063 | edac_dbg(2, "TOM : 0x%llx" , igen6_tom); |
1064 | } |
1065 | |
1066 | static struct dentry *igen6_test; |
1067 | |
1068 | static int debugfs_u64_set(void *data, u64 val) |
1069 | { |
1070 | u64 ecclog; |
1071 | |
1072 | if ((val >= igen6_tolud && val < _4GB) || val >= igen6_touud) { |
1073 | edac_dbg(0, "Address 0x%llx out of range\n" , val); |
1074 | return 0; |
1075 | } |
1076 | |
1077 | pr_warn_once("Fake error to 0x%llx injected via debugfs\n" , val); |
1078 | |
1079 | val >>= ECC_ERROR_LOG_ADDR_SHIFT; |
1080 | ecclog = (val << ECC_ERROR_LOG_ADDR_SHIFT) | ECC_ERROR_LOG_CE; |
1081 | |
1082 | if (!ecclog_gen_pool_add(mc: 0, ecclog)) |
1083 | irq_work_queue(work: &ecclog_irq_work); |
1084 | |
1085 | return 0; |
1086 | } |
1087 | DEFINE_SIMPLE_ATTRIBUTE(fops_u64_wo, NULL, debugfs_u64_set, "%llu\n" ); |
1088 | |
1089 | static void igen6_debug_setup(void) |
1090 | { |
1091 | igen6_test = edac_debugfs_create_dir(dirname: "igen6_test" ); |
1092 | if (!igen6_test) |
1093 | return; |
1094 | |
1095 | if (!edac_debugfs_create_file(name: "addr" , mode: 0200, parent: igen6_test, |
1096 | NULL, fops: &fops_u64_wo)) { |
1097 | debugfs_remove(dentry: igen6_test); |
1098 | igen6_test = NULL; |
1099 | } |
1100 | } |
1101 | |
1102 | static void igen6_debug_teardown(void) |
1103 | { |
1104 | debugfs_remove_recursive(dentry: igen6_test); |
1105 | } |
1106 | #else |
/* No-op stand-ins used when CONFIG_EDAC_DEBUG is not set. */
static void igen6_reg_dump(struct igen6_imc *imc)
{
}

static void igen6_debug_setup(void)
{
}

static void igen6_debug_teardown(void)
{
}
1110 | #endif |
1111 | |
1112 | static int igen6_pci_setup(struct pci_dev *pdev, u64 *mchbar) |
1113 | { |
1114 | union { |
1115 | u64 v; |
1116 | struct { |
1117 | u32 v_lo; |
1118 | u32 v_hi; |
1119 | }; |
1120 | } u; |
1121 | |
1122 | edac_dbg(2, "\n" ); |
1123 | |
1124 | if (!res_cfg->ibecc_available(pdev)) { |
1125 | edac_dbg(2, "No In-Band ECC IP\n" ); |
1126 | goto fail; |
1127 | } |
1128 | |
1129 | if (pci_read_config_dword(dev: pdev, TOLUD_OFFSET, val: &igen6_tolud)) { |
1130 | igen6_printk(KERN_ERR, "Failed to read TOLUD\n" ); |
1131 | goto fail; |
1132 | } |
1133 | |
1134 | igen6_tolud &= GENMASK(31, 20); |
1135 | |
1136 | if (pci_read_config_dword(dev: pdev, TOM_OFFSET, val: &u.v_lo)) { |
1137 | igen6_printk(KERN_ERR, "Failed to read lower TOM\n" ); |
1138 | goto fail; |
1139 | } |
1140 | |
1141 | if (pci_read_config_dword(dev: pdev, TOM_OFFSET + 4, val: &u.v_hi)) { |
1142 | igen6_printk(KERN_ERR, "Failed to read upper TOM\n" ); |
1143 | goto fail; |
1144 | } |
1145 | |
1146 | igen6_tom = u.v & GENMASK_ULL(38, 20); |
1147 | |
1148 | if (get_mchbar(pdev, mchbar)) |
1149 | goto fail; |
1150 | |
1151 | #ifdef CONFIG_EDAC_DEBUG |
1152 | if (pci_read_config_dword(dev: pdev, TOUUD_OFFSET, val: &u.v_lo)) |
1153 | edac_dbg(2, "Failed to read lower TOUUD\n" ); |
1154 | else if (pci_read_config_dword(dev: pdev, TOUUD_OFFSET + 4, val: &u.v_hi)) |
1155 | edac_dbg(2, "Failed to read upper TOUUD\n" ); |
1156 | else |
1157 | igen6_touud = u.v & GENMASK_ULL(38, 20); |
1158 | #endif |
1159 | |
1160 | return 0; |
1161 | fail: |
1162 | return -ENODEV; |
1163 | } |
1164 | |
1165 | static int igen6_register_mci(int mc, u64 mchbar, struct pci_dev *pdev) |
1166 | { |
1167 | struct edac_mc_layer layers[2]; |
1168 | struct mem_ctl_info *mci; |
1169 | struct igen6_imc *imc; |
1170 | void __iomem *window; |
1171 | int rc; |
1172 | |
1173 | edac_dbg(2, "\n" ); |
1174 | |
1175 | mchbar += mc * MCHBAR_SIZE; |
1176 | window = ioremap(offset: mchbar, MCHBAR_SIZE); |
1177 | if (!window) { |
1178 | igen6_printk(KERN_ERR, "Failed to ioremap 0x%llx\n" , mchbar); |
1179 | return -ENODEV; |
1180 | } |
1181 | |
1182 | layers[0].type = EDAC_MC_LAYER_CHANNEL; |
1183 | layers[0].size = NUM_CHANNELS; |
1184 | layers[0].is_virt_csrow = false; |
1185 | layers[1].type = EDAC_MC_LAYER_SLOT; |
1186 | layers[1].size = NUM_DIMMS; |
1187 | layers[1].is_virt_csrow = true; |
1188 | |
1189 | mci = edac_mc_alloc(mc_num: mc, ARRAY_SIZE(layers), layers, sz_pvt: 0); |
1190 | if (!mci) { |
1191 | rc = -ENOMEM; |
1192 | goto fail; |
1193 | } |
1194 | |
1195 | mci->ctl_name = kasprintf(GFP_KERNEL, fmt: "Intel_client_SoC MC#%d" , mc); |
1196 | if (!mci->ctl_name) { |
1197 | rc = -ENOMEM; |
1198 | goto fail2; |
1199 | } |
1200 | |
1201 | mci->mtype_cap = MEM_FLAG_LPDDR4 | MEM_FLAG_DDR4; |
1202 | mci->edac_ctl_cap = EDAC_FLAG_SECDED; |
1203 | mci->edac_cap = EDAC_FLAG_SECDED; |
1204 | mci->mod_name = EDAC_MOD_STR; |
1205 | mci->dev_name = pci_name(pdev); |
1206 | mci->pvt_info = &igen6_pvt->imc[mc]; |
1207 | |
1208 | imc = mci->pvt_info; |
1209 | device_initialize(dev: &imc->dev); |
1210 | /* |
1211 | * EDAC core uses mci->pdev(pointer of structure device) as |
1212 | * memory controller ID. The client SoCs attach one or more |
1213 | * memory controllers to single pci_dev (single pci_dev->dev |
1214 | * can be for multiple memory controllers). |
1215 | * |
1216 | * To make mci->pdev unique, assign pci_dev->dev to mci->pdev |
1217 | * for the first memory controller and assign a unique imc->dev |
1218 | * to mci->pdev for each non-first memory controller. |
1219 | */ |
1220 | mci->pdev = mc ? &imc->dev : &pdev->dev; |
1221 | imc->mc = mc; |
1222 | imc->pdev = pdev; |
1223 | imc->window = window; |
1224 | |
1225 | igen6_reg_dump(imc); |
1226 | |
1227 | rc = igen6_get_dimm_config(mci); |
1228 | if (rc) |
1229 | goto fail3; |
1230 | |
1231 | rc = edac_mc_add_mc(mci); |
1232 | if (rc) { |
1233 | igen6_printk(KERN_ERR, "Failed to register mci#%d\n" , mc); |
1234 | goto fail3; |
1235 | } |
1236 | |
1237 | imc->mci = mci; |
1238 | return 0; |
1239 | fail3: |
1240 | kfree(objp: mci->ctl_name); |
1241 | fail2: |
1242 | edac_mc_free(mci); |
1243 | fail: |
1244 | iounmap(addr: window); |
1245 | return rc; |
1246 | } |
1247 | |
1248 | static void igen6_unregister_mcis(void) |
1249 | { |
1250 | struct mem_ctl_info *mci; |
1251 | struct igen6_imc *imc; |
1252 | int i; |
1253 | |
1254 | edac_dbg(2, "\n" ); |
1255 | |
1256 | for (i = 0; i < res_cfg->num_imc; i++) { |
1257 | imc = &igen6_pvt->imc[i]; |
1258 | mci = imc->mci; |
1259 | if (!mci) |
1260 | continue; |
1261 | |
1262 | edac_mc_del_mc(dev: mci->pdev); |
1263 | kfree(objp: mci->ctl_name); |
1264 | edac_mc_free(mci); |
1265 | iounmap(addr: imc->window); |
1266 | } |
1267 | } |
1268 | |
1269 | static int igen6_mem_slice_setup(u64 mchbar) |
1270 | { |
1271 | struct igen6_imc *imc = &igen6_pvt->imc[0]; |
1272 | u64 base = mchbar + res_cfg->cmf_base; |
1273 | u32 offset = res_cfg->ms_hash_offset; |
1274 | u32 size = res_cfg->cmf_size; |
1275 | u64 ms_s_size, ms_hash; |
1276 | void __iomem *cmf; |
1277 | int ms_l_map; |
1278 | |
1279 | edac_dbg(2, "\n" ); |
1280 | |
1281 | if (imc[0].size < imc[1].size) { |
1282 | ms_s_size = imc[0].size; |
1283 | ms_l_map = 1; |
1284 | } else { |
1285 | ms_s_size = imc[1].size; |
1286 | ms_l_map = 0; |
1287 | } |
1288 | |
1289 | igen6_pvt->ms_s_size = ms_s_size; |
1290 | igen6_pvt->ms_l_map = ms_l_map; |
1291 | |
1292 | edac_dbg(0, "ms_s_size: %llu MiB, ms_l_map %d\n" , |
1293 | ms_s_size >> 20, ms_l_map); |
1294 | |
1295 | if (!size) |
1296 | return 0; |
1297 | |
1298 | cmf = ioremap(offset: base, size); |
1299 | if (!cmf) { |
1300 | igen6_printk(KERN_ERR, "Failed to ioremap cmf 0x%llx\n" , base); |
1301 | return -ENODEV; |
1302 | } |
1303 | |
1304 | ms_hash = readq(addr: cmf + offset); |
1305 | igen6_pvt->ms_hash = ms_hash; |
1306 | |
1307 | edac_dbg(0, "MEM_SLICE_HASH: 0x%llx\n" , ms_hash); |
1308 | |
1309 | iounmap(addr: cmf); |
1310 | |
1311 | return 0; |
1312 | } |
1313 | |
1314 | static int register_err_handler(void) |
1315 | { |
1316 | int rc; |
1317 | |
1318 | if (res_cfg->machine_check) { |
1319 | mce_register_decode_chain(nb: &ecclog_mce_dec); |
1320 | return 0; |
1321 | } |
1322 | |
1323 | rc = register_nmi_handler(NMI_SERR, ecclog_nmi_handler, |
1324 | 0, IGEN6_NMI_NAME); |
1325 | if (rc) { |
1326 | igen6_printk(KERN_ERR, "Failed to register NMI handler\n" ); |
1327 | return rc; |
1328 | } |
1329 | |
1330 | return 0; |
1331 | } |
1332 | |
1333 | static void unregister_err_handler(void) |
1334 | { |
1335 | if (res_cfg->machine_check) { |
1336 | mce_unregister_decode_chain(nb: &ecclog_mce_dec); |
1337 | return; |
1338 | } |
1339 | |
1340 | unregister_nmi_handler(NMI_SERR, IGEN6_NMI_NAME); |
1341 | } |
1342 | |
1343 | static int igen6_probe(struct pci_dev *pdev, const struct pci_device_id *ent) |
1344 | { |
1345 | u64 mchbar; |
1346 | int i, rc; |
1347 | |
1348 | edac_dbg(2, "\n" ); |
1349 | |
1350 | igen6_pvt = kzalloc(size: sizeof(*igen6_pvt), GFP_KERNEL); |
1351 | if (!igen6_pvt) |
1352 | return -ENOMEM; |
1353 | |
1354 | res_cfg = (struct res_config *)ent->driver_data; |
1355 | |
1356 | rc = igen6_pci_setup(pdev, mchbar: &mchbar); |
1357 | if (rc) |
1358 | goto fail; |
1359 | |
1360 | for (i = 0; i < res_cfg->num_imc; i++) { |
1361 | rc = igen6_register_mci(mc: i, mchbar, pdev); |
1362 | if (rc) |
1363 | goto fail2; |
1364 | } |
1365 | |
1366 | if (res_cfg->num_imc > 1) { |
1367 | rc = igen6_mem_slice_setup(mchbar); |
1368 | if (rc) |
1369 | goto fail2; |
1370 | } |
1371 | |
1372 | ecclog_pool = ecclog_gen_pool_create(); |
1373 | if (!ecclog_pool) { |
1374 | rc = -ENOMEM; |
1375 | goto fail2; |
1376 | } |
1377 | |
1378 | INIT_WORK(&ecclog_work, ecclog_work_cb); |
1379 | init_irq_work(work: &ecclog_irq_work, func: ecclog_irq_work_cb); |
1380 | |
1381 | rc = register_err_handler(); |
1382 | if (rc) |
1383 | goto fail3; |
1384 | |
1385 | /* Enable error reporting */ |
1386 | rc = errcmd_enable_error_reporting(enable: true); |
1387 | if (rc) { |
1388 | igen6_printk(KERN_ERR, "Failed to enable error reporting\n" ); |
1389 | goto fail4; |
1390 | } |
1391 | |
1392 | /* Check if any pending errors before/during the registration of the error handler */ |
1393 | ecclog_handler(); |
1394 | |
1395 | igen6_debug_setup(); |
1396 | return 0; |
1397 | fail4: |
1398 | unregister_nmi_handler(NMI_SERR, IGEN6_NMI_NAME); |
1399 | fail3: |
1400 | gen_pool_destroy(ecclog_pool); |
1401 | fail2: |
1402 | igen6_unregister_mcis(); |
1403 | fail: |
1404 | kfree(objp: igen6_pvt); |
1405 | return rc; |
1406 | } |
1407 | |
1408 | static void igen6_remove(struct pci_dev *pdev) |
1409 | { |
1410 | edac_dbg(2, "\n" ); |
1411 | |
1412 | igen6_debug_teardown(); |
1413 | errcmd_enable_error_reporting(enable: false); |
1414 | unregister_err_handler(); |
1415 | irq_work_sync(work: &ecclog_irq_work); |
1416 | flush_work(work: &ecclog_work); |
1417 | gen_pool_destroy(ecclog_pool); |
1418 | igen6_unregister_mcis(); |
1419 | kfree(objp: igen6_pvt); |
1420 | } |
1421 | |
1422 | static struct pci_driver igen6_driver = { |
1423 | .name = EDAC_MOD_STR, |
1424 | .probe = igen6_probe, |
1425 | .remove = igen6_remove, |
1426 | .id_table = igen6_pci_tbl, |
1427 | }; |
1428 | |
1429 | static int __init igen6_init(void) |
1430 | { |
1431 | const char *owner; |
1432 | int rc; |
1433 | |
1434 | edac_dbg(2, "\n" ); |
1435 | |
1436 | if (ghes_get_devices()) |
1437 | return -EBUSY; |
1438 | |
1439 | owner = edac_get_owner(); |
1440 | if (owner && strncmp(owner, EDAC_MOD_STR, sizeof(EDAC_MOD_STR))) |
1441 | return -EBUSY; |
1442 | |
1443 | edac_op_state = EDAC_OPSTATE_NMI; |
1444 | |
1445 | rc = pci_register_driver(&igen6_driver); |
1446 | if (rc) |
1447 | return rc; |
1448 | |
1449 | igen6_printk(KERN_INFO, "%s\n" , IGEN6_REVISION); |
1450 | |
1451 | return 0; |
1452 | } |
1453 | |
1454 | static void __exit igen6_exit(void) |
1455 | { |
1456 | edac_dbg(2, "\n" ); |
1457 | |
1458 | pci_unregister_driver(dev: &igen6_driver); |
1459 | } |
1460 | |
1461 | module_init(igen6_init); |
1462 | module_exit(igen6_exit); |
1463 | |
1464 | MODULE_LICENSE("GPL v2" ); |
1465 | MODULE_AUTHOR("Qiuxu Zhuo" ); |
1466 | MODULE_DESCRIPTION("MC Driver for Intel client SoC using In-Band ECC" ); |
1467 | |