1 | // SPDX-License-Identifier: GPL-2.0 |
2 | /* |
3 | * Driver for Intel(R) 10nm server memory controller. |
4 | * Copyright (c) 2019, Intel Corporation. |
5 | * |
6 | */ |
7 | |
8 | #include <linux/kernel.h> |
9 | #include <linux/io.h> |
10 | #include <asm/cpu_device_id.h> |
11 | #include <asm/intel-family.h> |
12 | #include <asm/mce.h> |
13 | #include "edac_module.h" |
14 | #include "skx_common.h" |
15 | |
/* Driver revision string and the module name used by the EDAC core. */
#define I10NM_REVISION	"v0.0.6"
#define EDAC_MOD_STR	"i10nm_edac"

/* Log through edac_printk() with the fixed "i10nm" tag. */
#define i10nm_printk(level, fmt, args...)	\
	edac_printk(level, "i10nm", fmt, ##args)
22 | |
/*
 * PCI configuration space readers.
 *
 * Each macro expands to a pci_read_config_dword() call, so the expression
 * evaluates to that call's return code and the dword read is stored in @val.
 * Several of them pick the register offset from res_cfg->type.
 */
#define I10NM_GET_SCK_BAR(skt, val)	\
	pci_read_config_dword((skt)->uracu, 0xd0, &(val))
#define I10NM_GET_IMC_BAR(skt, idx, val)	\
	pci_read_config_dword((skt)->uracu,	\
	(res_cfg->type == GNR ? 0xd4 : 0xd8) + (idx) * 4, &(val))
#define I10NM_GET_SAD(skt, base, idx, val)	\
	pci_read_config_dword((skt)->sad_all, (base) + (idx) *	\
	(res_cfg->type == GNR ? 12 : 8), &(val))
#define I10NM_GET_HBM_IMC_BAR(skt, val)	\
	pci_read_config_dword((skt)->uracu, 0xd4, &(val))
#define I10NM_GET_CAPID3_CFG(skt, val)	\
	pci_read_config_dword((skt)->pcu_cr3,	\
	res_cfg->type == GNR ? 0x290 : 0x90, &(val))
#define I10NM_GET_CAPID5_CFG(skt, val)	\
	pci_read_config_dword((skt)->pcu_cr3,	\
	res_cfg->type == GNR ? 0x298 : 0x98, &(val))

/*
 * MMIO readers for per-channel registers.
 *
 * The address is (mc)->mbase plus a register offset (chosen by (mc)->hbm_mc
 * and, for some registers, res_cfg->type) plus @ch channel strides of
 * (mc)->chan_mmio_sz bytes. I10NM_GET_DIMMMTR additionally steps 4 bytes per
 * @slot.
 */
#define I10NM_GET_DIMMMTR(mc, ch, slot)	\
	readl((mc)->mbase + ((mc)->hbm_mc ? 0x80c :	\
	(res_cfg->type == GNR ? 0xc0c : 0x2080c)) +	\
	(ch) * (mc)->chan_mmio_sz + (slot) * 4)
#define I10NM_GET_MCDDRTCFG(mc, ch)	\
	readl((mc)->mbase + ((mc)->hbm_mc ? 0x970 : 0x20970) +	\
	(ch) * (mc)->chan_mmio_sz)
#define I10NM_GET_MCMTR(mc, ch)	\
	readl((mc)->mbase + ((mc)->hbm_mc ? 0xef8 :	\
	(res_cfg->type == GNR ? 0xaf8 : 0x20ef8)) +	\
	(ch) * (mc)->chan_mmio_sz)
#define I10NM_GET_AMAP(mc, ch)	\
	readl((mc)->mbase + ((mc)->hbm_mc ? 0x814 :	\
	(res_cfg->type == GNR ? 0xc14 : 0x20814)) +	\
	(ch) * (mc)->chan_mmio_sz)

/* Generic 32/64-bit access to register @off inside channel @ch of @mc. */
#define I10NM_GET_REG32(mc, ch, off)	\
	readl((mc)->mbase + (ch) * (mc)->chan_mmio_sz + (off))
#define I10NM_GET_REG64(mc, ch, off)	\
	readq((mc)->mbase + (ch) * (mc)->chan_mmio_sz + (off))
#define I10NM_SET_REG32(mc, ch, off, val)	\
	writel(val, (mc)->mbase + (ch) * (mc)->chan_mmio_sz + (off))
60 | |
/* Socket MMIO base: bits [28:0] of the socket BAR value, shifted left by 23. */
#define I10NM_GET_SCK_MMIO_BASE(v)	(GET_BITFIELD(v, 0, 28) << 23)
/* Controller MMIO offset: bits [10:0] of the controller BAR value, in 4 KiB units. */
#define I10NM_GET_IMC_MMIO_OFFSET(v)	(GET_BITFIELD(v, 0, 10) << 12)
/* Controller MMIO size: (bits [23:13] - bits [10:0] + 1), in 4 KiB units. */
#define I10NM_GET_IMC_MMIO_SIZE(v)	((GET_BITFIELD(v, 13, 23) -	\
					  GET_BITFIELD(v, 0, 10) + 1) << 12)
/* Same offset decoding as the DDR case, plus a fixed 0x140000. */
#define I10NM_GET_HBM_IMC_MMIO_OFFSET(v)	\
	((GET_BITFIELD(v, 0, 10) << 12) + 0x140000)

/* Fixed MMIO geometry used for GNR DDR controllers and for HBM controllers. */
#define I10NM_GNR_IMC_MMIO_OFFSET	0x24c000
#define I10NM_GNR_IMC_MMIO_SIZE		0x4000
#define I10NM_HBM_IMC_MMIO_SIZE		0x9000

/* Field extractors applied to CAPID5, CAPID3 and MCMTR values respectively. */
#define I10NM_DDR_IMC_CH_CNT(v)		GET_BITFIELD(v, 21, 24)
#define I10NM_IS_HBM_PRESENT(v)		GET_BITFIELD(v, 27, 30)
#define I10NM_IS_HBM_IMC(v)		GET_BITFIELD(v, 29, 29)

/* Number of SAD entries scanned, and the two SAD bits that are tested. */
#define I10NM_MAX_SAD			16
#define I10NM_SAD_ENABLE(v)		GET_BITFIELD(v, 0, 0)
#define I10NM_SAD_NM_CACHEABLE(v)	GET_BITFIELD(v, 5, 5)

/* Bits of the retry_rd_err_log control/status register (table entry 0). */
#define RETRY_RD_ERR_LOG_UC		BIT(1)
#define RETRY_RD_ERR_LOG_NOOVER		BIT(14)
#define RETRY_RD_ERR_LOG_EN		BIT(15)
#define RETRY_RD_ERR_LOG_NOOVER_UC	(RETRY_RD_ERR_LOG_NOOVER | RETRY_RD_ERR_LOG_UC)
#define RETRY_RD_ERR_LOG_OVER_UC_V	(BIT(2) | RETRY_RD_ERR_LOG_UC | BIT(0))
84 | |
85 | static struct list_head *i10nm_edac_list; |
86 | |
87 | static struct res_config *res_cfg; |
88 | static int retry_rd_err_log; |
89 | static int decoding_via_mca; |
90 | static bool mem_cfg_2lm; |
91 | |
92 | static u32 offsets_scrub_icx[] = {0x22c60, 0x22c54, 0x22c5c, 0x22c58, 0x22c28, 0x20ed8}; |
93 | static u32 offsets_scrub_spr[] = {0x22c60, 0x22c54, 0x22f08, 0x22c58, 0x22c28, 0x20ed8}; |
94 | static u32 offsets_scrub_spr_hbm0[] = {0x2860, 0x2854, 0x2b08, 0x2858, 0x2828, 0x0ed8}; |
95 | static u32 offsets_scrub_spr_hbm1[] = {0x2c60, 0x2c54, 0x2f08, 0x2c58, 0x2c28, 0x0fa8}; |
96 | static u32 offsets_demand_icx[] = {0x22e54, 0x22e60, 0x22e64, 0x22e58, 0x22e5c, 0x20ee0}; |
97 | static u32 offsets_demand_spr[] = {0x22e54, 0x22e60, 0x22f10, 0x22e58, 0x22e5c, 0x20ee0}; |
98 | static u32 offsets_demand2_spr[] = {0x22c70, 0x22d80, 0x22f18, 0x22d58, 0x22c64, 0x20f10}; |
99 | static u32 offsets_demand_spr_hbm0[] = {0x2a54, 0x2a60, 0x2b10, 0x2a58, 0x2a5c, 0x0ee0}; |
100 | static u32 offsets_demand_spr_hbm1[] = {0x2e54, 0x2e60, 0x2f10, 0x2e58, 0x2e5c, 0x0fb0}; |
101 | |
102 | static void __enable_retry_rd_err_log(struct skx_imc *imc, int chan, bool enable, |
103 | u32 *offsets_scrub, u32 *offsets_demand, |
104 | u32 *offsets_demand2) |
105 | { |
106 | u32 s, d, d2; |
107 | |
108 | s = I10NM_GET_REG32(imc, chan, offsets_scrub[0]); |
109 | d = I10NM_GET_REG32(imc, chan, offsets_demand[0]); |
110 | if (offsets_demand2) |
111 | d2 = I10NM_GET_REG32(imc, chan, offsets_demand2[0]); |
112 | |
113 | if (enable) { |
114 | /* Save default configurations */ |
115 | imc->chan[chan].retry_rd_err_log_s = s; |
116 | imc->chan[chan].retry_rd_err_log_d = d; |
117 | if (offsets_demand2) |
118 | imc->chan[chan].retry_rd_err_log_d2 = d2; |
119 | |
120 | s &= ~RETRY_RD_ERR_LOG_NOOVER_UC; |
121 | s |= RETRY_RD_ERR_LOG_EN; |
122 | d &= ~RETRY_RD_ERR_LOG_NOOVER_UC; |
123 | d |= RETRY_RD_ERR_LOG_EN; |
124 | |
125 | if (offsets_demand2) { |
126 | d2 &= ~RETRY_RD_ERR_LOG_UC; |
127 | d2 |= RETRY_RD_ERR_LOG_NOOVER; |
128 | d2 |= RETRY_RD_ERR_LOG_EN; |
129 | } |
130 | } else { |
131 | /* Restore default configurations */ |
132 | if (imc->chan[chan].retry_rd_err_log_s & RETRY_RD_ERR_LOG_UC) |
133 | s |= RETRY_RD_ERR_LOG_UC; |
134 | if (imc->chan[chan].retry_rd_err_log_s & RETRY_RD_ERR_LOG_NOOVER) |
135 | s |= RETRY_RD_ERR_LOG_NOOVER; |
136 | if (!(imc->chan[chan].retry_rd_err_log_s & RETRY_RD_ERR_LOG_EN)) |
137 | s &= ~RETRY_RD_ERR_LOG_EN; |
138 | if (imc->chan[chan].retry_rd_err_log_d & RETRY_RD_ERR_LOG_UC) |
139 | d |= RETRY_RD_ERR_LOG_UC; |
140 | if (imc->chan[chan].retry_rd_err_log_d & RETRY_RD_ERR_LOG_NOOVER) |
141 | d |= RETRY_RD_ERR_LOG_NOOVER; |
142 | if (!(imc->chan[chan].retry_rd_err_log_d & RETRY_RD_ERR_LOG_EN)) |
143 | d &= ~RETRY_RD_ERR_LOG_EN; |
144 | |
145 | if (offsets_demand2) { |
146 | if (imc->chan[chan].retry_rd_err_log_d2 & RETRY_RD_ERR_LOG_UC) |
147 | d2 |= RETRY_RD_ERR_LOG_UC; |
148 | if (!(imc->chan[chan].retry_rd_err_log_d2 & RETRY_RD_ERR_LOG_NOOVER)) |
149 | d2 &= ~RETRY_RD_ERR_LOG_NOOVER; |
150 | if (!(imc->chan[chan].retry_rd_err_log_d2 & RETRY_RD_ERR_LOG_EN)) |
151 | d2 &= ~RETRY_RD_ERR_LOG_EN; |
152 | } |
153 | } |
154 | |
155 | I10NM_SET_REG32(imc, chan, offsets_scrub[0], s); |
156 | I10NM_SET_REG32(imc, chan, offsets_demand[0], d); |
157 | if (offsets_demand2) |
158 | I10NM_SET_REG32(imc, chan, offsets_demand2[0], d2); |
159 | } |
160 | |
161 | static void enable_retry_rd_err_log(bool enable) |
162 | { |
163 | int i, j, imc_num, chan_num; |
164 | struct skx_imc *imc; |
165 | struct skx_dev *d; |
166 | |
167 | edac_dbg(2, "\n" ); |
168 | |
169 | list_for_each_entry(d, i10nm_edac_list, list) { |
170 | imc_num = res_cfg->ddr_imc_num; |
171 | chan_num = res_cfg->ddr_chan_num; |
172 | |
173 | for (i = 0; i < imc_num; i++) { |
174 | imc = &d->imc[i]; |
175 | if (!imc->mbase) |
176 | continue; |
177 | |
178 | for (j = 0; j < chan_num; j++) |
179 | __enable_retry_rd_err_log(imc, chan: j, enable, |
180 | offsets_scrub: res_cfg->offsets_scrub, |
181 | offsets_demand: res_cfg->offsets_demand, |
182 | offsets_demand2: res_cfg->offsets_demand2); |
183 | } |
184 | |
185 | imc_num += res_cfg->hbm_imc_num; |
186 | chan_num = res_cfg->hbm_chan_num; |
187 | |
188 | for (; i < imc_num; i++) { |
189 | imc = &d->imc[i]; |
190 | if (!imc->mbase || !imc->hbm_mc) |
191 | continue; |
192 | |
193 | for (j = 0; j < chan_num; j++) { |
194 | __enable_retry_rd_err_log(imc, chan: j, enable, |
195 | offsets_scrub: res_cfg->offsets_scrub_hbm0, |
196 | offsets_demand: res_cfg->offsets_demand_hbm0, |
197 | NULL); |
198 | __enable_retry_rd_err_log(imc, chan: j, enable, |
199 | offsets_scrub: res_cfg->offsets_scrub_hbm1, |
200 | offsets_demand: res_cfg->offsets_demand_hbm1, |
201 | NULL); |
202 | } |
203 | } |
204 | } |
205 | } |
206 | |
207 | static void show_retry_rd_err_log(struct decoded_addr *res, char *msg, |
208 | int len, bool scrub_err) |
209 | { |
210 | struct skx_imc *imc = &res->dev->imc[res->imc]; |
211 | u32 log0, log1, log2, log3, log4; |
212 | u32 corr0, corr1, corr2, corr3; |
213 | u32 lxg0, lxg1, lxg3, lxg4; |
214 | u32 *xffsets = NULL; |
215 | u64 log2a, log5; |
216 | u64 lxg2a, lxg5; |
217 | u32 *offsets; |
218 | int n, pch; |
219 | |
220 | if (!imc->mbase) |
221 | return; |
222 | |
223 | if (imc->hbm_mc) { |
224 | pch = res->cs & 1; |
225 | |
226 | if (pch) |
227 | offsets = scrub_err ? res_cfg->offsets_scrub_hbm1 : |
228 | res_cfg->offsets_demand_hbm1; |
229 | else |
230 | offsets = scrub_err ? res_cfg->offsets_scrub_hbm0 : |
231 | res_cfg->offsets_demand_hbm0; |
232 | } else { |
233 | if (scrub_err) { |
234 | offsets = res_cfg->offsets_scrub; |
235 | } else { |
236 | offsets = res_cfg->offsets_demand; |
237 | xffsets = res_cfg->offsets_demand2; |
238 | } |
239 | } |
240 | |
241 | log0 = I10NM_GET_REG32(imc, res->channel, offsets[0]); |
242 | log1 = I10NM_GET_REG32(imc, res->channel, offsets[1]); |
243 | log3 = I10NM_GET_REG32(imc, res->channel, offsets[3]); |
244 | log4 = I10NM_GET_REG32(imc, res->channel, offsets[4]); |
245 | log5 = I10NM_GET_REG64(imc, res->channel, offsets[5]); |
246 | |
247 | if (xffsets) { |
248 | lxg0 = I10NM_GET_REG32(imc, res->channel, xffsets[0]); |
249 | lxg1 = I10NM_GET_REG32(imc, res->channel, xffsets[1]); |
250 | lxg3 = I10NM_GET_REG32(imc, res->channel, xffsets[3]); |
251 | lxg4 = I10NM_GET_REG32(imc, res->channel, xffsets[4]); |
252 | lxg5 = I10NM_GET_REG64(imc, res->channel, xffsets[5]); |
253 | } |
254 | |
255 | if (res_cfg->type == SPR) { |
256 | log2a = I10NM_GET_REG64(imc, res->channel, offsets[2]); |
257 | n = snprintf(buf: msg, size: len, fmt: " retry_rd_err_log[%.8x %.8x %.16llx %.8x %.8x %.16llx" , |
258 | log0, log1, log2a, log3, log4, log5); |
259 | |
260 | if (len - n > 0) { |
261 | if (xffsets) { |
262 | lxg2a = I10NM_GET_REG64(imc, res->channel, xffsets[2]); |
263 | n += snprintf(buf: msg + n, size: len - n, fmt: " %.8x %.8x %.16llx %.8x %.8x %.16llx]" , |
264 | lxg0, lxg1, lxg2a, lxg3, lxg4, lxg5); |
265 | } else { |
266 | n += snprintf(buf: msg + n, size: len - n, fmt: "]" ); |
267 | } |
268 | } |
269 | } else { |
270 | log2 = I10NM_GET_REG32(imc, res->channel, offsets[2]); |
271 | n = snprintf(buf: msg, size: len, fmt: " retry_rd_err_log[%.8x %.8x %.8x %.8x %.8x %.16llx]" , |
272 | log0, log1, log2, log3, log4, log5); |
273 | } |
274 | |
275 | if (imc->hbm_mc) { |
276 | if (pch) { |
277 | corr0 = I10NM_GET_REG32(imc, res->channel, 0x2c18); |
278 | corr1 = I10NM_GET_REG32(imc, res->channel, 0x2c1c); |
279 | corr2 = I10NM_GET_REG32(imc, res->channel, 0x2c20); |
280 | corr3 = I10NM_GET_REG32(imc, res->channel, 0x2c24); |
281 | } else { |
282 | corr0 = I10NM_GET_REG32(imc, res->channel, 0x2818); |
283 | corr1 = I10NM_GET_REG32(imc, res->channel, 0x281c); |
284 | corr2 = I10NM_GET_REG32(imc, res->channel, 0x2820); |
285 | corr3 = I10NM_GET_REG32(imc, res->channel, 0x2824); |
286 | } |
287 | } else { |
288 | corr0 = I10NM_GET_REG32(imc, res->channel, 0x22c18); |
289 | corr1 = I10NM_GET_REG32(imc, res->channel, 0x22c1c); |
290 | corr2 = I10NM_GET_REG32(imc, res->channel, 0x22c20); |
291 | corr3 = I10NM_GET_REG32(imc, res->channel, 0x22c24); |
292 | } |
293 | |
294 | if (len - n > 0) |
295 | snprintf(buf: msg + n, size: len - n, |
296 | fmt: " correrrcnt[%.4x %.4x %.4x %.4x %.4x %.4x %.4x %.4x]" , |
297 | corr0 & 0xffff, corr0 >> 16, |
298 | corr1 & 0xffff, corr1 >> 16, |
299 | corr2 & 0xffff, corr2 >> 16, |
300 | corr3 & 0xffff, corr3 >> 16); |
301 | |
302 | /* Clear status bits */ |
303 | if (retry_rd_err_log == 2) { |
304 | if (log0 & RETRY_RD_ERR_LOG_OVER_UC_V) { |
305 | log0 &= ~RETRY_RD_ERR_LOG_OVER_UC_V; |
306 | I10NM_SET_REG32(imc, res->channel, offsets[0], log0); |
307 | } |
308 | |
309 | if (xffsets && (lxg0 & RETRY_RD_ERR_LOG_OVER_UC_V)) { |
310 | lxg0 &= ~RETRY_RD_ERR_LOG_OVER_UC_V; |
311 | I10NM_SET_REG32(imc, res->channel, xffsets[0], lxg0); |
312 | } |
313 | } |
314 | } |
315 | |
/*
 * Look up the PCI device at @dom:@bus:@dev.@fun and enable it.
 *
 * RETURNS : the device, with the reference taken by the lookup still held,
 *           or NULL when it does not exist or cannot be enabled (the
 *           reference is dropped in the latter case).
 */
static struct pci_dev *pci_get_dev_wrapper(int dom, unsigned int bus,
					   unsigned int dev, unsigned int fun)
{
	struct pci_dev *found;

	found = pci_get_domain_bus_and_slot(dom, bus, PCI_DEVFN(dev, fun));
	if (!found) {
		edac_dbg(2, "No device %02x:%02x.%x\n",
			 bus, dev, fun);
		return NULL;
	}

	if (likely(pci_enable_device(found) >= 0))
		return found;

	edac_dbg(2, "Failed to enable device %02x:%02x.%x\n",
		 bus, dev, fun);
	pci_dev_put(found);

	return NULL;
}
337 | |
338 | /** |
339 | * i10nm_get_imc_num() - Get the number of present DDR memory controllers. |
340 | * |
341 | * @cfg : The pointer to the structure of EDAC resource configurations. |
342 | * |
343 | * For Granite Rapids CPUs, the number of present DDR memory controllers read |
344 | * at runtime overwrites the value statically configured in @cfg->ddr_imc_num. |
345 | * For other CPUs, the number of present DDR memory controllers is statically |
346 | * configured in @cfg->ddr_imc_num. |
347 | * |
348 | * RETURNS : 0 on success, < 0 on failure. |
349 | */ |
350 | static int i10nm_get_imc_num(struct res_config *cfg) |
351 | { |
352 | int n, imc_num, chan_num = 0; |
353 | struct skx_dev *d; |
354 | u32 reg; |
355 | |
356 | list_for_each_entry(d, i10nm_edac_list, list) { |
357 | d->pcu_cr3 = pci_get_dev_wrapper(dom: d->seg, bus: d->bus[res_cfg->pcu_cr3_bdf.bus], |
358 | dev: res_cfg->pcu_cr3_bdf.dev, |
359 | fun: res_cfg->pcu_cr3_bdf.fun); |
360 | if (!d->pcu_cr3) |
361 | continue; |
362 | |
363 | if (I10NM_GET_CAPID5_CFG(d, reg)) |
364 | continue; |
365 | |
366 | n = I10NM_DDR_IMC_CH_CNT(reg); |
367 | |
368 | if (!chan_num) { |
369 | chan_num = n; |
370 | edac_dbg(2, "Get DDR CH number: %d\n" , chan_num); |
371 | } else if (chan_num != n) { |
372 | i10nm_printk(KERN_NOTICE, "Get DDR CH numbers: %d, %d\n" , chan_num, n); |
373 | } |
374 | } |
375 | |
376 | switch (cfg->type) { |
377 | case GNR: |
378 | /* |
379 | * One channel per DDR memory controller for Granite Rapids CPUs. |
380 | */ |
381 | imc_num = chan_num; |
382 | |
383 | if (!imc_num) { |
384 | i10nm_printk(KERN_ERR, "Invalid DDR MC number\n" ); |
385 | return -ENODEV; |
386 | } |
387 | |
388 | if (imc_num > I10NM_NUM_DDR_IMC) { |
389 | i10nm_printk(KERN_ERR, "Need to make I10NM_NUM_DDR_IMC >= %d\n" , imc_num); |
390 | return -EINVAL; |
391 | } |
392 | |
393 | if (cfg->ddr_imc_num != imc_num) { |
394 | /* |
395 | * Store the number of present DDR memory controllers. |
396 | */ |
397 | cfg->ddr_imc_num = imc_num; |
398 | edac_dbg(2, "Set DDR MC number: %d" , imc_num); |
399 | } |
400 | |
401 | return 0; |
402 | default: |
403 | /* |
404 | * For other CPUs, the number of present DDR memory controllers |
405 | * is statically pre-configured in cfg->ddr_imc_num. |
406 | */ |
407 | return 0; |
408 | } |
409 | } |
410 | |
411 | static bool i10nm_check_2lm(struct res_config *cfg) |
412 | { |
413 | struct skx_dev *d; |
414 | u32 reg; |
415 | int i; |
416 | |
417 | list_for_each_entry(d, i10nm_edac_list, list) { |
418 | d->sad_all = pci_get_dev_wrapper(dom: d->seg, bus: d->bus[res_cfg->sad_all_bdf.bus], |
419 | dev: res_cfg->sad_all_bdf.dev, |
420 | fun: res_cfg->sad_all_bdf.fun); |
421 | if (!d->sad_all) |
422 | continue; |
423 | |
424 | for (i = 0; i < I10NM_MAX_SAD; i++) { |
425 | I10NM_GET_SAD(d, cfg->sad_all_offset, i, reg); |
426 | if (I10NM_SAD_ENABLE(reg) && I10NM_SAD_NM_CACHEABLE(reg)) { |
427 | edac_dbg(2, "2-level memory configuration.\n" ); |
428 | return true; |
429 | } |
430 | } |
431 | } |
432 | |
433 | return false; |
434 | } |
435 | |
436 | /* |
437 | * Check whether the error comes from DDRT by ICX/Tremont/SPR model specific error code. |
438 | * Refer to SDM vol3B 17.11.3/17.13.2 Intel IMC MC error codes for IA32_MCi_STATUS. |
439 | */ |
440 | static bool i10nm_mscod_is_ddrt(u32 mscod) |
441 | { |
442 | switch (res_cfg->type) { |
443 | case I10NM: |
444 | switch (mscod) { |
445 | case 0x0106: case 0x0107: |
446 | case 0x0800: case 0x0804: |
447 | case 0x0806 ... 0x0808: |
448 | case 0x080a ... 0x080e: |
449 | case 0x0810: case 0x0811: |
450 | case 0x0816: case 0x081e: |
451 | case 0x081f: |
452 | return true; |
453 | } |
454 | |
455 | break; |
456 | case SPR: |
457 | switch (mscod) { |
458 | case 0x0800: case 0x0804: |
459 | case 0x0806 ... 0x0808: |
460 | case 0x080a ... 0x080e: |
461 | case 0x0810: case 0x0811: |
462 | case 0x0816: case 0x081e: |
463 | case 0x081f: |
464 | return true; |
465 | } |
466 | |
467 | break; |
468 | default: |
469 | return false; |
470 | } |
471 | |
472 | return false; |
473 | } |
474 | |
475 | static bool i10nm_mc_decode_available(struct mce *mce) |
476 | { |
477 | #define ICX_IMCx_CHy 0x06666000 |
478 | u8 bank; |
479 | |
480 | if (!decoding_via_mca || mem_cfg_2lm) |
481 | return false; |
482 | |
483 | if ((mce->status & (MCI_STATUS_MISCV | MCI_STATUS_ADDRV)) |
484 | != (MCI_STATUS_MISCV | MCI_STATUS_ADDRV)) |
485 | return false; |
486 | |
487 | bank = mce->bank; |
488 | |
489 | switch (res_cfg->type) { |
490 | case I10NM: |
491 | /* Check whether the bank is one of {13,14,17,18,21,22,25,26} */ |
492 | if (!(ICX_IMCx_CHy & (1 << bank))) |
493 | return false; |
494 | break; |
495 | case SPR: |
496 | if (bank < 13 || bank > 20) |
497 | return false; |
498 | break; |
499 | default: |
500 | return false; |
501 | } |
502 | |
503 | /* DDRT errors can't be decoded from MCA bank registers */ |
504 | if (MCI_MISC_ECC_MODE(mce->misc) == MCI_MISC_ECC_DDRT) |
505 | return false; |
506 | |
507 | if (i10nm_mscod_is_ddrt(MCI_STATUS_MSCOD(mce->status))) |
508 | return false; |
509 | |
510 | return true; |
511 | } |
512 | |
513 | static bool i10nm_mc_decode(struct decoded_addr *res) |
514 | { |
515 | struct mce *m = res->mce; |
516 | struct skx_dev *d; |
517 | u8 bank; |
518 | |
519 | if (!i10nm_mc_decode_available(mce: m)) |
520 | return false; |
521 | |
522 | list_for_each_entry(d, i10nm_edac_list, list) { |
523 | if (d->imc[0].src_id == m->socketid) { |
524 | res->socket = m->socketid; |
525 | res->dev = d; |
526 | break; |
527 | } |
528 | } |
529 | |
530 | switch (res_cfg->type) { |
531 | case I10NM: |
532 | bank = m->bank - 13; |
533 | res->imc = bank / 4; |
534 | res->channel = bank % 2; |
535 | res->column = GET_BITFIELD(m->misc, 9, 18) << 2; |
536 | res->row = GET_BITFIELD(m->misc, 19, 39); |
537 | res->bank_group = GET_BITFIELD(m->misc, 40, 41); |
538 | res->bank_address = GET_BITFIELD(m->misc, 42, 43); |
539 | res->bank_group |= GET_BITFIELD(m->misc, 44, 44) << 2; |
540 | res->rank = GET_BITFIELD(m->misc, 56, 58); |
541 | res->dimm = res->rank >> 2; |
542 | res->rank = res->rank % 4; |
543 | break; |
544 | case SPR: |
545 | bank = m->bank - 13; |
546 | res->imc = bank / 2; |
547 | res->channel = bank % 2; |
548 | res->column = GET_BITFIELD(m->misc, 9, 18) << 2; |
549 | res->row = GET_BITFIELD(m->misc, 19, 36); |
550 | res->bank_group = GET_BITFIELD(m->misc, 37, 38); |
551 | res->bank_address = GET_BITFIELD(m->misc, 39, 40); |
552 | res->bank_group |= GET_BITFIELD(m->misc, 41, 41) << 2; |
553 | res->rank = GET_BITFIELD(m->misc, 57, 57); |
554 | res->dimm = GET_BITFIELD(m->misc, 58, 58); |
555 | break; |
556 | default: |
557 | return false; |
558 | } |
559 | |
560 | if (!res->dev) { |
561 | skx_printk(KERN_ERR, "No device for src_id %d imc %d\n" , |
562 | m->socketid, res->imc); |
563 | return false; |
564 | } |
565 | |
566 | return true; |
567 | } |
568 | |
569 | /** |
570 | * get_gnr_mdev() - Get the PCI device of the @logical_idx-th DDR memory controller. |
571 | * |
572 | * @d : The pointer to the structure of CPU socket EDAC device. |
573 | * @logical_idx : The logical index of the present memory controller (0 ~ max present MC# - 1). |
574 | * @physical_idx : To store the corresponding physical index of @logical_idx. |
575 | * |
576 | * RETURNS : The PCI device of the @logical_idx-th DDR memory controller, NULL on failure. |
577 | */ |
578 | static struct pci_dev *get_gnr_mdev(struct skx_dev *d, int logical_idx, int *physical_idx) |
579 | { |
580 | #define GNR_MAX_IMC_PCI_CNT 28 |
581 | |
582 | struct pci_dev *mdev; |
583 | int i, logical = 0; |
584 | |
585 | /* |
586 | * Detect present memory controllers from { PCI device: 8-5, function 7-1 } |
587 | */ |
588 | for (i = 0; i < GNR_MAX_IMC_PCI_CNT; i++) { |
589 | mdev = pci_get_dev_wrapper(dom: d->seg, |
590 | bus: d->bus[res_cfg->ddr_mdev_bdf.bus], |
591 | dev: res_cfg->ddr_mdev_bdf.dev + i / 7, |
592 | fun: res_cfg->ddr_mdev_bdf.fun + i % 7); |
593 | |
594 | if (mdev) { |
595 | if (logical == logical_idx) { |
596 | *physical_idx = i; |
597 | return mdev; |
598 | } |
599 | |
600 | pci_dev_put(dev: mdev); |
601 | logical++; |
602 | } |
603 | } |
604 | |
605 | return NULL; |
606 | } |
607 | |
608 | /** |
609 | * get_ddr_munit() - Get the resource of the i-th DDR memory controller. |
610 | * |
611 | * @d : The pointer to the structure of CPU socket EDAC device. |
612 | * @i : The index of the CPU socket relative DDR memory controller. |
613 | * @offset : To store the MMIO offset of the i-th DDR memory controller. |
614 | * @size : To store the MMIO size of the i-th DDR memory controller. |
615 | * |
616 | * RETURNS : The PCI device of the i-th DDR memory controller, NULL on failure. |
617 | */ |
618 | static struct pci_dev *get_ddr_munit(struct skx_dev *d, int i, u32 *offset, unsigned long *size) |
619 | { |
620 | struct pci_dev *mdev; |
621 | int physical_idx; |
622 | u32 reg; |
623 | |
624 | switch (res_cfg->type) { |
625 | case GNR: |
626 | if (I10NM_GET_IMC_BAR(d, 0, reg)) { |
627 | i10nm_printk(KERN_ERR, "Failed to get mc0 bar\n" ); |
628 | return NULL; |
629 | } |
630 | |
631 | mdev = get_gnr_mdev(d, logical_idx: i, physical_idx: &physical_idx); |
632 | if (!mdev) |
633 | return NULL; |
634 | |
635 | *offset = I10NM_GET_IMC_MMIO_OFFSET(reg) + |
636 | I10NM_GNR_IMC_MMIO_OFFSET + |
637 | physical_idx * I10NM_GNR_IMC_MMIO_SIZE; |
638 | *size = I10NM_GNR_IMC_MMIO_SIZE; |
639 | |
640 | break; |
641 | default: |
642 | if (I10NM_GET_IMC_BAR(d, i, reg)) { |
643 | i10nm_printk(KERN_ERR, "Failed to get mc%d bar\n" , i); |
644 | return NULL; |
645 | } |
646 | |
647 | mdev = pci_get_dev_wrapper(dom: d->seg, |
648 | bus: d->bus[res_cfg->ddr_mdev_bdf.bus], |
649 | dev: res_cfg->ddr_mdev_bdf.dev + i, |
650 | fun: res_cfg->ddr_mdev_bdf.fun); |
651 | if (!mdev) |
652 | return NULL; |
653 | |
654 | *offset = I10NM_GET_IMC_MMIO_OFFSET(reg); |
655 | *size = I10NM_GET_IMC_MMIO_SIZE(reg); |
656 | } |
657 | |
658 | return mdev; |
659 | } |
660 | |
661 | /** |
662 | * i10nm_imc_absent() - Check whether the memory controller @imc is absent |
663 | * |
664 | * @imc : The pointer to the structure of memory controller EDAC device. |
665 | * |
666 | * RETURNS : true if the memory controller EDAC device is absent, false otherwise. |
667 | */ |
668 | static bool i10nm_imc_absent(struct skx_imc *imc) |
669 | { |
670 | u32 mcmtr; |
671 | int i; |
672 | |
673 | switch (res_cfg->type) { |
674 | case SPR: |
675 | for (i = 0; i < res_cfg->ddr_chan_num; i++) { |
676 | mcmtr = I10NM_GET_MCMTR(imc, i); |
677 | edac_dbg(1, "ch%d mcmtr reg %x\n" , i, mcmtr); |
678 | if (mcmtr != ~0) |
679 | return false; |
680 | } |
681 | |
682 | /* |
683 | * Some workstations' absent memory controllers still |
684 | * appear as PCIe devices, misleading the EDAC driver. |
685 | * By observing that the MMIO registers of these absent |
686 | * memory controllers consistently hold the value of ~0. |
687 | * |
688 | * We identify a memory controller as absent by checking |
689 | * if its MMIO register "mcmtr" == ~0 in all its channels. |
690 | */ |
691 | return true; |
692 | default: |
693 | return false; |
694 | } |
695 | } |
696 | |
697 | static int i10nm_get_ddr_munits(void) |
698 | { |
699 | struct pci_dev *mdev; |
700 | void __iomem *mbase; |
701 | unsigned long size; |
702 | struct skx_dev *d; |
703 | int i, lmc, j = 0; |
704 | u32 reg, off; |
705 | u64 base; |
706 | |
707 | list_for_each_entry(d, i10nm_edac_list, list) { |
708 | d->util_all = pci_get_dev_wrapper(dom: d->seg, bus: d->bus[res_cfg->util_all_bdf.bus], |
709 | dev: res_cfg->util_all_bdf.dev, |
710 | fun: res_cfg->util_all_bdf.fun); |
711 | if (!d->util_all) |
712 | return -ENODEV; |
713 | |
714 | d->uracu = pci_get_dev_wrapper(dom: d->seg, bus: d->bus[res_cfg->uracu_bdf.bus], |
715 | dev: res_cfg->uracu_bdf.dev, |
716 | fun: res_cfg->uracu_bdf.fun); |
717 | if (!d->uracu) |
718 | return -ENODEV; |
719 | |
720 | if (I10NM_GET_SCK_BAR(d, reg)) { |
721 | i10nm_printk(KERN_ERR, "Failed to socket bar\n" ); |
722 | return -ENODEV; |
723 | } |
724 | |
725 | base = I10NM_GET_SCK_MMIO_BASE(reg); |
726 | edac_dbg(2, "socket%d mmio base 0x%llx (reg 0x%x)\n" , |
727 | j++, base, reg); |
728 | |
729 | for (lmc = 0, i = 0; i < res_cfg->ddr_imc_num; i++) { |
730 | mdev = get_ddr_munit(d, i, offset: &off, size: &size); |
731 | |
732 | if (i == 0 && !mdev) { |
733 | i10nm_printk(KERN_ERR, "No IMC found\n" ); |
734 | return -ENODEV; |
735 | } |
736 | if (!mdev) |
737 | continue; |
738 | |
739 | edac_dbg(2, "mc%d mmio base 0x%llx size 0x%lx (reg 0x%x)\n" , |
740 | i, base + off, size, reg); |
741 | |
742 | mbase = ioremap(offset: base + off, size); |
743 | if (!mbase) { |
744 | i10nm_printk(KERN_ERR, "Failed to ioremap 0x%llx\n" , |
745 | base + off); |
746 | return -ENODEV; |
747 | } |
748 | |
749 | d->imc[lmc].mbase = mbase; |
750 | if (i10nm_imc_absent(imc: &d->imc[lmc])) { |
751 | pci_dev_put(dev: mdev); |
752 | iounmap(addr: mbase); |
753 | d->imc[lmc].mbase = NULL; |
754 | edac_dbg(2, "Skip absent mc%d\n" , i); |
755 | continue; |
756 | } else { |
757 | d->imc[lmc].mdev = mdev; |
758 | lmc++; |
759 | } |
760 | } |
761 | } |
762 | |
763 | return 0; |
764 | } |
765 | |
766 | static bool i10nm_check_hbm_imc(struct skx_dev *d) |
767 | { |
768 | u32 reg; |
769 | |
770 | if (I10NM_GET_CAPID3_CFG(d, reg)) { |
771 | i10nm_printk(KERN_ERR, "Failed to get capid3_cfg\n" ); |
772 | return false; |
773 | } |
774 | |
775 | return I10NM_IS_HBM_PRESENT(reg) != 0; |
776 | } |
777 | |
778 | static int i10nm_get_hbm_munits(void) |
779 | { |
780 | struct pci_dev *mdev; |
781 | void __iomem *mbase; |
782 | u32 reg, off, mcmtr; |
783 | struct skx_dev *d; |
784 | int i, lmc; |
785 | u64 base; |
786 | |
787 | list_for_each_entry(d, i10nm_edac_list, list) { |
788 | if (!d->pcu_cr3) |
789 | return -ENODEV; |
790 | |
791 | if (!i10nm_check_hbm_imc(d)) { |
792 | i10nm_printk(KERN_DEBUG, "No hbm memory\n" ); |
793 | return -ENODEV; |
794 | } |
795 | |
796 | if (I10NM_GET_SCK_BAR(d, reg)) { |
797 | i10nm_printk(KERN_ERR, "Failed to get socket bar\n" ); |
798 | return -ENODEV; |
799 | } |
800 | base = I10NM_GET_SCK_MMIO_BASE(reg); |
801 | |
802 | if (I10NM_GET_HBM_IMC_BAR(d, reg)) { |
803 | i10nm_printk(KERN_ERR, "Failed to get hbm mc bar\n" ); |
804 | return -ENODEV; |
805 | } |
806 | base += I10NM_GET_HBM_IMC_MMIO_OFFSET(reg); |
807 | |
808 | lmc = res_cfg->ddr_imc_num; |
809 | |
810 | for (i = 0; i < res_cfg->hbm_imc_num; i++) { |
811 | mdev = pci_get_dev_wrapper(dom: d->seg, bus: d->bus[res_cfg->hbm_mdev_bdf.bus], |
812 | dev: res_cfg->hbm_mdev_bdf.dev + i / 4, |
813 | fun: res_cfg->hbm_mdev_bdf.fun + i % 4); |
814 | |
815 | if (i == 0 && !mdev) { |
816 | i10nm_printk(KERN_ERR, "No hbm mc found\n" ); |
817 | return -ENODEV; |
818 | } |
819 | if (!mdev) |
820 | continue; |
821 | |
822 | d->imc[lmc].mdev = mdev; |
823 | off = i * I10NM_HBM_IMC_MMIO_SIZE; |
824 | |
825 | edac_dbg(2, "hbm mc%d mmio base 0x%llx size 0x%x\n" , |
826 | lmc, base + off, I10NM_HBM_IMC_MMIO_SIZE); |
827 | |
828 | mbase = ioremap(offset: base + off, I10NM_HBM_IMC_MMIO_SIZE); |
829 | if (!mbase) { |
830 | pci_dev_put(dev: d->imc[lmc].mdev); |
831 | d->imc[lmc].mdev = NULL; |
832 | |
833 | i10nm_printk(KERN_ERR, "Failed to ioremap for hbm mc 0x%llx\n" , |
834 | base + off); |
835 | return -ENOMEM; |
836 | } |
837 | |
838 | d->imc[lmc].mbase = mbase; |
839 | d->imc[lmc].hbm_mc = true; |
840 | |
841 | mcmtr = I10NM_GET_MCMTR(&d->imc[lmc], 0); |
842 | if (!I10NM_IS_HBM_IMC(mcmtr)) { |
843 | iounmap(addr: d->imc[lmc].mbase); |
844 | d->imc[lmc].mbase = NULL; |
845 | d->imc[lmc].hbm_mc = false; |
846 | pci_dev_put(dev: d->imc[lmc].mdev); |
847 | d->imc[lmc].mdev = NULL; |
848 | |
849 | i10nm_printk(KERN_ERR, "This isn't an hbm mc!\n" ); |
850 | return -ENODEV; |
851 | } |
852 | |
853 | lmc++; |
854 | } |
855 | } |
856 | |
857 | return 0; |
858 | } |
859 | |
860 | static struct res_config i10nm_cfg0 = { |
861 | .type = I10NM, |
862 | .decs_did = 0x3452, |
863 | .busno_cfg_offset = 0xcc, |
864 | .ddr_imc_num = 4, |
865 | .ddr_chan_num = 2, |
866 | .ddr_dimm_num = 2, |
867 | .ddr_chan_mmio_sz = 0x4000, |
868 | .sad_all_bdf = {1, 29, 0}, |
869 | .pcu_cr3_bdf = {1, 30, 3}, |
870 | .util_all_bdf = {1, 29, 1}, |
871 | .uracu_bdf = {0, 0, 1}, |
872 | .ddr_mdev_bdf = {0, 12, 0}, |
873 | .hbm_mdev_bdf = {0, 12, 1}, |
874 | .sad_all_offset = 0x108, |
875 | .offsets_scrub = offsets_scrub_icx, |
876 | .offsets_demand = offsets_demand_icx, |
877 | }; |
878 | |
879 | static struct res_config i10nm_cfg1 = { |
880 | .type = I10NM, |
881 | .decs_did = 0x3452, |
882 | .busno_cfg_offset = 0xd0, |
883 | .ddr_imc_num = 4, |
884 | .ddr_chan_num = 2, |
885 | .ddr_dimm_num = 2, |
886 | .ddr_chan_mmio_sz = 0x4000, |
887 | .sad_all_bdf = {1, 29, 0}, |
888 | .pcu_cr3_bdf = {1, 30, 3}, |
889 | .util_all_bdf = {1, 29, 1}, |
890 | .uracu_bdf = {0, 0, 1}, |
891 | .ddr_mdev_bdf = {0, 12, 0}, |
892 | .hbm_mdev_bdf = {0, 12, 1}, |
893 | .sad_all_offset = 0x108, |
894 | .offsets_scrub = offsets_scrub_icx, |
895 | .offsets_demand = offsets_demand_icx, |
896 | }; |
897 | |
898 | static struct res_config spr_cfg = { |
899 | .type = SPR, |
900 | .decs_did = 0x3252, |
901 | .busno_cfg_offset = 0xd0, |
902 | .ddr_imc_num = 4, |
903 | .ddr_chan_num = 2, |
904 | .ddr_dimm_num = 2, |
905 | .hbm_imc_num = 16, |
906 | .hbm_chan_num = 2, |
907 | .hbm_dimm_num = 1, |
908 | .ddr_chan_mmio_sz = 0x8000, |
909 | .hbm_chan_mmio_sz = 0x4000, |
910 | .support_ddr5 = true, |
911 | .sad_all_bdf = {.bus: 1, .dev: 10, .fun: 0}, |
912 | .pcu_cr3_bdf = {1, 30, 3}, |
913 | .util_all_bdf = {1, 29, 1}, |
914 | .uracu_bdf = {0, 0, 1}, |
915 | .ddr_mdev_bdf = {0, 12, 0}, |
916 | .hbm_mdev_bdf = {0, 12, 1}, |
917 | .sad_all_offset = 0x300, |
918 | .offsets_scrub = offsets_scrub_spr, |
919 | .offsets_scrub_hbm0 = offsets_scrub_spr_hbm0, |
920 | .offsets_scrub_hbm1 = offsets_scrub_spr_hbm1, |
921 | .offsets_demand = offsets_demand_spr, |
922 | .offsets_demand2 = offsets_demand2_spr, |
923 | .offsets_demand_hbm0 = offsets_demand_spr_hbm0, |
924 | .offsets_demand_hbm1 = offsets_demand_spr_hbm1, |
925 | }; |
926 | |
927 | static struct res_config gnr_cfg = { |
928 | .type = GNR, |
929 | .decs_did = 0x3252, |
930 | .busno_cfg_offset = 0xd0, |
931 | .ddr_imc_num = 12, |
932 | .ddr_chan_num = 1, |
933 | .ddr_dimm_num = 2, |
934 | .ddr_chan_mmio_sz = 0x4000, |
935 | .support_ddr5 = true, |
936 | .sad_all_bdf = {.bus: 0, .dev: 13, .fun: 0}, |
937 | .pcu_cr3_bdf = {0, 5, 0}, |
938 | .util_all_bdf = {0, 13, 1}, |
939 | .uracu_bdf = {0, 0, 1}, |
940 | .ddr_mdev_bdf = {0, 5, 1}, |
941 | .sad_all_offset = 0x300, |
942 | }; |
943 | |
944 | static const struct x86_cpu_id i10nm_cpuids[] = { |
945 | X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(ATOM_TREMONT_D, X86_STEPPINGS(0x0, 0x3), &i10nm_cfg0), |
946 | X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(ATOM_TREMONT_D, X86_STEPPINGS(0x4, 0xf), &i10nm_cfg1), |
947 | X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(ICELAKE_X, X86_STEPPINGS(0x0, 0x3), &i10nm_cfg0), |
948 | X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(ICELAKE_X, X86_STEPPINGS(0x4, 0xf), &i10nm_cfg1), |
949 | X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(ICELAKE_D, X86_STEPPINGS(0x0, 0xf), &i10nm_cfg1), |
950 | X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(SAPPHIRERAPIDS_X, X86_STEPPINGS(0x0, 0xf), &spr_cfg), |
951 | X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(EMERALDRAPIDS_X, X86_STEPPINGS(0x0, 0xf), &spr_cfg), |
952 | X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(GRANITERAPIDS_X, X86_STEPPINGS(0x0, 0xf), &gnr_cfg), |
953 | X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(ATOM_CRESTMONT_X, X86_STEPPINGS(0x0, 0xf), &gnr_cfg), |
954 | X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(ATOM_CRESTMONT, X86_STEPPINGS(0x0, 0xf), &gnr_cfg), |
955 | {} |
956 | }; |
957 | MODULE_DEVICE_TABLE(x86cpu, i10nm_cpuids); |
958 | |
959 | static bool i10nm_check_ecc(struct skx_imc *imc, int chan) |
960 | { |
961 | u32 mcmtr; |
962 | |
963 | mcmtr = I10NM_GET_MCMTR(imc, chan); |
964 | edac_dbg(1, "ch%d mcmtr reg %x\n" , chan, mcmtr); |
965 | |
966 | return !!GET_BITFIELD(mcmtr, 2, 2); |
967 | } |
968 | |
969 | static int i10nm_get_dimm_config(struct mem_ctl_info *mci, |
970 | struct res_config *cfg) |
971 | { |
972 | struct skx_pvt *pvt = mci->pvt_info; |
973 | struct skx_imc *imc = pvt->imc; |
974 | u32 mtr, amap, mcddrtcfg = 0; |
975 | struct dimm_info *dimm; |
976 | int i, j, ndimms; |
977 | |
978 | for (i = 0; i < imc->num_channels; i++) { |
979 | if (!imc->mbase) |
980 | continue; |
981 | |
982 | ndimms = 0; |
983 | amap = I10NM_GET_AMAP(imc, i); |
984 | |
985 | if (res_cfg->type != GNR) |
986 | mcddrtcfg = I10NM_GET_MCDDRTCFG(imc, i); |
987 | |
988 | for (j = 0; j < imc->num_dimms; j++) { |
989 | dimm = edac_get_dimm(mci, layer0: i, layer1: j, layer2: 0); |
990 | mtr = I10NM_GET_DIMMMTR(imc, i, j); |
991 | edac_dbg(1, "dimmmtr 0x%x mcddrtcfg 0x%x (mc%d ch%d dimm%d)\n" , |
992 | mtr, mcddrtcfg, imc->mc, i, j); |
993 | |
994 | if (IS_DIMM_PRESENT(mtr)) |
995 | ndimms += skx_get_dimm_info(mtr, mcmtr: 0, amap, dimm, |
996 | imc, chan: i, dimmno: j, cfg); |
997 | else if (IS_NVDIMM_PRESENT(mcddrtcfg, j)) |
998 | ndimms += skx_get_nvdimm_info(dimm, imc, chan: i, dimmno: j, |
999 | EDAC_MOD_STR); |
1000 | } |
1001 | if (ndimms && !i10nm_check_ecc(imc, chan: i)) { |
1002 | i10nm_printk(KERN_ERR, "ECC is disabled on imc %d channel %d\n" , |
1003 | imc->mc, i); |
1004 | return -ENODEV; |
1005 | } |
1006 | } |
1007 | |
1008 | return 0; |
1009 | } |
1010 | |
1011 | static struct notifier_block i10nm_mce_dec = { |
1012 | .notifier_call = skx_mce_check_error, |
1013 | .priority = MCE_PRIO_EDAC, |
1014 | }; |
1015 | |
1016 | #ifdef CONFIG_EDAC_DEBUG |
1017 | /* |
1018 | * Debug feature. |
1019 | * Exercise the address decode logic by writing an address to |
1020 | * /sys/kernel/debug/edac/i10nm_test/addr. |
1021 | */ |
1022 | static struct dentry *i10nm_test; |
1023 | |
1024 | static int debugfs_u64_set(void *data, u64 val) |
1025 | { |
1026 | struct mce m; |
1027 | |
1028 | pr_warn_once("Fake error to 0x%llx injected via debugfs\n" , val); |
1029 | |
1030 | memset(&m, 0, sizeof(m)); |
1031 | /* ADDRV + MemRd + Unknown channel */ |
1032 | m.status = MCI_STATUS_ADDRV + 0x90; |
1033 | /* One corrected error */ |
1034 | m.status |= BIT_ULL(MCI_STATUS_CEC_SHIFT); |
1035 | m.addr = val; |
1036 | skx_mce_check_error(NULL, val: 0, data: &m); |
1037 | |
1038 | return 0; |
1039 | } |
1040 | DEFINE_SIMPLE_ATTRIBUTE(fops_u64_wo, NULL, debugfs_u64_set, "%llu\n" ); |
1041 | |
1042 | static void setup_i10nm_debug(void) |
1043 | { |
1044 | i10nm_test = edac_debugfs_create_dir(dirname: "i10nm_test" ); |
1045 | if (!i10nm_test) |
1046 | return; |
1047 | |
1048 | if (!edac_debugfs_create_file(name: "addr" , mode: 0200, parent: i10nm_test, |
1049 | NULL, fops: &fops_u64_wo)) { |
1050 | debugfs_remove(dentry: i10nm_test); |
1051 | i10nm_test = NULL; |
1052 | } |
1053 | } |
1054 | |
1055 | static void teardown_i10nm_debug(void) |
1056 | { |
1057 | debugfs_remove_recursive(dentry: i10nm_test); |
1058 | } |
1059 | #else |
/* No-op variants used when CONFIG_EDAC_DEBUG is not defined. */
static inline void setup_i10nm_debug(void)
{
}

static inline void teardown_i10nm_debug(void)
{
}
1062 | #endif /*CONFIG_EDAC_DEBUG*/ |
1063 | |
1064 | static int __init i10nm_init(void) |
1065 | { |
1066 | u8 mc = 0, src_id = 0, node_id = 0; |
1067 | const struct x86_cpu_id *id; |
1068 | struct res_config *cfg; |
1069 | const char *owner; |
1070 | struct skx_dev *d; |
1071 | int rc, i, off[3] = {0xd0, 0xc8, 0xcc}; |
1072 | u64 tolm, tohm; |
1073 | int imc_num; |
1074 | |
1075 | edac_dbg(2, "\n" ); |
1076 | |
1077 | if (ghes_get_devices()) |
1078 | return -EBUSY; |
1079 | |
1080 | owner = edac_get_owner(); |
1081 | if (owner && strncmp(owner, EDAC_MOD_STR, sizeof(EDAC_MOD_STR))) |
1082 | return -EBUSY; |
1083 | |
1084 | if (cpu_feature_enabled(X86_FEATURE_HYPERVISOR)) |
1085 | return -ENODEV; |
1086 | |
1087 | id = x86_match_cpu(match: i10nm_cpuids); |
1088 | if (!id) |
1089 | return -ENODEV; |
1090 | |
1091 | cfg = (struct res_config *)id->driver_data; |
1092 | res_cfg = cfg; |
1093 | |
1094 | rc = skx_get_hi_lo(did: 0x09a2, off, tolm: &tolm, tohm: &tohm); |
1095 | if (rc) |
1096 | return rc; |
1097 | |
1098 | rc = skx_get_all_bus_mappings(cfg, list: &i10nm_edac_list); |
1099 | if (rc < 0) |
1100 | goto fail; |
1101 | if (rc == 0) { |
1102 | i10nm_printk(KERN_ERR, "No memory controllers found\n" ); |
1103 | return -ENODEV; |
1104 | } |
1105 | |
1106 | rc = i10nm_get_imc_num(cfg); |
1107 | if (rc < 0) |
1108 | goto fail; |
1109 | |
1110 | mem_cfg_2lm = i10nm_check_2lm(cfg); |
1111 | skx_set_mem_cfg(mem_cfg_2lm); |
1112 | |
1113 | rc = i10nm_get_ddr_munits(); |
1114 | |
1115 | if (i10nm_get_hbm_munits() && rc) |
1116 | goto fail; |
1117 | |
1118 | imc_num = res_cfg->ddr_imc_num + res_cfg->hbm_imc_num; |
1119 | |
1120 | list_for_each_entry(d, i10nm_edac_list, list) { |
1121 | rc = skx_get_src_id(d, off: 0xf8, id: &src_id); |
1122 | if (rc < 0) |
1123 | goto fail; |
1124 | |
1125 | rc = skx_get_node_id(d, id: &node_id); |
1126 | if (rc < 0) |
1127 | goto fail; |
1128 | |
1129 | edac_dbg(2, "src_id = %d node_id = %d\n" , src_id, node_id); |
1130 | for (i = 0; i < imc_num; i++) { |
1131 | if (!d->imc[i].mdev) |
1132 | continue; |
1133 | |
1134 | d->imc[i].mc = mc++; |
1135 | d->imc[i].lmc = i; |
1136 | d->imc[i].src_id = src_id; |
1137 | d->imc[i].node_id = node_id; |
1138 | if (d->imc[i].hbm_mc) { |
1139 | d->imc[i].chan_mmio_sz = cfg->hbm_chan_mmio_sz; |
1140 | d->imc[i].num_channels = cfg->hbm_chan_num; |
1141 | d->imc[i].num_dimms = cfg->hbm_dimm_num; |
1142 | } else { |
1143 | d->imc[i].chan_mmio_sz = cfg->ddr_chan_mmio_sz; |
1144 | d->imc[i].num_channels = cfg->ddr_chan_num; |
1145 | d->imc[i].num_dimms = cfg->ddr_dimm_num; |
1146 | } |
1147 | |
1148 | rc = skx_register_mci(imc: &d->imc[i], pdev: d->imc[i].mdev, |
1149 | ctl_name: "Intel_10nm Socket" , EDAC_MOD_STR, |
1150 | get_dimm_config: i10nm_get_dimm_config, cfg); |
1151 | if (rc < 0) |
1152 | goto fail; |
1153 | } |
1154 | } |
1155 | |
1156 | rc = skx_adxl_get(); |
1157 | if (rc) |
1158 | goto fail; |
1159 | |
1160 | opstate_init(); |
1161 | mce_register_decode_chain(nb: &i10nm_mce_dec); |
1162 | setup_i10nm_debug(); |
1163 | |
1164 | if (retry_rd_err_log && res_cfg->offsets_scrub && res_cfg->offsets_demand) { |
1165 | skx_set_decode(decode: i10nm_mc_decode, show_retry_log: show_retry_rd_err_log); |
1166 | if (retry_rd_err_log == 2) |
1167 | enable_retry_rd_err_log(enable: true); |
1168 | } else { |
1169 | skx_set_decode(decode: i10nm_mc_decode, NULL); |
1170 | } |
1171 | |
1172 | i10nm_printk(KERN_INFO, "%s\n" , I10NM_REVISION); |
1173 | |
1174 | return 0; |
1175 | fail: |
1176 | skx_remove(); |
1177 | return rc; |
1178 | } |
1179 | |
1180 | static void __exit i10nm_exit(void) |
1181 | { |
1182 | edac_dbg(2, "\n" ); |
1183 | |
1184 | if (retry_rd_err_log && res_cfg->offsets_scrub && res_cfg->offsets_demand) { |
1185 | skx_set_decode(NULL, NULL); |
1186 | if (retry_rd_err_log == 2) |
1187 | enable_retry_rd_err_log(enable: false); |
1188 | } |
1189 | |
1190 | teardown_i10nm_debug(); |
1191 | mce_unregister_decode_chain(nb: &i10nm_mce_dec); |
1192 | skx_adxl_put(); |
1193 | skx_remove(); |
1194 | } |
1195 | |
1196 | module_init(i10nm_init); |
1197 | module_exit(i10nm_exit); |
1198 | |
1199 | static int set_decoding_via_mca(const char *buf, const struct kernel_param *kp) |
1200 | { |
1201 | unsigned long val; |
1202 | int ret; |
1203 | |
1204 | ret = kstrtoul(s: buf, base: 0, res: &val); |
1205 | |
1206 | if (ret || val > 1) |
1207 | return -EINVAL; |
1208 | |
1209 | if (val && mem_cfg_2lm) { |
1210 | i10nm_printk(KERN_NOTICE, "Decoding errors via MCA banks for 2LM isn't supported yet\n" ); |
1211 | return -EIO; |
1212 | } |
1213 | |
1214 | ret = param_set_int(val: buf, kp); |
1215 | |
1216 | return ret; |
1217 | } |
1218 | |
1219 | static const struct kernel_param_ops decoding_via_mca_param_ops = { |
1220 | .set = set_decoding_via_mca, |
1221 | .get = param_get_int, |
1222 | }; |
1223 | |
1224 | module_param_cb(decoding_via_mca, &decoding_via_mca_param_ops, &decoding_via_mca, 0644); |
1225 | MODULE_PARM_DESC(decoding_via_mca, "decoding_via_mca: 0=off(default), 1=enable" ); |
1226 | |
1227 | module_param(retry_rd_err_log, int, 0444); |
1228 | MODULE_PARM_DESC(retry_rd_err_log, "retry_rd_err_log: 0=off(default), 1=bios(Linux doesn't reset any control bits, but just reports values.), 2=linux(Linux tries to take control and resets mode bits, clear valid/UC bits after reading.)" ); |
1229 | |
1230 | MODULE_LICENSE("GPL v2" ); |
1231 | MODULE_DESCRIPTION("MC Driver for Intel 10nm server processors" ); |
1232 | |