// SPDX-License-Identifier: GPL-2.0
/*
 * Intel specific MCE features.
 * Copyright 2004 Zwane Mwaikambo <zwane@linuxpower.ca>
 * Copyright (C) 2008, 2009 Intel Corporation
 * Author: Andi Kleen
 */

#include <linux/gfp.h>
#include <linux/interrupt.h>
#include <linux/percpu.h>
#include <linux/sched.h>
#include <linux/cpumask.h>
#include <asm/apic.h>
#include <asm/cpufeature.h>
#include <asm/intel-family.h>
#include <asm/processor.h>
#include <asm/msr.h>
#include <asm/mce.h>

#include "internal.h"
/*
 * Support for Intel Corrected Machine Check Interrupts. This allows
 * the CPU to raise an interrupt when a corrected machine check happened.
 * Normally we pick those up using a regular polling timer.
 * Also supports reliable discovery of shared banks.
 */

/*
 * CMCI can be delivered to multiple cpus that share a machine check bank
 * so we need to designate a single cpu to process errors logged in each bank
 * in the interrupt handler (otherwise we would have many races and potential
 * double reporting of the same error).
 * Note that this can change when a cpu is offlined or brought online since
 * some MCA banks are shared across cpus. When a cpu is offlined, cmci_clear()
 * disables CMCI on all banks owned by the cpu and clears this bitfield. At
 * this point, cmci_rediscover() kicks in and a different cpu may end up
 * taking ownership of some of the shared MCA banks that were previously
 * owned by the offlined cpu.
 */
static DEFINE_PER_CPU(mce_banks_t, mce_banks_owned);

/*
 * CMCI storm detection backoff counter
 *
 * During storm, we reset this counter to INITIAL_CHECK_INTERVAL in case we've
 * encountered an error. If not, we decrement it by one. We signal the end of
 * the CMCI storm when it reaches 0.
 */
static DEFINE_PER_CPU(int, cmci_backoff_cnt);

/*
 * cmci_discover_lock protects against parallel discovery attempts
 * which could race against each other.
 */
static DEFINE_RAW_SPINLOCK(cmci_discover_lock);

/*
 * On systems that support CMCI but have it disabled, polling for MCEs can
 * cause the same event to be reported multiple times because IA32_MCi_STATUS
 * is shared by the same package.
 */
static DEFINE_SPINLOCK(cmci_poll_lock);

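/* With a threshold of 1, raise a CMCI on every corrected error. */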
#define CMCI_THRESHOLD		1
#define CMCI_POLL_INTERVAL	(30 * HZ)
#define CMCI_STORM_INTERVAL	(HZ)
#define CMCI_STORM_THRESHOLD	15

static DEFINE_PER_CPU(unsigned long, cmci_time_stamp);
static DEFINE_PER_CPU(unsigned int, cmci_storm_cnt);
static DEFINE_PER_CPU(unsigned int, cmci_storm_state);

enum {
	CMCI_STORM_NONE,
	CMCI_STORM_ACTIVE,
	CMCI_STORM_SUBSIDED,
};

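/* Number of CPUs whose storm state is currently CMCI_STORM_ACTIVE. */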
static atomic_t cmci_storm_on_cpus;

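/*
 * Check whether this CPU supports CMCI and report the number of
 * implemented MCA banks (capped at MAX_NR_BANKS) through @banks.
 */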
static int cmci_supported(int *banks)
{
	u64 cap;

	if (mca_cfg.cmci_disabled || mca_cfg.ignore_ce)
		return 0;

	/*
	 * Vendor check is not strictly needed, but the initialization
	 * is vendor keyed and this makes sure no backdoors are entered
	 * otherwise.
	 */
	if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL &&
	    boot_cpu_data.x86_vendor != X86_VENDOR_ZHAOXIN)
		return 0;

	if (!boot_cpu_has(X86_FEATURE_APIC) || lapic_get_maxlvt() < 6)
		return 0;
	rdmsrl(MSR_IA32_MCG_CAP, cap);
	*banks = min_t(unsigned, MAX_NR_BANKS, cap & 0xff);
	return !!(cap & MCG_CMCI_P);
}

static bool lmce_supported(void)
{
	u64 tmp;

	if (mca_cfg.lmce_disabled)
		return false;

	rdmsrl(MSR_IA32_MCG_CAP, tmp);

	/*
	 * LMCE depends on recovery support in the processor. Hence both
	 * MCG_SER_P and MCG_LMCE_P should be present in MCG_CAP.
	 */
	if ((tmp & (MCG_SER_P | MCG_LMCE_P)) !=
		   (MCG_SER_P | MCG_LMCE_P))
		return false;

	/*
	 * BIOS should indicate support for LMCE by setting bit 20 in
	 * IA32_FEAT_CTL without which touching MCG_EXT_CTL will generate a #GP
	 * fault. The MSR must also be locked for LMCE_ENABLED to take effect.
	 * WARN if the MSR isn't locked as init_ia32_feat_ctl() unconditionally
	 * locks the MSR in the event that it wasn't already locked by BIOS.
	 */
	rdmsrl(MSR_IA32_FEAT_CTL, tmp);
	if (WARN_ON_ONCE(!(tmp & FEAT_CTL_LOCKED)))
		return false;

	return tmp & FEAT_CTL_LMCE_ENABLED;
}

bool mce_intel_cmci_poll(void)
{
	if (__this_cpu_read(cmci_storm_state) == CMCI_STORM_NONE)
		return false;

	/*
	 * Reset the counter if we've logged an error in the last poll
	 * during the storm.
	 */
	if (machine_check_poll(0, this_cpu_ptr(&mce_banks_owned)))
		this_cpu_write(cmci_backoff_cnt, INITIAL_CHECK_INTERVAL);
	else
		this_cpu_dec(cmci_backoff_cnt);

	return true;
}

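/*
 * A CPU is going offline. If it was counted in an active CMCI storm,
 * drop it from the global storm count and reset its per-CPU storm
 * state.
 */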
void mce_intel_hcpu_update(unsigned long cpu)
{
	if (per_cpu(cmci_storm_state, cpu) == CMCI_STORM_ACTIVE)
		atomic_dec(&cmci_storm_on_cpus);

	per_cpu(cmci_storm_state, cpu) = CMCI_STORM_NONE;
}

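/*
 * Set or clear MCI_CTL2_CMCI_EN on every bank owned by this CPU to
 * switch those banks between interrupt delivery (on) and polling (off).
 */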
static void cmci_toggle_interrupt_mode(bool on)
{
	unsigned long flags, *owned;
	int bank;
	u64 val;

	raw_spin_lock_irqsave(&cmci_discover_lock, flags);
	owned = this_cpu_ptr(mce_banks_owned);
	for_each_set_bit(bank, owned, MAX_NR_BANKS) {
		rdmsrl(MSR_IA32_MCx_CTL2(bank), val);

		if (on)
			val |= MCI_CTL2_CMCI_EN;
		else
			val &= ~MCI_CTL2_CMCI_EN;

		wrmsrl(MSR_IA32_MCx_CTL2(bank), val);
	}
	raw_spin_unlock_irqrestore(&cmci_discover_lock, flags);
}

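/*
 * Pick the next interval for the MCE poll timer. While a storm is in
 * progress, keep polling every CMCI_STORM_INTERVAL; once every CPU has
 * left the storm state, switch back to interrupt mode and the regular
 * poll interval.
 */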
unsigned long cmci_intel_adjust_timer(unsigned long interval)
{
	if ((this_cpu_read(cmci_backoff_cnt) > 0) &&
	    (__this_cpu_read(cmci_storm_state) == CMCI_STORM_ACTIVE)) {
		mce_notify_irq();
		return CMCI_STORM_INTERVAL;
	}

	switch (__this_cpu_read(cmci_storm_state)) {
	case CMCI_STORM_ACTIVE:

		/*
		 * We switch back to interrupt mode once the poll timer has
		 * silenced itself. That means no events recorded and the timer
		 * interval is back to our poll interval.
		 */
		__this_cpu_write(cmci_storm_state, CMCI_STORM_SUBSIDED);
		if (!atomic_sub_return(1, &cmci_storm_on_cpus))
			pr_notice("CMCI storm subsided: switching to interrupt mode\n");

		fallthrough;

	case CMCI_STORM_SUBSIDED:
		/*
		 * We wait for all CPUs to go back to SUBSIDED state. When that
		 * happens we switch back to interrupt mode.
		 */
		if (!atomic_read(&cmci_storm_on_cpus)) {
			__this_cpu_write(cmci_storm_state, CMCI_STORM_NONE);
			cmci_toggle_interrupt_mode(true);
			cmci_recheck();
		}
		return CMCI_POLL_INTERVAL;
	default:

		/* We have shiny weather. Let the poll do whatever it thinks. */
		return interval;
	}
}

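/*
 * Storm detection: count interrupts arriving within one
 * CMCI_STORM_INTERVAL window. Once the count exceeds
 * CMCI_STORM_THRESHOLD, disable CMCI on this CPU's banks and fall back
 * to timer-driven polling until the storm subsides.
 */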
static bool cmci_storm_detect(void)
{
	unsigned int cnt = __this_cpu_read(cmci_storm_cnt);
	unsigned long ts = __this_cpu_read(cmci_time_stamp);
	unsigned long now = jiffies;
	int r;

	if (__this_cpu_read(cmci_storm_state) != CMCI_STORM_NONE)
		return true;

	if (time_before_eq(now, ts + CMCI_STORM_INTERVAL)) {
		cnt++;
	} else {
		cnt = 1;
		__this_cpu_write(cmci_time_stamp, now);
	}
	__this_cpu_write(cmci_storm_cnt, cnt);

	if (cnt <= CMCI_STORM_THRESHOLD)
		return false;

	cmci_toggle_interrupt_mode(false);
	__this_cpu_write(cmci_storm_state, CMCI_STORM_ACTIVE);
	r = atomic_add_return(1, &cmci_storm_on_cpus);
	mce_timer_kick(CMCI_STORM_INTERVAL);
	this_cpu_write(cmci_backoff_cnt, INITIAL_CHECK_INTERVAL);

	if (r == 1)
		pr_notice("CMCI storm detected: switching to poll mode\n");
	return true;
}

/*
 * The interrupt handler. This is called on every event.
 * Just call the poller directly to log any events.
 * This could in theory increase the threshold under high load,
 * but doesn't for now.
 */
static void intel_threshold_interrupt(void)
{
	if (cmci_storm_detect())
		return;

	machine_check_poll(MCP_TIMESTAMP, this_cpu_ptr(&mce_banks_owned));
}

/*
 * Enable CMCI (Corrected Machine Check Interrupt) for available MCE banks
 * on this CPU. Use the algorithm recommended in the SDM to discover shared
 * banks.
 */
static void cmci_discover(int banks)
{
	unsigned long *owned = (void *)this_cpu_ptr(&mce_banks_owned);
	unsigned long flags;
	int i;
	int bios_wrong_thresh = 0;

	raw_spin_lock_irqsave(&cmci_discover_lock, flags);
	for (i = 0; i < banks; i++) {
		u64 val;
		int bios_zero_thresh = 0;

		if (test_bit(i, owned))
			continue;

		/* Skip banks in firmware first mode */
		if (test_bit(i, mce_banks_ce_disabled))
			continue;

		rdmsrl(MSR_IA32_MCx_CTL2(i), val);

		/* Already owned by someone else? */
		if (val & MCI_CTL2_CMCI_EN) {
			clear_bit(i, owned);
			__clear_bit(i, this_cpu_ptr(mce_poll_banks));
			continue;
		}

		if (!mca_cfg.bios_cmci_threshold) {
			val &= ~MCI_CTL2_CMCI_THRESHOLD_MASK;
			val |= CMCI_THRESHOLD;
		} else if (!(val & MCI_CTL2_CMCI_THRESHOLD_MASK)) {
			/*
			 * If bios_cmci_threshold boot option was specified
			 * but the threshold is zero, we'll try to initialize
			 * it to 1.
			 */
			bios_zero_thresh = 1;
			val |= CMCI_THRESHOLD;
		}

		val |= MCI_CTL2_CMCI_EN;
		wrmsrl(MSR_IA32_MCx_CTL2(i), val);
		rdmsrl(MSR_IA32_MCx_CTL2(i), val);

		/* Did the enable bit stick? -- the bank supports CMCI */
		if (val & MCI_CTL2_CMCI_EN) {
			set_bit(i, owned);
			__clear_bit(i, this_cpu_ptr(mce_poll_banks));
			/*
			 * We are able to set thresholds for some banks that
			 * had a threshold of 0. This means the BIOS has not
			 * set the thresholds properly or does not work with
			 * this boot option. Note down now and report later.
			 */
			if (mca_cfg.bios_cmci_threshold && bios_zero_thresh &&
			    (val & MCI_CTL2_CMCI_THRESHOLD_MASK))
				bios_wrong_thresh = 1;
		} else {
			WARN_ON(!test_bit(i, this_cpu_ptr(mce_poll_banks)));
		}
	}
	raw_spin_unlock_irqrestore(&cmci_discover_lock, flags);
	if (mca_cfg.bios_cmci_threshold && bios_wrong_thresh) {
		pr_info_once(
			"bios_cmci_threshold: Some banks do not have valid thresholds set\n");
		pr_info_once(
			"bios_cmci_threshold: Make sure your BIOS supports this boot option\n");
	}
}

/*
 * Just in case we missed an event during initialization, check all the
 * CMCI owned banks.
 */
void cmci_recheck(void)
{
	unsigned long flags;
	int banks;

	if (!mce_available(raw_cpu_ptr(&cpu_info)) || !cmci_supported(&banks))
		return;

	local_irq_save(flags);
	machine_check_poll(0, this_cpu_ptr(&mce_banks_owned));
	local_irq_restore(flags);
}

/* Caller must hold cmci_discover_lock */
static void __cmci_disable_bank(int bank)
{
	u64 val;

	if (!test_bit(bank, this_cpu_ptr(mce_banks_owned)))
		return;
	rdmsrl(MSR_IA32_MCx_CTL2(bank), val);
	val &= ~MCI_CTL2_CMCI_EN;
	wrmsrl(MSR_IA32_MCx_CTL2(bank), val);
	__clear_bit(bank, this_cpu_ptr(mce_banks_owned));
}

/*
 * Disable CMCI on this CPU for all banks it owns when it goes down.
 * This allows other CPUs to claim the banks on rediscovery.
 */
void cmci_clear(void)
{
	unsigned long flags;
	int i;
	int banks;

	if (!cmci_supported(&banks))
		return;
	raw_spin_lock_irqsave(&cmci_discover_lock, flags);
	for (i = 0; i < banks; i++)
		__cmci_disable_bank(i);
	raw_spin_unlock_irqrestore(&cmci_discover_lock, flags);
}

static void cmci_rediscover_work_func(void *arg)
{
	int banks;

	/* Recheck banks in case CPUs don't all have the same number */
	if (cmci_supported(&banks))
		cmci_discover(banks);
}

/* After a CPU went down, cycle through all the others and rediscover */
void cmci_rediscover(void)
{
	int banks;

	if (!cmci_supported(&banks))
		return;

	on_each_cpu(cmci_rediscover_work_func, NULL, 1);
}

/*
 * Reenable CMCI on this CPU in case a CPU down failed.
 */
void cmci_reenable(void)
{
	int banks;
	if (cmci_supported(&banks))
		cmci_discover(banks);
}

void cmci_disable_bank(int bank)
{
	int banks;
	unsigned long flags;

	if (!cmci_supported(&banks))
		return;

	raw_spin_lock_irqsave(&cmci_discover_lock, flags);
	__cmci_disable_bank(bank);
	raw_spin_unlock_irqrestore(&cmci_discover_lock, flags);
}

/* Bank polling function when CMCI is disabled. */
static void cmci_mc_poll_banks(void)
{
	spin_lock(&cmci_poll_lock);
	machine_check_poll(0, this_cpu_ptr(&mce_poll_banks));
	spin_unlock(&cmci_poll_lock);
}

void intel_init_cmci(void)
{
	int banks;

	if (!cmci_supported(&banks)) {
		mc_poll_banks = cmci_mc_poll_banks;
		return;
	}

	mce_threshold_vector = intel_threshold_interrupt;
	cmci_discover(banks);
	/*
	 * For CPU #0 this runs with the APIC still disabled, but that's
	 * ok because only the vector is set up. We still do another
	 * check for the banks later for CPU #0 just to make sure
	 * to not miss any events.
	 */
	apic_write(APIC_LVTCMCI, THRESHOLD_APIC_VECTOR|APIC_DM_FIXED);
	cmci_recheck();
}

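/*
 * Enable Local MCE delivery (LMCE) if supported, so that eligible
 * machine checks are signaled only to the affected logical CPU instead
 * of being broadcast to all CPUs.
 */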
void intel_init_lmce(void)
{
	u64 val;

	if (!lmce_supported())
		return;

	rdmsrl(MSR_IA32_MCG_EXT_CTL, val);

	if (!(val & MCG_EXT_CTL_LMCE_EN))
		wrmsrl(MSR_IA32_MCG_EXT_CTL, val | MCG_EXT_CTL_LMCE_EN);
}

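/* Undo intel_init_lmce(), e.g. when the CPU is being torn down. */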
void intel_clear_lmce(void)
{
	u64 val;

	if (!lmce_supported())
		return;

	rdmsrl(MSR_IA32_MCG_EXT_CTL, val);
	val &= ~MCG_EXT_CTL_LMCE_EN;
	wrmsrl(MSR_IA32_MCG_EXT_CTL, val);
}

/*
 * Enable additional error logs from the integrated
 * memory controller on processors that support this.
 */
static void intel_imc_init(struct cpuinfo_x86 *c)
{
	u64 error_control;

	switch (c->x86_model) {
	case INTEL_FAM6_SANDYBRIDGE_X:
	case INTEL_FAM6_IVYBRIDGE_X:
	case INTEL_FAM6_HASWELL_X:
		if (rdmsrl_safe(MSR_ERROR_CONTROL, &error_control))
			return;
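		/* Bit 1 ("MemError log enable" per the SDM) turns on the extra logs. */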
		error_control |= 2;
		wrmsrl_safe(MSR_ERROR_CONTROL, error_control);
		break;
	}
}

void mce_intel_feature_init(struct cpuinfo_x86 *c)
{
	intel_init_cmci();
	intel_init_lmce();
	intel_imc_init(c);
}

void mce_intel_feature_clear(struct cpuinfo_x86 *c)
{
	intel_clear_lmce();
}

bool intel_filter_mce(struct mce *m)
{
	struct cpuinfo_x86 *c = &boot_cpu_data;

	/* MCE errata HSD131, HSM142, HSW131, BDM48 and SKX37 */
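	/*
	 * The mask below keeps MCI_STATUS_VAL, MCI_STATUS_UC and the low
	 * 32 status bits: a valid, corrected (!UC) error in bank 0 with
	 * this signature is a spurious log caused by the errata above.
	 */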
	if ((c->x86 == 6) &&
	    ((c->x86_model == INTEL_FAM6_HASWELL) ||
	     (c->x86_model == INTEL_FAM6_HASWELL_L) ||
	     (c->x86_model == INTEL_FAM6_BROADWELL) ||
	     (c->x86_model == INTEL_FAM6_HASWELL_G) ||
	     (c->x86_model == INTEL_FAM6_SKYLAKE_X)) &&
	    (m->bank == 0) &&
	    ((m->status & 0xa0000000ffffffff) == 0x80000000000f0005))
		return true;

	return false;
}

/*
 * Check if the address reported by the CPU is in a format we can parse.
 * It would be possible to add code for most other cases, but all would
 * be somewhat complicated (e.g. segment offset would require an instruction
 * parser). So only support physical addresses up to page granularity for now.
 */
bool intel_mce_usable_address(struct mce *m)
{
	if (!(m->status & MCI_STATUS_MISCV))
		return false;

	if (MCI_MISC_ADDR_LSB(m->misc) > PAGE_SHIFT)
		return false;

	if (MCI_MISC_ADDR_MODE(m->misc) != MCI_MISC_ADDR_PHYS)
		return false;

	return true;
}