// SPDX-License-Identifier: GPL-2.0
/*
 * Intel specific MCE features.
 * Copyright 2004 Zwane Mwaikambo <zwane@linuxpower.ca>
 * Copyright (C) 2008, 2009 Intel Corporation
 * Author: Andi Kleen
 */

#include <linux/gfp.h>
#include <linux/interrupt.h>
#include <linux/percpu.h>
#include <linux/sched.h>
#include <linux/cpumask.h>
#include <asm/apic.h>
#include <asm/cpufeature.h>
#include <asm/intel-family.h>
#include <asm/processor.h>
#include <asm/msr.h>
#include <asm/mce.h>

#include "internal.h"
/*
 * Support for Intel Corrected Machine Check Interrupts. This allows
 * the CPU to raise an interrupt when a corrected machine check happened.
 * Normally we pick those up using a regular polling timer.
 * Also supports reliable discovery of shared banks.
 */

/*
 * CMCI can be delivered to multiple cpus that share a machine check bank
 * so we need to designate a single cpu to process errors logged in each bank
 * in the interrupt handler (otherwise we would have many races and potential
 * double reporting of the same error).
 * Note that this can change when a cpu is offlined or brought online since
 * some MCA banks are shared across cpus. When a cpu is offlined, cmci_clear()
 * disables CMCI on all banks owned by the cpu and clears this bitfield. At
 * this point, cmci_rediscover() kicks in and a different cpu may end up
 * taking ownership of some of the shared MCA banks that were previously
 * owned by the offlined cpu.
 */
static DEFINE_PER_CPU(mce_banks_t, mce_banks_owned);

/*
 * CMCI storm detection backoff counter
 *
 * During storm, we reset this counter to INITIAL_CHECK_INTERVAL in case we've
 * encountered an error. If not, we decrement it by one. We signal the end of
 * the CMCI storm when it reaches 0.
 */
static DEFINE_PER_CPU(int, cmci_backoff_cnt);

/*
 * cmci_discover_lock protects against parallel discovery attempts
 * which could race against each other.
 */
static DEFINE_RAW_SPINLOCK(cmci_discover_lock);

/*
 * On systems that support CMCI but have it disabled, polling for MCEs can
 * cause the same event to be reported multiple times because IA32_MCi_STATUS
 * is shared by the same package.
 */
static DEFINE_SPINLOCK(cmci_poll_lock);

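/* With a threshold of 1, raise a CMCI on every corrected error. */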
#define CMCI_THRESHOLD		1
#define CMCI_POLL_INTERVAL	(30 * HZ)
#define CMCI_STORM_INTERVAL	(HZ)
#define CMCI_STORM_THRESHOLD	15

static DEFINE_PER_CPU(unsigned long, cmci_time_stamp);
static DEFINE_PER_CPU(unsigned int, cmci_storm_cnt);
static DEFINE_PER_CPU(unsigned int, cmci_storm_state);

enum {
	CMCI_STORM_NONE,
	CMCI_STORM_ACTIVE,
	CMCI_STORM_SUBSIDED,
};

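/* Number of CPUs whose storm state is currently CMCI_STORM_ACTIVE. */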
static atomic_t cmci_storm_on_cpus;

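/*
 * Check whether this CPU supports CMCI and report the number of
 * implemented MCA banks (capped at MAX_NR_BANKS) through @banks.
 */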
static int cmci_supported(int *banks)
{
	u64 cap;

	if (mca_cfg.cmci_disabled || mca_cfg.ignore_ce)
		return 0;

	/*
	 * Vendor check is not strictly needed, but the initialization
	 * is vendor keyed and this makes sure no backdoors are entered
	 * otherwise.
	 */
	if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL &&
	    boot_cpu_data.x86_vendor != X86_VENDOR_ZHAOXIN)
		return 0;

	if (!boot_cpu_has(X86_FEATURE_APIC) || lapic_get_maxlvt() < 6)
		return 0;
	rdmsrl(MSR_IA32_MCG_CAP, cap);
	*banks = min_t(unsigned, MAX_NR_BANKS, cap & 0xff);
	return !!(cap & MCG_CMCI_P);
}

static bool lmce_supported(void)
{
	u64 tmp;

	if (mca_cfg.lmce_disabled)
		return false;

	rdmsrl(MSR_IA32_MCG_CAP, tmp);

	/*
	 * LMCE depends on recovery support in the processor. Hence both
	 * MCG_SER_P and MCG_LMCE_P should be present in MCG_CAP.
	 */
	if ((tmp & (MCG_SER_P | MCG_LMCE_P)) !=
		   (MCG_SER_P | MCG_LMCE_P))
		return false;

	/*
	 * BIOS should indicate support for LMCE by setting bit 20 in
	 * IA32_FEAT_CTL without which touching MCG_EXT_CTL will generate a #GP
	 * fault. The MSR must also be locked for LMCE_ENABLED to take effect.
	 * WARN if the MSR isn't locked as init_ia32_feat_ctl() unconditionally
	 * locks the MSR in the event that it wasn't already locked by BIOS.
	 */
	rdmsrl(MSR_IA32_FEAT_CTL, tmp);
	if (WARN_ON_ONCE(!(tmp & FEAT_CTL_LOCKED)))
		return false;

	return tmp & FEAT_CTL_LMCE_ENABLED;
}

bool mce_intel_cmci_poll(void)
{
	if (__this_cpu_read(cmci_storm_state) == CMCI_STORM_NONE)
		return false;

	/*
	 * Reset the counter if we've logged an error in the last poll
	 * during the storm.
	 */
	if (machine_check_poll(0, this_cpu_ptr(&mce_banks_owned)))
		this_cpu_write(cmci_backoff_cnt, INITIAL_CHECK_INTERVAL);
	else
		this_cpu_dec(cmci_backoff_cnt);

	return true;
}

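/*
 * A CPU is going offline. If it was counted in an active CMCI storm,
 * drop it from the global storm count and reset its per-CPU storm
 * state.
 */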
void mce_intel_hcpu_update(unsigned long cpu)
{
	if (per_cpu(cmci_storm_state, cpu) == CMCI_STORM_ACTIVE)
		atomic_dec(&cmci_storm_on_cpus);

	per_cpu(cmci_storm_state, cpu) = CMCI_STORM_NONE;
}

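/*
 * Set or clear MCI_CTL2_CMCI_EN on every bank owned by this CPU to
 * switch those banks between interrupt delivery (on) and polling (off).
 */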
static void cmci_toggle_interrupt_mode(bool on)
{
	unsigned long flags, *owned;
	int bank;
	u64 val;

	raw_spin_lock_irqsave(&cmci_discover_lock, flags);
	owned = this_cpu_ptr(mce_banks_owned);
	for_each_set_bit(bank, owned, MAX_NR_BANKS) {
		rdmsrl(MSR_IA32_MCx_CTL2(bank), val);

		if (on)
			val |= MCI_CTL2_CMCI_EN;
		else
			val &= ~MCI_CTL2_CMCI_EN;

		wrmsrl(MSR_IA32_MCx_CTL2(bank), val);
	}
	raw_spin_unlock_irqrestore(&cmci_discover_lock, flags);
}

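/*
 * Pick the next interval for the MCE poll timer. While a storm is in
 * progress, keep polling every CMCI_STORM_INTERVAL; once every CPU has
 * left the storm state, switch back to interrupt mode and the regular
 * poll interval.
 */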
unsigned long cmci_intel_adjust_timer(unsigned long interval)
{
	if ((this_cpu_read(cmci_backoff_cnt) > 0) &&
	    (__this_cpu_read(cmci_storm_state) == CMCI_STORM_ACTIVE)) {
		mce_notify_irq();
		return CMCI_STORM_INTERVAL;
	}

	switch (__this_cpu_read(cmci_storm_state)) {
	case CMCI_STORM_ACTIVE:

		/*
		 * We switch back to interrupt mode once the poll timer has
		 * silenced itself. That means no events recorded and the timer
		 * interval is back to our poll interval.
		 */
		__this_cpu_write(cmci_storm_state, CMCI_STORM_SUBSIDED);
		if (!atomic_sub_return(1, &cmci_storm_on_cpus))
			pr_notice("CMCI storm subsided: switching to interrupt mode\n");

		fallthrough;

	case CMCI_STORM_SUBSIDED:
		/*
		 * We wait for all CPUs to go back to SUBSIDED state. When that
		 * happens we switch back to interrupt mode.
		 */
		if (!atomic_read(&cmci_storm_on_cpus)) {
			__this_cpu_write(cmci_storm_state, CMCI_STORM_NONE);
			cmci_toggle_interrupt_mode(true);
			cmci_recheck();
		}
		return CMCI_POLL_INTERVAL;
	default:

		/* We have shiny weather. Let the poll do whatever it thinks. */
		return interval;
	}
}

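/*
 * Storm detection: count interrupts arriving within one
 * CMCI_STORM_INTERVAL window. Once the count exceeds
 * CMCI_STORM_THRESHOLD, disable CMCI on this CPU's banks and fall back
 * to timer-driven polling until the storm subsides.
 */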
static bool cmci_storm_detect(void)
{
	unsigned int cnt = __this_cpu_read(cmci_storm_cnt);
	unsigned long ts = __this_cpu_read(cmci_time_stamp);
	unsigned long now = jiffies;
	int r;

	if (__this_cpu_read(cmci_storm_state) != CMCI_STORM_NONE)
		return true;

	if (time_before_eq(now, ts + CMCI_STORM_INTERVAL)) {
		cnt++;
	} else {
		cnt = 1;
		__this_cpu_write(cmci_time_stamp, now);
	}
	__this_cpu_write(cmci_storm_cnt, cnt);

	if (cnt <= CMCI_STORM_THRESHOLD)
		return false;

	cmci_toggle_interrupt_mode(false);
	__this_cpu_write(cmci_storm_state, CMCI_STORM_ACTIVE);
	r = atomic_add_return(1, &cmci_storm_on_cpus);
	mce_timer_kick(CMCI_STORM_INTERVAL);
	this_cpu_write(cmci_backoff_cnt, INITIAL_CHECK_INTERVAL);

	if (r == 1)
		pr_notice("CMCI storm detected: switching to poll mode\n");
	return true;
}

/*
 * The interrupt handler. This is called on every event.
 * Just call the poller directly to log any events.
 * This could in theory increase the threshold under high load,
 * but doesn't for now.
 */
static void intel_threshold_interrupt(void)
{
	if (cmci_storm_detect())
		return;

	machine_check_poll(MCP_TIMESTAMP, this_cpu_ptr(&mce_banks_owned));
}

/*
 * Enable CMCI (Corrected Machine Check Interrupt) for available MCE banks
 * on this CPU. Use the algorithm recommended in the SDM to discover shared
 * banks.
 */
static void cmci_discover(int banks)
{
	unsigned long *owned = (void *)this_cpu_ptr(&mce_banks_owned);
	unsigned long flags;
	int i;
	int bios_wrong_thresh = 0;

	raw_spin_lock_irqsave(&cmci_discover_lock, flags);
	for (i = 0; i < banks; i++) {
		u64 val;
		int bios_zero_thresh = 0;

		if (test_bit(i, owned))
			continue;

		/* Skip banks in firmware first mode */
		if (test_bit(i, mce_banks_ce_disabled))
			continue;

		rdmsrl(MSR_IA32_MCx_CTL2(i), val);

		/* Already owned by someone else? */
		if (val & MCI_CTL2_CMCI_EN) {
			clear_bit(i, owned);
			__clear_bit(i, this_cpu_ptr(mce_poll_banks));
			continue;
		}

		if (!mca_cfg.bios_cmci_threshold) {
			val &= ~MCI_CTL2_CMCI_THRESHOLD_MASK;
			val |= CMCI_THRESHOLD;
		} else if (!(val & MCI_CTL2_CMCI_THRESHOLD_MASK)) {
			/*
			 * If bios_cmci_threshold boot option was specified
			 * but the threshold is zero, we'll try to initialize
			 * it to 1.
			 */
			bios_zero_thresh = 1;
			val |= CMCI_THRESHOLD;
		}

		val |= MCI_CTL2_CMCI_EN;
		wrmsrl(MSR_IA32_MCx_CTL2(i), val);
		rdmsrl(MSR_IA32_MCx_CTL2(i), val);

		/* Did the enable bit stick? -- the bank supports CMCI */
		if (val & MCI_CTL2_CMCI_EN) {
			set_bit(i, owned);
			__clear_bit(i, this_cpu_ptr(mce_poll_banks));
			/*
			 * We are able to set thresholds for some banks that
			 * had a threshold of 0. This means the BIOS has not
			 * set the thresholds properly or does not work with
			 * this boot option. Note down now and report later.
			 */
			if (mca_cfg.bios_cmci_threshold && bios_zero_thresh &&
			    (val & MCI_CTL2_CMCI_THRESHOLD_MASK))
				bios_wrong_thresh = 1;
		} else {
			WARN_ON(!test_bit(i, this_cpu_ptr(mce_poll_banks)));
		}
	}
	raw_spin_unlock_irqrestore(&cmci_discover_lock, flags);
	if (mca_cfg.bios_cmci_threshold && bios_wrong_thresh) {
		pr_info_once(
			"bios_cmci_threshold: Some banks do not have valid thresholds set\n");
		pr_info_once(
			"bios_cmci_threshold: Make sure your BIOS supports this boot option\n");
	}
}

/*
 * Just in case we missed an event during initialization, check all the
 * CMCI owned banks.
 */
void cmci_recheck(void)
{
	unsigned long flags;
	int banks;

	if (!mce_available(raw_cpu_ptr(&cpu_info)) || !cmci_supported(&banks))
		return;

	local_irq_save(flags);
	machine_check_poll(0, this_cpu_ptr(&mce_banks_owned));
	local_irq_restore(flags);
}

/* Caller must hold cmci_discover_lock */
static void __cmci_disable_bank(int bank)
{
	u64 val;

	if (!test_bit(bank, this_cpu_ptr(mce_banks_owned)))
		return;
	rdmsrl(MSR_IA32_MCx_CTL2(bank), val);
	val &= ~MCI_CTL2_CMCI_EN;
	wrmsrl(MSR_IA32_MCx_CTL2(bank), val);
	__clear_bit(bank, this_cpu_ptr(mce_banks_owned));
}

/*
 * Disable CMCI on this CPU for all banks it owns when it goes down.
 * This allows other CPUs to claim the banks on rediscovery.
 */
void cmci_clear(void)
{
	unsigned long flags;
	int i;
	int banks;

	if (!cmci_supported(&banks))
		return;
	raw_spin_lock_irqsave(&cmci_discover_lock, flags);
	for (i = 0; i < banks; i++)
		__cmci_disable_bank(i);
	raw_spin_unlock_irqrestore(&cmci_discover_lock, flags);
}

static void cmci_rediscover_work_func(void *arg)
{
	int banks;

	/* Recheck banks in case CPUs don't all have the same number */
	if (cmci_supported(&banks))
		cmci_discover(banks);
}

/* After a CPU went down, cycle through all the others and rediscover */
void cmci_rediscover(void)
{
	int banks;

	if (!cmci_supported(&banks))
		return;

	on_each_cpu(cmci_rediscover_work_func, NULL, 1);
}

/*
 * Reenable CMCI on this CPU in case a CPU down failed.
 */
void cmci_reenable(void)
{
	int banks;
	if (cmci_supported(&banks))
		cmci_discover(banks);
}

void cmci_disable_bank(int bank)
{
	int banks;
	unsigned long flags;

	if (!cmci_supported(&banks))
		return;

	raw_spin_lock_irqsave(&cmci_discover_lock, flags);
	__cmci_disable_bank(bank);
	raw_spin_unlock_irqrestore(&cmci_discover_lock, flags);
}

/* Bank polling function when CMCI is disabled. */
static void cmci_mc_poll_banks(void)
{
	spin_lock(&cmci_poll_lock);
	machine_check_poll(0, this_cpu_ptr(&mce_poll_banks));
	spin_unlock(&cmci_poll_lock);
}

void intel_init_cmci(void)
{
	int banks;

	if (!cmci_supported(&banks)) {
		mc_poll_banks = cmci_mc_poll_banks;
		return;
	}

	mce_threshold_vector = intel_threshold_interrupt;
	cmci_discover(banks);
	/*
	 * For CPU #0 this runs with the APIC still disabled, but that's
	 * ok because only the vector is set up. We still do another
	 * check for the banks later for CPU #0 just to make sure
	 * to not miss any events.
	 */
	apic_write(APIC_LVTCMCI, THRESHOLD_APIC_VECTOR|APIC_DM_FIXED);
	cmci_recheck();
}

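/*
 * Enable Local MCE delivery (LMCE) if supported, so that eligible
 * machine checks are signaled only to the affected logical CPU instead
 * of being broadcast to all CPUs.
 */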
void intel_init_lmce(void)
{
	u64 val;

	if (!lmce_supported())
		return;

	rdmsrl(MSR_IA32_MCG_EXT_CTL, val);

	if (!(val & MCG_EXT_CTL_LMCE_EN))
		wrmsrl(MSR_IA32_MCG_EXT_CTL, val | MCG_EXT_CTL_LMCE_EN);
}

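/* Undo intel_init_lmce(), e.g. when the CPU is being torn down. */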
void intel_clear_lmce(void)
{
	u64 val;

	if (!lmce_supported())
		return;

	rdmsrl(MSR_IA32_MCG_EXT_CTL, val);
	val &= ~MCG_EXT_CTL_LMCE_EN;
	wrmsrl(MSR_IA32_MCG_EXT_CTL, val);
}

/*
 * Enable additional error logs from the integrated
 * memory controller on processors that support this.
 */
static void intel_imc_init(struct cpuinfo_x86 *c)
{
	u64 error_control;

	switch (c->x86_model) {
	case INTEL_FAM6_SANDYBRIDGE_X:
	case INTEL_FAM6_IVYBRIDGE_X:
	case INTEL_FAM6_HASWELL_X:
		if (rdmsrl_safe(MSR_ERROR_CONTROL, &error_control))
			return;
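		/* Bit 1 ("MemError log enable" per the SDM) turns on the extra logs. */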
		error_control |= 2;
		wrmsrl_safe(MSR_ERROR_CONTROL, error_control);
		break;
	}
}

void mce_intel_feature_init(struct cpuinfo_x86 *c)
{
	intel_init_cmci();
	intel_init_lmce();
	intel_imc_init(c);
}

void mce_intel_feature_clear(struct cpuinfo_x86 *c)
{
	intel_clear_lmce();
}

bool intel_filter_mce(struct mce *m)
{
	struct cpuinfo_x86 *c = &boot_cpu_data;

	/* MCE errata HSD131, HSM142, HSW131, BDM48 and SKX37 */
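	/*
	 * The mask below keeps MCI_STATUS_VAL, MCI_STATUS_UC and the low
	 * 32 status bits: a valid, corrected (!UC) error in bank 0 with
	 * this signature is a spurious log caused by the errata above.
	 */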
	if ((c->x86 == 6) &&
	    ((c->x86_model == INTEL_FAM6_HASWELL) ||
	     (c->x86_model == INTEL_FAM6_HASWELL_L) ||
	     (c->x86_model == INTEL_FAM6_BROADWELL) ||
	     (c->x86_model == INTEL_FAM6_HASWELL_G) ||
	     (c->x86_model == INTEL_FAM6_SKYLAKE_X)) &&
	    (m->bank == 0) &&
	    ((m->status & 0xa0000000ffffffff) == 0x80000000000f0005))
		return true;

	return false;
}

/*
 * Check if the address reported by the CPU is in a format we can parse.
 * It would be possible to add code for most other cases, but all would
 * be somewhat complicated (e.g. segment offset would require an instruction
 * parser). So only support physical addresses up to page granularity for now.
 */
bool intel_mce_usable_address(struct mce *m)
{
	if (!(m->status & MCI_STATUS_MISCV))
		return false;

	if (MCI_MISC_ADDR_LSB(m->misc) > PAGE_SHIFT)
		return false;

	if (MCI_MISC_ADDR_MODE(m->misc) != MCI_MISC_ADDR_PHYS)
		return false;

	return true;
}