1 | // SPDX-License-Identifier: GPL-2.0-only |
2 | /* |
3 | * MCE grading rules. |
4 | * Copyright 2008, 2009 Intel Corporation. |
5 | * |
6 | * Author: Andi Kleen |
7 | */ |
8 | #include <linux/kernel.h> |
9 | #include <linux/seq_file.h> |
10 | #include <linux/init.h> |
11 | #include <linux/debugfs.h> |
12 | #include <linux/uaccess.h> |
13 | |
14 | #include <asm/mce.h> |
15 | #include <asm/intel-family.h> |
16 | #include <asm/traps.h> |
17 | #include <asm/insn.h> |
18 | #include <asm/insn-eval.h> |
19 | |
20 | #include "internal.h" |
21 | |
22 | /* |
23 | * Grade an mce by severity. In general the most severe ones are processed |
24 | * first. Since there are quite a lot of combinations test the bits in a |
25 | * table-driven way. The rules are simply processed in order, first |
26 | * match wins. |
27 | * |
28 | * Note this is only used for machine check exceptions, the corrected |
29 | * errors use much simpler rules. The exceptions still check for the corrected |
30 | * errors, but only to leave them alone for the CMCI handler (except for |
31 | * panic situations) |
32 | */ |
33 | |
34 | enum context { IN_KERNEL = 1, IN_USER = 2, IN_KERNEL_RECOV = 3 }; |
35 | enum ser { SER_REQUIRED = 1, NO_SER = 2 }; |
36 | enum exception { EXCP_CONTEXT = 1, NO_EXCP = 2 }; |
37 | |
static struct severity {
	u64 mask;			/* bits of mce->status to test */
	u64 result;			/* expected value of (status & mask) */
	unsigned char sev;		/* MCE_*_SEVERITY granted on match */
	unsigned char mcgmask;		/* bits of mce->mcgstatus to test */
	unsigned char mcgres;		/* expected value of (mcgstatus & mcgmask) */
	unsigned char ser;		/* SER_REQUIRED/NO_SER; 0 = don't care */
	unsigned char context;		/* IN_KERNEL/IN_USER/IN_KERNEL_RECOV; 0 = don't care */
	unsigned char excp;		/* EXCP_CONTEXT/NO_EXCP; 0 = don't care */
	unsigned char covered;		/* rule matched at least once (debugfs stats) */
	unsigned char cpu_model;	/* restrict rule to this CPU model; 0 = any */
	unsigned char cpu_minstepping;	/* minimum stepping required for the rule */
	unsigned char bank_lo, bank_hi;	/* restrict rule to this bank range; 0 = any */
	char *msg;			/* human-readable description of the rule */
} severities[] = {
/* Shorthand initializer macros for the rule table below: */
#define MCESEV(s, m, c...) { .sev = MCE_ ## s ## _SEVERITY, .msg = m, ## c }
#define BANK_RANGE(l, h) .bank_lo = l, .bank_hi = h
#define MODEL_STEPPING(m, s) .cpu_model = m, .cpu_minstepping = s
#define KERNEL		.context = IN_KERNEL
#define USER		.context = IN_USER
#define KERNEL_RECOV	.context = IN_KERNEL_RECOV
#define SER		.ser = SER_REQUIRED
#define NOSER		.ser = NO_SER
#define EXCP		.excp = EXCP_CONTEXT
#define NOEXCP		.excp = NO_EXCP
#define BITCLR(x)	.mask = x, .result = 0
#define BITSET(x)	.mask = x, .result = x
#define MCGMASK(x, y)	.mcgmask = x, .mcgres = y
#define MASK(x, y)	.mask = x, .result = y
#define MCI_UC_S (MCI_STATUS_UC|MCI_STATUS_S)
#define MCI_UC_AR (MCI_STATUS_UC|MCI_STATUS_AR)
#define MCI_UC_SAR (MCI_STATUS_UC|MCI_STATUS_S|MCI_STATUS_AR)
#define	MCI_ADDR (MCI_STATUS_ADDRV|MCI_STATUS_MISCV)

	MCESEV(
		NO, "Invalid",
		BITCLR(MCI_STATUS_VAL)
		),
	MCESEV(
		NO, "Not enabled",
		EXCP, BITCLR(MCI_STATUS_EN)
		),
	MCESEV(
		PANIC, "Processor context corrupt",
		BITSET(MCI_STATUS_PCC)
		),
	/* When MCIP is not set something is very confused */
	MCESEV(
		PANIC, "MCIP not set in MCA handler",
		EXCP, MCGMASK(MCG_STATUS_MCIP, 0)
		),
	/* Neither return nor error IP -- no chance to recover -> PANIC */
	MCESEV(
		PANIC, "Neither restart nor error IP",
		EXCP, MCGMASK(MCG_STATUS_RIPV|MCG_STATUS_EIPV, 0)
		),
	MCESEV(
		PANIC, "In kernel and no restart IP",
		EXCP, KERNEL, MCGMASK(MCG_STATUS_RIPV, 0)
		),
	MCESEV(
		PANIC, "In kernel and no restart IP",
		EXCP, KERNEL_RECOV, MCGMASK(MCG_STATUS_RIPV, 0)
		),
	MCESEV(
		KEEP, "Corrected error",
		NOSER, BITCLR(MCI_STATUS_UC)
		),
	/*
	 * known AO MCACODs reported via MCE or CMC:
	 *
	 * SRAO could be signaled either via a machine check exception or
	 * CMCI with the corresponding bit S 1 or 0. So we don't need to
	 * check bit S for SRAO.
	 */
	MCESEV(
		AO, "Action optional: memory scrubbing error",
		SER, MASK(MCI_UC_AR|MCACOD_SCRUBMSK, MCI_STATUS_UC|MCACOD_SCRUB)
		),
	MCESEV(
		AO, "Action optional: last level cache writeback error",
		SER, MASK(MCI_UC_AR|MCACOD, MCI_STATUS_UC|MCACOD_L3WB)
		),
	/*
	 * Quirk for Skylake/Cascade Lake. Patrol scrubber may be configured
	 * to report uncorrected errors using CMCI with a special signature.
	 * UC=0, MSCOD=0x0010, MCACOD=binary(000X 0000 1100 XXXX) reported
	 * in one of the memory controller banks.
	 * Set severity to "AO" for same action as normal patrol scrub error.
	 */
	MCESEV(
		AO, "Uncorrected Patrol Scrub Error",
		SER, MASK(MCI_STATUS_UC|MCI_ADDR|0xffffeff0, MCI_ADDR|0x001000c0),
		MODEL_STEPPING(INTEL_FAM6_SKYLAKE_X, 4), BANK_RANGE(13, 18)
	),

	/* ignore OVER for UCNA */
	MCESEV(
		UCNA, "Uncorrected no action required",
		SER, MASK(MCI_UC_SAR, MCI_STATUS_UC)
		),
	MCESEV(
		PANIC, "Illegal combination (UCNA with AR=1)",
		SER,
		MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_STATUS_UC|MCI_STATUS_AR)
		),
	MCESEV(
		KEEP, "Non signaled machine check",
		SER, BITCLR(MCI_STATUS_S)
		),

	MCESEV(
		PANIC, "Action required with lost events",
		SER, BITSET(MCI_STATUS_OVER|MCI_UC_SAR)
		),

	/* known AR MCACODs: */
#ifdef	CONFIG_MEMORY_FAILURE
	MCESEV(
		KEEP, "Action required but unaffected thread is continuable",
		SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR, MCI_UC_SAR|MCI_ADDR),
		MCGMASK(MCG_STATUS_RIPV|MCG_STATUS_EIPV, MCG_STATUS_RIPV)
		),
	MCESEV(
		AR, "Action required: data load in error recoverable area of kernel",
		SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_DATA),
		KERNEL_RECOV
		),
	MCESEV(
		AR, "Action required: data load error in a user process",
		SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_DATA),
		USER
		),
	MCESEV(
		AR, "Action required: instruction fetch error in a user process",
		SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_INSTR),
		USER
		),
	MCESEV(
		PANIC, "Data load in unrecoverable area of kernel",
		SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_DATA),
		KERNEL
		),
	MCESEV(
		PANIC, "Instruction fetch error in kernel",
		SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_INSTR),
		KERNEL
		),
#endif
	MCESEV(
		PANIC, "Action required: unknown MCACOD",
		SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_UC_SAR)
		),

	MCESEV(
		SOME, "Action optional: unknown MCACOD",
		SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_UC_S)
		),
	MCESEV(
		SOME, "Action optional with lost events",
		SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_STATUS_OVER|MCI_UC_S)
		),

	MCESEV(
		PANIC, "Overflowed uncorrected",
		BITSET(MCI_STATUS_OVER|MCI_STATUS_UC)
		),
	MCESEV(
		PANIC, "Uncorrected in kernel",
		BITSET(MCI_STATUS_UC),
		KERNEL
		),
	MCESEV(
		UC, "Uncorrected",
		BITSET(MCI_STATUS_UC)
		),
	MCESEV(
		SOME, "No match",
		BITSET(0)
		)	/* always matches. keep at end */
};
219 | |
/* Recovery is only possible when both the return IP and error IP are valid. */
#define mc_recoverable(mcg) (((mcg) & (MCG_STATUS_RIPV|MCG_STATUS_EIPV)) == \
			     (MCG_STATUS_RIPV|MCG_STATUS_EIPV))
222 | |
223 | static bool is_copy_from_user(struct pt_regs *regs) |
224 | { |
225 | u8 insn_buf[MAX_INSN_SIZE]; |
226 | unsigned long addr; |
227 | struct insn insn; |
228 | int ret; |
229 | |
230 | if (!regs) |
231 | return false; |
232 | |
233 | if (copy_from_kernel_nofault(dst: insn_buf, src: (void *)regs->ip, MAX_INSN_SIZE)) |
234 | return false; |
235 | |
236 | ret = insn_decode_kernel(&insn, insn_buf); |
237 | if (ret < 0) |
238 | return false; |
239 | |
240 | switch (insn.opcode.value) { |
241 | /* MOV mem,reg */ |
242 | case 0x8A: case 0x8B: |
243 | /* MOVZ mem,reg */ |
244 | case 0xB60F: case 0xB70F: |
245 | addr = (unsigned long)insn_get_addr_ref(insn: &insn, regs); |
246 | break; |
247 | /* REP MOVS */ |
248 | case 0xA4: case 0xA5: |
249 | addr = regs->si; |
250 | break; |
251 | default: |
252 | return false; |
253 | } |
254 | |
255 | if (fault_in_kernel_space(address: addr)) |
256 | return false; |
257 | |
258 | current->mce_vaddr = (void __user *)addr; |
259 | |
260 | return true; |
261 | } |
262 | |
263 | /* |
264 | * If mcgstatus indicated that ip/cs on the stack were |
265 | * no good, then "m->cs" will be zero and we will have |
266 | * to assume the worst case (IN_KERNEL) as we actually |
267 | * have no idea what we were executing when the machine |
268 | * check hit. |
269 | * If we do have a good "m->cs" (or a faked one in the |
270 | * case we were executing in VM86 mode) we can use it to |
 * distinguish an exception taken in user mode from one
272 | * taken in the kernel. |
273 | */ |
274 | static noinstr int error_context(struct mce *m, struct pt_regs *regs) |
275 | { |
276 | int fixup_type; |
277 | bool copy_user; |
278 | |
279 | if ((m->cs & 3) == 3) |
280 | return IN_USER; |
281 | |
282 | if (!mc_recoverable(m->mcgstatus)) |
283 | return IN_KERNEL; |
284 | |
285 | /* Allow instrumentation around external facilities usage. */ |
286 | instrumentation_begin(); |
287 | fixup_type = ex_get_fixup_type(ip: m->ip); |
288 | copy_user = is_copy_from_user(regs); |
289 | instrumentation_end(); |
290 | |
291 | switch (fixup_type) { |
292 | case EX_TYPE_UACCESS: |
293 | case EX_TYPE_COPY: |
294 | if (!copy_user) |
295 | return IN_KERNEL; |
296 | m->kflags |= MCE_IN_KERNEL_COPYIN; |
297 | fallthrough; |
298 | |
299 | case EX_TYPE_FAULT_MCE_SAFE: |
300 | case EX_TYPE_DEFAULT_MCE_SAFE: |
301 | m->kflags |= MCE_IN_KERNEL_RECOV; |
302 | return IN_KERNEL_RECOV; |
303 | |
304 | default: |
305 | return IN_KERNEL; |
306 | } |
307 | } |
308 | |
309 | /* See AMD PPR(s) section Machine Check Error Handling. */ |
310 | static noinstr int mce_severity_amd(struct mce *m, struct pt_regs *regs, char **msg, bool is_excp) |
311 | { |
312 | char *panic_msg = NULL; |
313 | int ret; |
314 | |
315 | /* |
316 | * Default return value: Action required, the error must be handled |
317 | * immediately. |
318 | */ |
319 | ret = MCE_AR_SEVERITY; |
320 | |
321 | /* Processor Context Corrupt, no need to fumble too much, die! */ |
322 | if (m->status & MCI_STATUS_PCC) { |
323 | panic_msg = "Processor Context Corrupt" ; |
324 | ret = MCE_PANIC_SEVERITY; |
325 | goto out; |
326 | } |
327 | |
328 | if (m->status & MCI_STATUS_DEFERRED) { |
329 | ret = MCE_DEFERRED_SEVERITY; |
330 | goto out; |
331 | } |
332 | |
333 | /* |
334 | * If the UC bit is not set, the system either corrected or deferred |
335 | * the error. No action will be required after logging the error. |
336 | */ |
337 | if (!(m->status & MCI_STATUS_UC)) { |
338 | ret = MCE_KEEP_SEVERITY; |
339 | goto out; |
340 | } |
341 | |
342 | /* |
343 | * On MCA overflow, without the MCA overflow recovery feature the |
344 | * system will not be able to recover, panic. |
345 | */ |
346 | if ((m->status & MCI_STATUS_OVER) && !mce_flags.overflow_recov) { |
347 | panic_msg = "Overflowed uncorrected error without MCA Overflow Recovery" ; |
348 | ret = MCE_PANIC_SEVERITY; |
349 | goto out; |
350 | } |
351 | |
352 | if (!mce_flags.succor) { |
353 | panic_msg = "Uncorrected error without MCA Recovery" ; |
354 | ret = MCE_PANIC_SEVERITY; |
355 | goto out; |
356 | } |
357 | |
358 | if (error_context(m, regs) == IN_KERNEL) { |
359 | panic_msg = "Uncorrected unrecoverable error in kernel context" ; |
360 | ret = MCE_PANIC_SEVERITY; |
361 | } |
362 | |
363 | out: |
364 | if (msg && panic_msg) |
365 | *msg = panic_msg; |
366 | |
367 | return ret; |
368 | } |
369 | |
/*
 * Grade an Intel machine check by walking the severities[] rule table in
 * order; the first rule whose every constraint matches wins. The table
 * ends with a catch-all entry, so the loop always terminates.
 */
static noinstr int mce_severity_intel(struct mce *m, struct pt_regs *regs, char **msg, bool is_excp)
{
	enum exception excp = (is_excp ? EXCP_CONTEXT : NO_EXCP);
	enum context ctx = error_context(m, regs);
	struct severity *s;

	for (s = severities;; s++) {
		if ((m->status & s->mask) != s->result)
			continue;
		if ((m->mcgstatus & s->mcgmask) != s->mcgres)
			continue;
		/* Rules may require or forbid software error recovery (SER) */
		if (s->ser == SER_REQUIRED && !mca_cfg.ser)
			continue;
		if (s->ser == NO_SER && mca_cfg.ser)
			continue;
		if (s->context && ctx != s->context)
			continue;
		if (s->excp && excp != s->excp)
			continue;
		/* Optional per-model/stepping/bank-range restrictions */
		if (s->cpu_model && boot_cpu_data.x86_model != s->cpu_model)
			continue;
		if (s->cpu_minstepping && boot_cpu_data.x86_stepping < s->cpu_minstepping)
			continue;
		if (s->bank_lo && (m->bank < s->bank_lo || m->bank > s->bank_hi))
			continue;
		if (msg)
			*msg = s->msg;
		s->covered = 1;	/* coverage statistic for debugfs */

		return s->sev;
	}
}
402 | |
403 | int noinstr mce_severity(struct mce *m, struct pt_regs *regs, char **msg, bool is_excp) |
404 | { |
405 | if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD || |
406 | boot_cpu_data.x86_vendor == X86_VENDOR_HYGON) |
407 | return mce_severity_amd(m, regs, msg, is_excp); |
408 | else |
409 | return mce_severity_intel(m, regs, msg, is_excp); |
410 | } |
411 | |
412 | #ifdef CONFIG_DEBUG_FS |
413 | static void *s_start(struct seq_file *f, loff_t *pos) |
414 | { |
415 | if (*pos >= ARRAY_SIZE(severities)) |
416 | return NULL; |
417 | return &severities[*pos]; |
418 | } |
419 | |
420 | static void *s_next(struct seq_file *f, void *data, loff_t *pos) |
421 | { |
422 | if (++(*pos) >= ARRAY_SIZE(severities)) |
423 | return NULL; |
424 | return &severities[*pos]; |
425 | } |
426 | |
/* seq_file .stop: nothing to release, iteration state lives in *pos. */
static void s_stop(struct seq_file *f, void *data)
{
}
430 | |
431 | static int s_show(struct seq_file *f, void *data) |
432 | { |
433 | struct severity *ser = data; |
434 | seq_printf(m: f, fmt: "%d\t%s\n" , ser->covered, ser->msg); |
435 | return 0; |
436 | } |
437 | |
/* seq_file iterator over the severities[] rule table, one rule per line. */
static const struct seq_operations severities_seq_ops = {
	.start = s_start,
	.next = s_next,
	.stop = s_stop,
	.show = s_show,
};
444 | |
/* Open handler: hook the file up to the seq_file table iterator. */
static int severities_coverage_open(struct inode *inode, struct file *file)
{
	return seq_open(file, &severities_seq_ops);
}
449 | |
450 | static ssize_t severities_coverage_write(struct file *file, |
451 | const char __user *ubuf, |
452 | size_t count, loff_t *ppos) |
453 | { |
454 | int i; |
455 | for (i = 0; i < ARRAY_SIZE(severities); i++) |
456 | severities[i].covered = 0; |
457 | return count; |
458 | } |
459 | |
/* Reads dump the coverage counters; any write resets them all to zero. */
static const struct file_operations severities_coverage_fops = {
	.open		= severities_coverage_open,
	.release	= seq_release,
	.read		= seq_read,
	.write		= severities_coverage_write,
	.llseek		= seq_lseek,
};
467 | |
468 | static int __init severities_debugfs_init(void) |
469 | { |
470 | struct dentry *dmce; |
471 | |
472 | dmce = mce_get_debugfs_dir(); |
473 | |
474 | debugfs_create_file(name: "severities-coverage" , mode: 0444, parent: dmce, NULL, |
475 | fops: &severities_coverage_fops); |
476 | return 0; |
477 | } |
478 | late_initcall(severities_debugfs_init); |
479 | #endif /* CONFIG_DEBUG_FS */ |
480 | |