1 | // SPDX-License-Identifier: GPL-2.0-only |
2 | /* |
3 | * MCE grading rules. |
4 | * Copyright 2008, 2009 Intel Corporation. |
5 | * |
6 | * Author: Andi Kleen |
7 | */ |
8 | #include <linux/kernel.h> |
9 | #include <linux/seq_file.h> |
10 | #include <linux/init.h> |
11 | #include <linux/debugfs.h> |
12 | #include <linux/uaccess.h> |
13 | |
14 | #include <asm/mce.h> |
15 | #include <asm/intel-family.h> |
16 | #include <asm/traps.h> |
17 | #include <asm/insn.h> |
18 | #include <asm/insn-eval.h> |
19 | |
20 | #include "internal.h" |
21 | |
22 | /* |
23 | * Grade an mce by severity. In general the most severe ones are processed |
24 | * first. Since there are quite a lot of combinations test the bits in a |
25 | * table-driven way. The rules are simply processed in order, first |
26 | * match wins. |
27 | * |
28 | * Note this is only used for machine check exceptions, the corrected |
29 | * errors use much simpler rules. The exceptions still check for the corrected |
30 | * errors, but only to leave them alone for the CMCI handler (except for |
31 | * panic situations) |
32 | */ |
33 | |
34 | enum context { IN_KERNEL = 1, IN_USER = 2, IN_KERNEL_RECOV = 3 }; |
35 | enum ser { SER_REQUIRED = 1, NO_SER = 2 }; |
36 | enum exception { EXCP_CONTEXT = 1, NO_EXCP = 2 }; |
37 | |
static struct severity {
	u64 mask;			/* bits of mce->status to test */
	u64 result;			/* expected value of (status & mask) */
	unsigned char sev;		/* MCE_*_SEVERITY granted on match */
	unsigned char mcgmask;		/* bits of mce->mcgstatus to test */
	unsigned char mcgres;		/* expected value of (mcgstatus & mcgmask) */
	unsigned char ser;		/* SER_REQUIRED/NO_SER; 0 = don't care */
	unsigned char context;		/* IN_KERNEL/IN_USER/IN_KERNEL_RECOV; 0 = don't care */
	unsigned char excp;		/* EXCP_CONTEXT/NO_EXCP; 0 = don't care */
	unsigned char covered;		/* rule matched at least once (debugfs stats) */
	unsigned char cpu_model;	/* restrict rule to this CPU model; 0 = any */
	unsigned char cpu_minstepping;	/* minimum stepping required for the rule */
	unsigned char bank_lo, bank_hi;	/* restrict rule to this bank range; 0 = any */
	char *msg;			/* human-readable description of the rule */
} severities[] = {
/* Shorthand initializer macros for the rule table below: */
#define MCESEV(s, m, c...) { .sev = MCE_ ## s ## _SEVERITY, .msg = m, ## c }
#define BANK_RANGE(l, h) .bank_lo = l, .bank_hi = h
#define MODEL_STEPPING(m, s) .cpu_model = m, .cpu_minstepping = s
#define KERNEL		.context = IN_KERNEL
#define USER		.context = IN_USER
#define KERNEL_RECOV	.context = IN_KERNEL_RECOV
#define SER		.ser = SER_REQUIRED
#define NOSER		.ser = NO_SER
#define EXCP		.excp = EXCP_CONTEXT
#define NOEXCP		.excp = NO_EXCP
#define BITCLR(x)	.mask = x, .result = 0
#define BITSET(x)	.mask = x, .result = x
#define MCGMASK(x, y)	.mcgmask = x, .mcgres = y
#define MASK(x, y)	.mask = x, .result = y
#define MCI_UC_S (MCI_STATUS_UC|MCI_STATUS_S)
#define MCI_UC_AR (MCI_STATUS_UC|MCI_STATUS_AR)
#define MCI_UC_SAR (MCI_STATUS_UC|MCI_STATUS_S|MCI_STATUS_AR)
#define	MCI_ADDR (MCI_STATUS_ADDRV|MCI_STATUS_MISCV)

	MCESEV(
		NO, "Invalid",
		BITCLR(MCI_STATUS_VAL)
		),
	MCESEV(
		NO, "Not enabled",
		EXCP, BITCLR(MCI_STATUS_EN)
		),
	MCESEV(
		PANIC, "Processor context corrupt",
		BITSET(MCI_STATUS_PCC)
		),
	/* When MCIP is not set something is very confused */
	MCESEV(
		PANIC, "MCIP not set in MCA handler",
		EXCP, MCGMASK(MCG_STATUS_MCIP, 0)
		),
	/* Neither return nor error IP -- no chance to recover -> PANIC */
	MCESEV(
		PANIC, "Neither restart nor error IP",
		EXCP, MCGMASK(MCG_STATUS_RIPV|MCG_STATUS_EIPV, 0)
		),
	MCESEV(
		PANIC, "In kernel and no restart IP",
		EXCP, KERNEL, MCGMASK(MCG_STATUS_RIPV, 0)
		),
	MCESEV(
		PANIC, "In kernel and no restart IP",
		EXCP, KERNEL_RECOV, MCGMASK(MCG_STATUS_RIPV, 0)
		),
	MCESEV(
		KEEP, "Corrected error",
		NOSER, BITCLR(MCI_STATUS_UC)
		),
	/*
	 * known AO MCACODs reported via MCE or CMC:
	 *
	 * SRAO could be signaled either via a machine check exception or
	 * CMCI with the corresponding bit S 1 or 0. So we don't need to
	 * check bit S for SRAO.
	 */
	MCESEV(
		AO, "Action optional: memory scrubbing error",
		SER, MASK(MCI_UC_AR|MCACOD_SCRUBMSK, MCI_STATUS_UC|MCACOD_SCRUB)
		),
	MCESEV(
		AO, "Action optional: last level cache writeback error",
		SER, MASK(MCI_UC_AR|MCACOD, MCI_STATUS_UC|MCACOD_L3WB)
		),
	/*
	 * Quirk for Skylake/Cascade Lake. Patrol scrubber may be configured
	 * to report uncorrected errors using CMCI with a special signature.
	 * UC=0, MSCOD=0x0010, MCACOD=binary(000X 0000 1100 XXXX) reported
	 * in one of the memory controller banks.
	 * Set severity to "AO" for same action as normal patrol scrub error.
	 */
	MCESEV(
		AO, "Uncorrected Patrol Scrub Error",
		SER, MASK(MCI_STATUS_UC|MCI_ADDR|0xffffeff0, MCI_ADDR|0x001000c0),
		MODEL_STEPPING(INTEL_FAM6_SKYLAKE_X, 4), BANK_RANGE(13, 18)
	),

	/* ignore OVER for UCNA */
	MCESEV(
		UCNA, "Uncorrected no action required",
		SER, MASK(MCI_UC_SAR, MCI_STATUS_UC)
		),
	MCESEV(
		PANIC, "Illegal combination (UCNA with AR=1)",
		SER,
		MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_STATUS_UC|MCI_STATUS_AR)
		),
	MCESEV(
		KEEP, "Non signaled machine check",
		SER, BITCLR(MCI_STATUS_S)
		),

	MCESEV(
		PANIC, "Action required with lost events",
		SER, BITSET(MCI_STATUS_OVER|MCI_UC_SAR)
		),

	/* known AR MCACODs: */
#ifdef	CONFIG_MEMORY_FAILURE
	MCESEV(
		KEEP, "Action required but unaffected thread is continuable",
		SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR, MCI_UC_SAR|MCI_ADDR),
		MCGMASK(MCG_STATUS_RIPV|MCG_STATUS_EIPV, MCG_STATUS_RIPV)
		),
	MCESEV(
		AR, "Action required: data load in error recoverable area of kernel",
		SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_DATA),
		KERNEL_RECOV
		),
	MCESEV(
		AR, "Action required: data load error in a user process",
		SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_DATA),
		USER
		),
	MCESEV(
		AR, "Action required: instruction fetch error in a user process",
		SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_INSTR),
		USER
		),
	MCESEV(
		PANIC, "Data load in unrecoverable area of kernel",
		SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_DATA),
		KERNEL
		),
	MCESEV(
		PANIC, "Instruction fetch error in kernel",
		SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_INSTR),
		KERNEL
		),
#endif
	MCESEV(
		PANIC, "Action required: unknown MCACOD",
		SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_UC_SAR)
		),

	MCESEV(
		SOME, "Action optional: unknown MCACOD",
		SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_UC_S)
		),
	MCESEV(
		SOME, "Action optional with lost events",
		SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_STATUS_OVER|MCI_UC_S)
		),

	MCESEV(
		PANIC, "Overflowed uncorrected",
		BITSET(MCI_STATUS_OVER|MCI_STATUS_UC)
		),
	MCESEV(
		PANIC, "Uncorrected in kernel",
		BITSET(MCI_STATUS_UC),
		KERNEL
		),
	MCESEV(
		UC, "Uncorrected",
		BITSET(MCI_STATUS_UC)
		),
	MCESEV(
		SOME, "No match",
		BITSET(0)
		)	/* always matches. keep at end */
};
219 | |
/* Recovery is only possible when both the return IP and error IP are valid. */
#define mc_recoverable(mcg) (((mcg) & (MCG_STATUS_RIPV|MCG_STATUS_EIPV)) == \
			     (MCG_STATUS_RIPV|MCG_STATUS_EIPV))
222 | |
223 | static bool is_copy_from_user(struct pt_regs *regs) |
224 | { |
225 | u8 insn_buf[MAX_INSN_SIZE]; |
226 | unsigned long addr; |
227 | struct insn insn; |
228 | int ret; |
229 | |
230 | if (!regs) |
231 | return false; |
232 | |
233 | if (copy_from_kernel_nofault(dst: insn_buf, src: (void *)regs->ip, MAX_INSN_SIZE)) |
234 | return false; |
235 | |
236 | ret = insn_decode_kernel(&insn, insn_buf); |
237 | if (ret < 0) |
238 | return false; |
239 | |
240 | switch (insn.opcode.value) { |
241 | /* MOV mem,reg */ |
242 | case 0x8A: case 0x8B: |
243 | /* MOVZ mem,reg */ |
244 | case 0xB60F: case 0xB70F: |
245 | addr = (unsigned long)insn_get_addr_ref(insn: &insn, regs); |
246 | break; |
247 | /* REP MOVS */ |
248 | case 0xA4: case 0xA5: |
249 | addr = regs->si; |
250 | break; |
251 | default: |
252 | return false; |
253 | } |
254 | |
255 | if (fault_in_kernel_space(address: addr)) |
256 | return false; |
257 | |
258 | current->mce_vaddr = (void __user *)addr; |
259 | |
260 | return true; |
261 | } |
262 | |
263 | /* |
264 | * If mcgstatus indicated that ip/cs on the stack were |
265 | * no good, then "m->cs" will be zero and we will have |
266 | * to assume the worst case (IN_KERNEL) as we actually |
267 | * have no idea what we were executing when the machine |
268 | * check hit. |
269 | * If we do have a good "m->cs" (or a faked one in the |
270 | * case we were executing in VM86 mode) we can use it to |
 * distinguish an exception taken in user mode from one
272 | * taken in the kernel. |
273 | */ |
274 | static noinstr int error_context(struct mce *m, struct pt_regs *regs) |
275 | { |
276 | int fixup_type; |
277 | bool copy_user; |
278 | |
279 | if ((m->cs & 3) == 3) |
280 | return IN_USER; |
281 | |
282 | if (!mc_recoverable(m->mcgstatus)) |
283 | return IN_KERNEL; |
284 | |
285 | /* Allow instrumentation around external facilities usage. */ |
286 | instrumentation_begin(); |
287 | fixup_type = ex_get_fixup_type(ip: m->ip); |
288 | copy_user = is_copy_from_user(regs); |
289 | instrumentation_end(); |
290 | |
291 | switch (fixup_type) { |
292 | case EX_TYPE_UACCESS: |
293 | case EX_TYPE_COPY: |
294 | if (!copy_user) |
295 | return IN_KERNEL; |
296 | m->kflags |= MCE_IN_KERNEL_COPYIN; |
297 | fallthrough; |
298 | |
299 | case EX_TYPE_FAULT_MCE_SAFE: |
300 | case EX_TYPE_DEFAULT_MCE_SAFE: |
301 | m->kflags |= MCE_IN_KERNEL_RECOV; |
302 | return IN_KERNEL_RECOV; |
303 | |
304 | default: |
305 | return IN_KERNEL; |
306 | } |
307 | } |
308 | |
309 | /* See AMD PPR(s) section Machine Check Error Handling. */ |
310 | static noinstr int mce_severity_amd(struct mce *m, struct pt_regs *regs, char **msg, bool is_excp) |
311 | { |
312 | char *panic_msg = NULL; |
313 | int ret; |
314 | |
315 | /* |
316 | * Default return value: Action required, the error must be handled |
317 | * immediately. |
318 | */ |
319 | ret = MCE_AR_SEVERITY; |
320 | |
321 | /* Processor Context Corrupt, no need to fumble too much, die! */ |
322 | if (m->status & MCI_STATUS_PCC) { |
323 | panic_msg = "Processor Context Corrupt" ; |
324 | ret = MCE_PANIC_SEVERITY; |
325 | goto out; |
326 | } |
327 | |
328 | if (m->status & MCI_STATUS_DEFERRED) { |
329 | ret = MCE_DEFERRED_SEVERITY; |
330 | goto out; |
331 | } |
332 | |
333 | /* |
334 | * If the UC bit is not set, the system either corrected or deferred |
335 | * the error. No action will be required after logging the error. |
336 | */ |
337 | if (!(m->status & MCI_STATUS_UC)) { |
338 | ret = MCE_KEEP_SEVERITY; |
339 | goto out; |
340 | } |
341 | |
342 | /* |
343 | * On MCA overflow, without the MCA overflow recovery feature the |
344 | * system will not be able to recover, panic. |
345 | */ |
346 | if ((m->status & MCI_STATUS_OVER) && !mce_flags.overflow_recov) { |
347 | panic_msg = "Overflowed uncorrected error without MCA Overflow Recovery" ; |
348 | ret = MCE_PANIC_SEVERITY; |
349 | goto out; |
350 | } |
351 | |
352 | if (!mce_flags.succor) { |
353 | panic_msg = "Uncorrected error without MCA Recovery" ; |
354 | ret = MCE_PANIC_SEVERITY; |
355 | goto out; |
356 | } |
357 | |
358 | if (error_context(m, regs) == IN_KERNEL) { |
359 | panic_msg = "Uncorrected unrecoverable error in kernel context" ; |
360 | ret = MCE_PANIC_SEVERITY; |
361 | } |
362 | |
363 | out: |
364 | if (msg && panic_msg) |
365 | *msg = panic_msg; |
366 | |
367 | return ret; |
368 | } |
369 | |
/*
 * Grade an Intel machine check by walking the severities[] rule table in
 * order; the first rule whose every constraint matches wins. The table
 * ends with a catch-all entry, so the loop always terminates.
 */
static noinstr int mce_severity_intel(struct mce *m, struct pt_regs *regs, char **msg, bool is_excp)
{
	enum exception excp = (is_excp ? EXCP_CONTEXT : NO_EXCP);
	enum context ctx = error_context(m, regs);
	struct severity *s;

	for (s = severities;; s++) {
		if ((m->status & s->mask) != s->result)
			continue;
		if ((m->mcgstatus & s->mcgmask) != s->mcgres)
			continue;
		/* Rules may require or forbid software error recovery (SER) */
		if (s->ser == SER_REQUIRED && !mca_cfg.ser)
			continue;
		if (s->ser == NO_SER && mca_cfg.ser)
			continue;
		if (s->context && ctx != s->context)
			continue;
		if (s->excp && excp != s->excp)
			continue;
		/* Optional per-model/stepping/bank-range restrictions */
		if (s->cpu_model && boot_cpu_data.x86_model != s->cpu_model)
			continue;
		if (s->cpu_minstepping && boot_cpu_data.x86_stepping < s->cpu_minstepping)
			continue;
		if (s->bank_lo && (m->bank < s->bank_lo || m->bank > s->bank_hi))
			continue;
		if (msg)
			*msg = s->msg;
		s->covered = 1;	/* coverage statistic for debugfs */

		return s->sev;
	}
}
402 | |
403 | int noinstr mce_severity(struct mce *m, struct pt_regs *regs, char **msg, bool is_excp) |
404 | { |
405 | if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD || |
406 | boot_cpu_data.x86_vendor == X86_VENDOR_HYGON) |
407 | return mce_severity_amd(m, regs, msg, is_excp); |
408 | else |
409 | return mce_severity_intel(m, regs, msg, is_excp); |
410 | } |
411 | |
412 | #ifdef CONFIG_DEBUG_FS |
413 | static void *s_start(struct seq_file *f, loff_t *pos) |
414 | { |
415 | if (*pos >= ARRAY_SIZE(severities)) |
416 | return NULL; |
417 | return &severities[*pos]; |
418 | } |
419 | |
420 | static void *s_next(struct seq_file *f, void *data, loff_t *pos) |
421 | { |
422 | if (++(*pos) >= ARRAY_SIZE(severities)) |
423 | return NULL; |
424 | return &severities[*pos]; |
425 | } |
426 | |
/* seq_file .stop: nothing to release, iteration state lives in *pos. */
static void s_stop(struct seq_file *f, void *data)
{
}
430 | |
431 | static int s_show(struct seq_file *f, void *data) |
432 | { |
433 | struct severity *ser = data; |
434 | seq_printf(m: f, fmt: "%d\t%s\n" , ser->covered, ser->msg); |
435 | return 0; |
436 | } |
437 | |
/* seq_file iterator over the severities[] rule table, one rule per line. */
static const struct seq_operations severities_seq_ops = {
	.start = s_start,
	.next = s_next,
	.stop = s_stop,
	.show = s_show,
};
444 | |
/* Open handler: hook the file up to the seq_file table iterator. */
static int severities_coverage_open(struct inode *inode, struct file *file)
{
	return seq_open(file, &severities_seq_ops);
}
449 | |
450 | static ssize_t severities_coverage_write(struct file *file, |
451 | const char __user *ubuf, |
452 | size_t count, loff_t *ppos) |
453 | { |
454 | int i; |
455 | for (i = 0; i < ARRAY_SIZE(severities); i++) |
456 | severities[i].covered = 0; |
457 | return count; |
458 | } |
459 | |
/* Reads dump the coverage counters; any write resets them all to zero. */
static const struct file_operations severities_coverage_fops = {
	.open		= severities_coverage_open,
	.release	= seq_release,
	.read		= seq_read,
	.write		= severities_coverage_write,
	.llseek		= seq_lseek,
};
467 | |
468 | static int __init severities_debugfs_init(void) |
469 | { |
470 | struct dentry *dmce; |
471 | |
472 | dmce = mce_get_debugfs_dir(); |
473 | |
474 | debugfs_create_file(name: "severities-coverage" , mode: 0444, parent: dmce, NULL, |
475 | fops: &severities_coverage_fops); |
476 | return 0; |
477 | } |
478 | late_initcall(severities_debugfs_init); |
479 | #endif /* CONFIG_DEBUG_FS */ |
480 | |