1 | // SPDX-License-Identifier: GPL-2.0-only |
2 | /* |
3 | * /dev/mcelog driver |
4 | * |
5 | * K8 parts Copyright 2002,2003 Andi Kleen, SuSE Labs. |
6 | * Rest from unknown author(s). |
7 | * 2004 Andi Kleen. Rewrote most of it. |
8 | * Copyright 2008 Intel Corporation |
9 | * Author: Andi Kleen |
10 | */ |
11 | |
12 | #include <linux/miscdevice.h> |
13 | #include <linux/slab.h> |
14 | #include <linux/kmod.h> |
15 | #include <linux/poll.h> |
16 | |
17 | #include "internal.h" |
18 | |
19 | static BLOCKING_NOTIFIER_HEAD(mce_injector_chain); |
20 | |
21 | static DEFINE_MUTEX(mce_chrdev_read_mutex); |
22 | |
23 | static char mce_helper[128]; |
24 | static char *mce_helper_argv[2] = { mce_helper, NULL }; |
25 | |
26 | /* |
27 | * Lockless MCE logging infrastructure. |
28 | * This avoids deadlocks on printk locks without having to break locks. Also |
29 | * separate MCEs from kernel messages to avoid bogus bug reports. |
30 | */ |
31 | |
32 | static struct mce_log_buffer *mcelog; |
33 | |
34 | static DECLARE_WAIT_QUEUE_HEAD(mce_chrdev_wait); |
35 | |
36 | static int dev_mce_log(struct notifier_block *nb, unsigned long val, |
37 | void *data) |
38 | { |
39 | struct mce *mce = (struct mce *)data; |
40 | unsigned int entry; |
41 | |
42 | if (mce->kflags & MCE_HANDLED_CEC) |
43 | return NOTIFY_DONE; |
44 | |
45 | mutex_lock(&mce_chrdev_read_mutex); |
46 | |
47 | entry = mcelog->next; |
48 | |
49 | /* |
50 | * When the buffer fills up discard new entries. Assume that the |
51 | * earlier errors are the more interesting ones: |
52 | */ |
53 | if (entry >= mcelog->len) { |
54 | set_bit(MCE_OVERFLOW, addr: (unsigned long *)&mcelog->flags); |
55 | goto unlock; |
56 | } |
57 | |
58 | mcelog->next = entry + 1; |
59 | |
60 | memcpy(mcelog->entry + entry, mce, sizeof(struct mce)); |
61 | mcelog->entry[entry].finished = 1; |
62 | mcelog->entry[entry].kflags = 0; |
63 | |
64 | /* wake processes polling /dev/mcelog */ |
65 | wake_up_interruptible(&mce_chrdev_wait); |
66 | |
67 | unlock: |
68 | mutex_unlock(lock: &mce_chrdev_read_mutex); |
69 | |
70 | if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD) |
71 | mce->kflags |= MCE_HANDLED_MCELOG; |
72 | |
73 | return NOTIFY_OK; |
74 | } |
75 | |
76 | static struct notifier_block dev_mcelog_nb = { |
77 | .notifier_call = dev_mce_log, |
78 | .priority = MCE_PRIO_MCELOG, |
79 | }; |
80 | |
81 | static void mce_do_trigger(struct work_struct *work) |
82 | { |
83 | call_usermodehelper(path: mce_helper, argv: mce_helper_argv, NULL, UMH_NO_WAIT); |
84 | } |
85 | |
86 | static DECLARE_WORK(mce_trigger_work, mce_do_trigger); |
87 | |
88 | |
89 | void mce_work_trigger(void) |
90 | { |
91 | if (mce_helper[0]) |
92 | schedule_work(work: &mce_trigger_work); |
93 | } |
94 | |
95 | static ssize_t |
96 | show_trigger(struct device *s, struct device_attribute *attr, char *buf) |
97 | { |
98 | strcpy(p: buf, q: mce_helper); |
99 | strcat(p: buf, q: "\n" ); |
100 | return strlen(mce_helper) + 1; |
101 | } |
102 | |
103 | static ssize_t set_trigger(struct device *s, struct device_attribute *attr, |
104 | const char *buf, size_t siz) |
105 | { |
106 | char *p; |
107 | |
108 | strscpy(p: mce_helper, q: buf, size: sizeof(mce_helper)); |
109 | p = strchr(mce_helper, '\n'); |
110 | |
111 | if (p) |
112 | *p = 0; |
113 | |
114 | return strlen(mce_helper) + !!p; |
115 | } |
116 | |
117 | DEVICE_ATTR(trigger, 0644, show_trigger, set_trigger); |
118 | |
119 | /* |
120 | * mce_chrdev: Character device /dev/mcelog to read and clear the MCE log. |
121 | */ |
122 | |
123 | static DEFINE_SPINLOCK(mce_chrdev_state_lock); |
124 | static int mce_chrdev_open_count; /* #times opened */ |
125 | static int mce_chrdev_open_exclu; /* already open exclusive? */ |
126 | |
127 | static int mce_chrdev_open(struct inode *inode, struct file *file) |
128 | { |
129 | spin_lock(lock: &mce_chrdev_state_lock); |
130 | |
131 | if (mce_chrdev_open_exclu || |
132 | (mce_chrdev_open_count && (file->f_flags & O_EXCL))) { |
133 | spin_unlock(lock: &mce_chrdev_state_lock); |
134 | |
135 | return -EBUSY; |
136 | } |
137 | |
138 | if (file->f_flags & O_EXCL) |
139 | mce_chrdev_open_exclu = 1; |
140 | mce_chrdev_open_count++; |
141 | |
142 | spin_unlock(lock: &mce_chrdev_state_lock); |
143 | |
144 | return nonseekable_open(inode, filp: file); |
145 | } |
146 | |
147 | static int mce_chrdev_release(struct inode *inode, struct file *file) |
148 | { |
149 | spin_lock(lock: &mce_chrdev_state_lock); |
150 | |
151 | mce_chrdev_open_count--; |
152 | mce_chrdev_open_exclu = 0; |
153 | |
154 | spin_unlock(lock: &mce_chrdev_state_lock); |
155 | |
156 | return 0; |
157 | } |
158 | |
159 | static int mce_apei_read_done; |
160 | |
161 | /* Collect MCE record of previous boot in persistent storage via APEI ERST. */ |
162 | static int __mce_read_apei(char __user **ubuf, size_t usize) |
163 | { |
164 | int rc; |
165 | u64 record_id; |
166 | struct mce m; |
167 | |
168 | if (usize < sizeof(struct mce)) |
169 | return -EINVAL; |
170 | |
171 | rc = apei_read_mce(m: &m, record_id: &record_id); |
172 | /* Error or no more MCE record */ |
173 | if (rc <= 0) { |
174 | mce_apei_read_done = 1; |
175 | /* |
176 | * When ERST is disabled, mce_chrdev_read() should return |
177 | * "no record" instead of "no device." |
178 | */ |
179 | if (rc == -ENODEV) |
180 | return 0; |
181 | return rc; |
182 | } |
183 | rc = -EFAULT; |
184 | if (copy_to_user(to: *ubuf, from: &m, n: sizeof(struct mce))) |
185 | return rc; |
186 | /* |
187 | * In fact, we should have cleared the record after that has |
188 | * been flushed to the disk or sent to network in |
189 | * /sbin/mcelog, but we have no interface to support that now, |
190 | * so just clear it to avoid duplication. |
191 | */ |
192 | rc = apei_clear_mce(record_id); |
193 | if (rc) { |
194 | mce_apei_read_done = 1; |
195 | return rc; |
196 | } |
197 | *ubuf += sizeof(struct mce); |
198 | |
199 | return 0; |
200 | } |
201 | |
202 | static ssize_t mce_chrdev_read(struct file *filp, char __user *ubuf, |
203 | size_t usize, loff_t *off) |
204 | { |
205 | char __user *buf = ubuf; |
206 | unsigned next; |
207 | int i, err; |
208 | |
209 | mutex_lock(&mce_chrdev_read_mutex); |
210 | |
211 | if (!mce_apei_read_done) { |
212 | err = __mce_read_apei(ubuf: &buf, usize); |
213 | if (err || buf != ubuf) |
214 | goto out; |
215 | } |
216 | |
217 | /* Only supports full reads right now */ |
218 | err = -EINVAL; |
219 | if (*off != 0 || usize < mcelog->len * sizeof(struct mce)) |
220 | goto out; |
221 | |
222 | next = mcelog->next; |
223 | err = 0; |
224 | |
225 | for (i = 0; i < next; i++) { |
226 | struct mce *m = &mcelog->entry[i]; |
227 | |
228 | err |= copy_to_user(to: buf, from: m, n: sizeof(*m)); |
229 | buf += sizeof(*m); |
230 | } |
231 | |
232 | memset(mcelog->entry, 0, next * sizeof(struct mce)); |
233 | mcelog->next = 0; |
234 | |
235 | if (err) |
236 | err = -EFAULT; |
237 | |
238 | out: |
239 | mutex_unlock(lock: &mce_chrdev_read_mutex); |
240 | |
241 | return err ? err : buf - ubuf; |
242 | } |
243 | |
244 | static __poll_t mce_chrdev_poll(struct file *file, poll_table *wait) |
245 | { |
246 | poll_wait(filp: file, wait_address: &mce_chrdev_wait, p: wait); |
247 | if (READ_ONCE(mcelog->next)) |
248 | return EPOLLIN | EPOLLRDNORM; |
249 | if (!mce_apei_read_done && apei_check_mce()) |
250 | return EPOLLIN | EPOLLRDNORM; |
251 | return 0; |
252 | } |
253 | |
254 | static long mce_chrdev_ioctl(struct file *f, unsigned int cmd, |
255 | unsigned long arg) |
256 | { |
257 | int __user *p = (int __user *)arg; |
258 | |
259 | if (!capable(CAP_SYS_ADMIN)) |
260 | return -EPERM; |
261 | |
262 | switch (cmd) { |
263 | case MCE_GET_RECORD_LEN: |
264 | return put_user(sizeof(struct mce), p); |
265 | case MCE_GET_LOG_LEN: |
266 | return put_user(mcelog->len, p); |
267 | case MCE_GETCLEAR_FLAGS: { |
268 | unsigned flags; |
269 | |
270 | do { |
271 | flags = mcelog->flags; |
272 | } while (cmpxchg(&mcelog->flags, flags, 0) != flags); |
273 | |
274 | return put_user(flags, p); |
275 | } |
276 | default: |
277 | return -ENOTTY; |
278 | } |
279 | } |
280 | |
281 | void mce_register_injector_chain(struct notifier_block *nb) |
282 | { |
283 | blocking_notifier_chain_register(nh: &mce_injector_chain, nb); |
284 | } |
285 | EXPORT_SYMBOL_GPL(mce_register_injector_chain); |
286 | |
287 | void mce_unregister_injector_chain(struct notifier_block *nb) |
288 | { |
289 | blocking_notifier_chain_unregister(nh: &mce_injector_chain, nb); |
290 | } |
291 | EXPORT_SYMBOL_GPL(mce_unregister_injector_chain); |
292 | |
293 | static ssize_t mce_chrdev_write(struct file *filp, const char __user *ubuf, |
294 | size_t usize, loff_t *off) |
295 | { |
296 | struct mce m; |
297 | |
298 | if (!capable(CAP_SYS_ADMIN)) |
299 | return -EPERM; |
300 | /* |
301 | * There are some cases where real MSR reads could slip |
302 | * through. |
303 | */ |
304 | if (!boot_cpu_has(X86_FEATURE_MCE) || !boot_cpu_has(X86_FEATURE_MCA)) |
305 | return -EIO; |
306 | |
307 | if ((unsigned long)usize > sizeof(struct mce)) |
308 | usize = sizeof(struct mce); |
309 | if (copy_from_user(to: &m, from: ubuf, n: usize)) |
310 | return -EFAULT; |
311 | |
312 | if (m.extcpu >= num_possible_cpus() || !cpu_online(cpu: m.extcpu)) |
313 | return -EINVAL; |
314 | |
315 | /* |
316 | * Need to give user space some time to set everything up, |
317 | * so do it a jiffie or two later everywhere. |
318 | */ |
319 | schedule_timeout(timeout: 2); |
320 | |
321 | blocking_notifier_call_chain(nh: &mce_injector_chain, val: 0, v: &m); |
322 | |
323 | return usize; |
324 | } |
325 | |
326 | static const struct file_operations mce_chrdev_ops = { |
327 | .open = mce_chrdev_open, |
328 | .release = mce_chrdev_release, |
329 | .read = mce_chrdev_read, |
330 | .write = mce_chrdev_write, |
331 | .poll = mce_chrdev_poll, |
332 | .unlocked_ioctl = mce_chrdev_ioctl, |
333 | .compat_ioctl = compat_ptr_ioctl, |
334 | .llseek = no_llseek, |
335 | }; |
336 | |
337 | static struct miscdevice mce_chrdev_device = { |
338 | MISC_MCELOG_MINOR, |
339 | "mcelog" , |
340 | &mce_chrdev_ops, |
341 | }; |
342 | |
343 | static __init int dev_mcelog_init_device(void) |
344 | { |
345 | int mce_log_len; |
346 | int err; |
347 | |
348 | mce_log_len = max(MCE_LOG_MIN_LEN, num_online_cpus()); |
349 | mcelog = kzalloc(struct_size(mcelog, entry, mce_log_len), GFP_KERNEL); |
350 | if (!mcelog) |
351 | return -ENOMEM; |
352 | |
353 | memcpy(mcelog->signature, MCE_LOG_SIGNATURE, sizeof(mcelog->signature)); |
354 | mcelog->len = mce_log_len; |
355 | mcelog->recordlen = sizeof(struct mce); |
356 | |
357 | /* register character device /dev/mcelog */ |
358 | err = misc_register(misc: &mce_chrdev_device); |
359 | if (err) { |
360 | if (err == -EBUSY) |
361 | /* Xen dom0 might have registered the device already. */ |
362 | pr_info("Unable to init device /dev/mcelog, already registered" ); |
363 | else |
364 | pr_err("Unable to init device /dev/mcelog (rc: %d)\n" , err); |
365 | |
366 | kfree(objp: mcelog); |
367 | return err; |
368 | } |
369 | |
370 | mce_register_decode_chain(nb: &dev_mcelog_nb); |
371 | return 0; |
372 | } |
373 | device_initcall_sync(dev_mcelog_init_device); |
374 | |