// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * CPU Microcode Update Driver for Linux
 *
 * Copyright (C) 2000-2006 Tigran Aivazian <aivazian.tigran@gmail.com>
 *               2006      Shaohua Li <shaohua.li@intel.com>
 *               2013-2016 Borislav Petkov <bp@alien8.de>
 *
 * X86 CPU microcode early update for Linux:
 *
 *      Copyright (C) 2012 Fenghua Yu <fenghua.yu@intel.com>
 *                         H Peter Anvin <hpa@zytor.com>
 *                (C) 2015 Borislav Petkov <bp@alien8.de>
 *
 * This driver allows updating microcode on x86 processors.
 */

#define pr_fmt(fmt) "microcode: " fmt

#include <linux/platform_device.h>
#include <linux/stop_machine.h>
#include <linux/syscore_ops.h>
#include <linux/miscdevice.h>
#include <linux/capability.h>
#include <linux/firmware.h>
#include <linux/cpumask.h>
#include <linux/kernel.h>
#include <linux/delay.h>
#include <linux/mutex.h>
#include <linux/cpu.h>
#include <linux/nmi.h>
#include <linux/fs.h>
#include <linux/mm.h>

#include <asm/apic.h>
#include <asm/cpu_device_id.h>
#include <asm/perf_event.h>
#include <asm/processor.h>
#include <asm/cmdline.h>
#include <asm/setup.h>

#include "internal.h"

static struct microcode_ops *microcode_ops;
bool dis_ucode_ldr = true;

bool force_minrev = IS_ENABLED(CONFIG_MICROCODE_LATE_FORCE_MINREV);
module_param(force_minrev, bool, S_IRUSR | S_IWUSR);

/*
 * Synchronization.
 *
 * All non cpu-hotplug-callback call sites use:
 *
 * - cpus_read_lock/unlock() to synchronize with
 *   the cpu-hotplug-callback call sites.
 *
 * We guarantee that only a single cpu is being
 * updated at any given time.
 */
struct ucode_cpu_info ucode_cpu_info[NR_CPUS];

struct cpu_info_ctx {
        struct cpu_signature    *cpu_sig;
        int                     err;
};

/*
 * Those patch levels cannot be updated to newer ones and thus should be final.
 */
static u32 final_levels[] = {
        0x01000098,
        0x0100009f,
        0x010000af,
        0, /* T-101 terminator */
};

struct early_load_data early_data;

/*
 * Check the current patch level on this CPU.
 *
 * Returns:
 *  - true: if update should stop
 *  - false: otherwise
 */
static bool amd_check_current_patch_level(void)
{
        u32 lvl, dummy, i;
        u32 *levels;

        native_rdmsr(MSR_AMD64_PATCH_LEVEL, lvl, dummy);

        levels = final_levels;

        for (i = 0; levels[i]; i++) {
                if (lvl == levels[i])
                        return true;
        }
        return false;
}

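/*
 * Decide on the BSP whether the early loader should be disabled: running
 * under a hypervisor, sitting on a final AMD patch level, or having
 * "dis_ucode_ldr" on the kernel command line all disable it.
 */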
static bool __init check_loader_disabled_bsp(void)
{
        static const char *__dis_opt_str = "dis_ucode_ldr";
        const char *cmdline = boot_command_line;
        const char *option = __dis_opt_str;

        /*
         * CPUID(1).ECX[31]: reserved for hypervisor use. This is still not
         * completely accurate as xen pv guests don't see that CPUID bit set but
         * that's good enough as they don't land on the BSP path anyway.
         */
        if (native_cpuid_ecx(1) & BIT(31))
                return true;

        if (x86_cpuid_vendor() == X86_VENDOR_AMD) {
                if (amd_check_current_patch_level())
                        return true;
        }

        if (cmdline_find_option_bool(cmdline, option) <= 0)
                dis_ucode_ldr = false;

        return dis_ucode_ldr;
}

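/*
 * Early microcode load on the boot CPU. Bails out on unsupported vendors
 * and families, or when the loader has been disabled.
 */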
void __init load_ucode_bsp(void)
{
        unsigned int cpuid_1_eax;
        bool intel = true;

        if (!have_cpuid_p())
                return;

        cpuid_1_eax = native_cpuid_eax(1);

        switch (x86_cpuid_vendor()) {
        case X86_VENDOR_INTEL:
                if (x86_family(cpuid_1_eax) < 6)
                        return;
                break;

        case X86_VENDOR_AMD:
                if (x86_family(cpuid_1_eax) < 0x10)
                        return;
                intel = false;
                break;

        default:
                return;
        }

        if (check_loader_disabled_bsp())
                return;

        if (intel)
                load_ucode_intel_bsp(&early_data);
        else
                load_ucode_amd_bsp(&early_data, cpuid_1_eax);
}

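/* Early microcode load on a secondary (application) processor. */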
void load_ucode_ap(void)
{
        unsigned int cpuid_1_eax;

        if (dis_ucode_ldr)
                return;

        cpuid_1_eax = native_cpuid_eax(1);

        switch (x86_cpuid_vendor()) {
        case X86_VENDOR_INTEL:
                if (x86_family(cpuid_1_eax) >= 6)
                        load_ucode_intel_ap();
                break;
        case X86_VENDOR_AMD:
                if (x86_family(cpuid_1_eax) >= 0x10)
                        load_ucode_amd_ap(cpuid_1_eax);
                break;
        default:
                break;
        }
}

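/*
 * Scan the initrd cpio archive for a microcode blob at @path. Without
 * CONFIG_BLK_DEV_INITRD an empty cpio_data is returned.
 */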
struct cpio_data __init find_microcode_in_initrd(const char *path)
{
#ifdef CONFIG_BLK_DEV_INITRD
        unsigned long start = 0;
        size_t size;

#ifdef CONFIG_X86_32
        size = boot_params.hdr.ramdisk_size;
        /* Early load on BSP has a temporary mapping. */
        if (size)
                start = initrd_start_early;

#else /* CONFIG_X86_64 */
        size  = (unsigned long)boot_params.ext_ramdisk_size << 32;
        size |= boot_params.hdr.ramdisk_size;

        if (size) {
                start  = (unsigned long)boot_params.ext_ramdisk_image << 32;
                start |= boot_params.hdr.ramdisk_image;
                start += PAGE_OFFSET;
        }
#endif

        /*
         * Fixup the start address: after reserve_initrd() runs, initrd_start
         * has the virtual address of the beginning of the initrd. It also
         * possibly relocates the ramdisk. In either case, initrd_start contains
         * the updated address so use that instead.
         */
        if (initrd_start)
                start = initrd_start;

        return find_cpio_data(path, (void *)start, size, NULL);
#else /* !CONFIG_BLK_DEV_INITRD */
        return (struct cpio_data){ NULL, 0, "" };
#endif
}

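/* Redo the early microcode load, used e.g. when resuming the boot CPU. */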
static void reload_early_microcode(unsigned int cpu)
{
        int vendor, family;

        vendor = x86_cpuid_vendor();
        family = x86_cpuid_family();

        switch (vendor) {
        case X86_VENDOR_INTEL:
                if (family >= 6)
                        reload_ucode_intel();
                break;
        case X86_VENDOR_AMD:
                if (family >= 0x10)
                        reload_ucode_amd(cpu);
                break;
        default:
                break;
        }
}

/* fake device for request_firmware */
static struct platform_device *microcode_pdev;

#ifdef CONFIG_MICROCODE_LATE_LOADING
/*
 * Late loading dance. Why the heavy-handed stop_machine() effort?
 *
 * - HT siblings must be idle and not execute other code while the other sibling
 *   is loading microcode in order to avoid any negative interactions caused by
 *   the loading.
 *
 * - In addition, microcode update on the cores must be serialized until this
 *   requirement can be relaxed in the future. Right now, this is conservative
 *   and good.
 */
enum sibling_ctrl {
        /* Spinwait with timeout */
        SCTRL_WAIT,
        /* Invoke the microcode_apply() callback */
        SCTRL_APPLY,
        /* Proceed without invoking the microcode_apply() callback */
        SCTRL_DONE,
};

struct microcode_ctrl {
        enum sibling_ctrl       ctrl;
        enum ucode_state        result;
        unsigned int            ctrl_cpu;
        bool                    nmi_enabled;
};

DEFINE_STATIC_KEY_FALSE(microcode_nmi_handler_enable);
static DEFINE_PER_CPU(struct microcode_ctrl, ucode_ctrl);
static atomic_t late_cpus_in, offline_in_nmi;
static unsigned int loops_per_usec;
static cpumask_t cpu_offline_mask;

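/*
 * Rendezvous helper: signal arrival by decrementing @cnt and spin until all
 * CPUs have arrived or the one second timeout expires.
 */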
static noinstr bool wait_for_cpus(atomic_t *cnt)
{
        unsigned int timeout, loops;

        WARN_ON_ONCE(raw_atomic_dec_return(cnt) < 0);

        for (timeout = 0; timeout < USEC_PER_SEC; timeout++) {
                if (!raw_atomic_read(cnt))
                        return true;

                for (loops = 0; loops < loops_per_usec; loops++)
                        cpu_relax();

                /* If invoked directly, tickle the NMI watchdog */
                if (!microcode_ops->use_nmi && !(timeout % USEC_PER_MSEC)) {
                        instrumentation_begin();
                        touch_nmi_watchdog();
                        instrumentation_end();
                }
        }
        /* Prevent the late comers from making progress and let them time out */
        raw_atomic_inc(cnt);
        return false;
}

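/*
 * Spin until the control CPU moves this CPU's state away from SCTRL_WAIT,
 * with the same one second timeout as above.
 */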
static noinstr bool wait_for_ctrl(void)
{
        unsigned int timeout, loops;

        for (timeout = 0; timeout < USEC_PER_SEC; timeout++) {
                if (raw_cpu_read(ucode_ctrl.ctrl) != SCTRL_WAIT)
                        return true;

                for (loops = 0; loops < loops_per_usec; loops++)
                        cpu_relax();

                /* If invoked directly, tickle the NMI watchdog */
                if (!microcode_ops->use_nmi && !(timeout % USEC_PER_MSEC)) {
                        instrumentation_begin();
                        touch_nmi_watchdog();
                        instrumentation_end();
                }
        }
        return false;
}

/*
 * Protected against instrumentation up to the point where the primary
 * thread completed the update. See microcode_nmi_handler() for details.
 */
static noinstr bool load_secondary_wait(unsigned int ctrl_cpu)
{
        /* Initial rendezvous to ensure that all CPUs have arrived */
        if (!wait_for_cpus(&late_cpus_in)) {
                raw_cpu_write(ucode_ctrl.result, UCODE_TIMEOUT);
                return false;
        }

        /*
         * Wait for primary threads to complete. If one of them hangs due
         * to the update, there is no way out. This is non-recoverable
         * because the CPU might hold locks or resources and confuse the
         * scheduler, watchdogs etc. There is no way to safely evacuate the
         * machine.
         */
        if (wait_for_ctrl())
                return true;

        instrumentation_begin();
        panic("Microcode load: Primary CPU %d timed out\n", ctrl_cpu);
        instrumentation_end();
}

/*
 * Protected against instrumentation up to the point where the primary
 * thread completed the update. See microcode_nmi_handler() for details.
 */
static noinstr void load_secondary(unsigned int cpu)
{
        unsigned int ctrl_cpu = raw_cpu_read(ucode_ctrl.ctrl_cpu);
        enum ucode_state ret;

        if (!load_secondary_wait(ctrl_cpu)) {
                instrumentation_begin();
                pr_err_once("load: %d CPUs timed out\n",
                            atomic_read(&late_cpus_in) - 1);
                instrumentation_end();
                return;
        }

        /* Primary thread completed. Allow to invoke instrumentable code */
        instrumentation_begin();
        /*
         * If the primary succeeded then invoke the apply() callback,
         * otherwise copy the state from the primary thread.
         */
        if (this_cpu_read(ucode_ctrl.ctrl) == SCTRL_APPLY)
                ret = microcode_ops->apply_microcode(cpu);
        else
                ret = per_cpu(ucode_ctrl.result, ctrl_cpu);

        this_cpu_write(ucode_ctrl.result, ret);
        this_cpu_write(ucode_ctrl.ctrl, SCTRL_DONE);
        instrumentation_end();
}

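/*
 * Control thread: rendezvous with all CPUs, apply the update on this core
 * and then tell the sibling threads how to proceed.
 */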
static void __load_primary(unsigned int cpu)
{
        struct cpumask *secondaries = topology_sibling_cpumask(cpu);
        enum sibling_ctrl ctrl;
        enum ucode_state ret;
        unsigned int sibling;

        /* Initial rendezvous to ensure that all CPUs have arrived */
        if (!wait_for_cpus(&late_cpus_in)) {
                this_cpu_write(ucode_ctrl.result, UCODE_TIMEOUT);
                pr_err_once("load: %d CPUs timed out\n", atomic_read(&late_cpus_in) - 1);
                return;
        }

        ret = microcode_ops->apply_microcode(cpu);
        this_cpu_write(ucode_ctrl.result, ret);
        this_cpu_write(ucode_ctrl.ctrl, SCTRL_DONE);

        /*
         * If the update was successful, let the siblings run the apply()
         * callback. If not, tell them it's done. This also covers the
         * case where the CPU has uniform loading at package or system
         * scope implemented but does not advertise it.
         */
        if (ret == UCODE_UPDATED || ret == UCODE_OK)
                ctrl = SCTRL_APPLY;
        else
                ctrl = SCTRL_DONE;

        for_each_cpu(sibling, secondaries) {
                if (sibling != cpu)
                        per_cpu(ucode_ctrl.ctrl, sibling) = ctrl;
        }
}

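/*
 * Send an NMI to each soft-offlined sibling so it parks in the rendezvous
 * stub, then wait for all of them to check in.
 */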
static bool kick_offline_cpus(unsigned int nr_offl)
{
        unsigned int cpu, timeout;

        for_each_cpu(cpu, &cpu_offline_mask) {
                /* Enable the rendezvous handler and send NMI */
                per_cpu(ucode_ctrl.nmi_enabled, cpu) = true;
                apic_send_nmi_to_offline_cpu(cpu);
        }

        /* Wait for them to arrive */
        for (timeout = 0; timeout < (USEC_PER_SEC / 2); timeout++) {
                if (atomic_read(&offline_in_nmi) == nr_offl)
                        return true;
                udelay(1);
        }
        /* Let the others time out */
        return false;
}

static void release_offline_cpus(void)
{
        unsigned int cpu;

        for_each_cpu(cpu, &cpu_offline_mask)
                per_cpu(ucode_ctrl.ctrl, cpu) = SCTRL_DONE;
}

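/*
 * Primary thread entry point. CPU0 additionally kicks the soft-offlined
 * siblings into the NMI rendezvous before the update and releases them
 * unconditionally afterwards.
 */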
static void load_primary(unsigned int cpu)
{
        unsigned int nr_offl = cpumask_weight(&cpu_offline_mask);
        bool proceed = true;

        /* Kick soft-offlined SMT siblings if required */
        if (!cpu && nr_offl)
                proceed = kick_offline_cpus(nr_offl);

        /* If the soft-offlined CPUs did not respond, abort */
        if (proceed)
                __load_primary(cpu);

        /* Unconditionally release soft-offlined SMT siblings if required */
        if (!cpu && nr_offl)
                release_offline_cpus();
}

/*
 * Minimal stub rendezvous handler for soft-offlined CPUs which participate
 * in the NMI rendezvous to protect against a concurrent NMI on affected
 * CPUs.
 */
void noinstr microcode_offline_nmi_handler(void)
{
        if (!raw_cpu_read(ucode_ctrl.nmi_enabled))
                return;
        raw_cpu_write(ucode_ctrl.nmi_enabled, false);
        raw_cpu_write(ucode_ctrl.result, UCODE_OFFLINE);
        raw_atomic_inc(&offline_in_nmi);
        wait_for_ctrl();
}

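/*
 * Common rendezvous handler: the control CPU of a core takes the primary
 * path, all of its siblings take the secondary path.
 */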
static noinstr bool microcode_update_handler(void)
{
        unsigned int cpu = raw_smp_processor_id();

        if (raw_cpu_read(ucode_ctrl.ctrl_cpu) == cpu) {
                instrumentation_begin();
                load_primary(cpu);
                instrumentation_end();
        } else {
                load_secondary(cpu);
        }

        instrumentation_begin();
        touch_nmi_watchdog();
        instrumentation_end();

        return true;
}

/*
 * Protection against instrumentation is required for CPUs which are not
 * safe against an NMI which is delivered to the secondary SMT sibling
 * while the primary thread updates the microcode. Instrumentation can end
 * up in #INT3, #DB and #PF. The IRET from those exceptions reenables NMI
 * which is the opposite of what the NMI rendezvous is trying to achieve.
 *
 * The primary thread is safe versus instrumentation as the actual
 * microcode update handles this correctly. It's only the sibling code
 * path which must be NMI safe until the primary thread completed the
 * update.
 */
bool noinstr microcode_nmi_handler(void)
{
        if (!raw_cpu_read(ucode_ctrl.nmi_enabled))
                return false;

        raw_cpu_write(ucode_ctrl.nmi_enabled, false);
        return microcode_update_handler();
}

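/*
 * stop_machine() callback. Either arm the NMI handler and send an NMI to
 * self, or invoke the update handler directly.
 */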
static int load_cpus_stopped(void *unused)
{
        if (microcode_ops->use_nmi) {
                /* Enable the NMI handler and raise NMI */
                this_cpu_write(ucode_ctrl.nmi_enabled, true);
                apic->send_IPI(smp_processor_id(), NMI_VECTOR);
        } else {
                /* Just invoke the handler directly */
                microcode_update_handler();
        }
        return 0;
}

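/*
 * Orchestrate the late load under stop_machine(), then evaluate the per CPU
 * results and report the outcome.
 */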
static int load_late_stop_cpus(bool is_safe)
{
        unsigned int cpu, updated = 0, failed = 0, timedout = 0, siblings = 0;
        unsigned int nr_offl, offline = 0;
        int old_rev = boot_cpu_data.microcode;
        struct cpuinfo_x86 prev_info;

        if (!is_safe) {
                pr_err("Late microcode loading without minimal revision check.\n");
                pr_err("You should switch to early loading, if possible.\n");
        }

        atomic_set(&late_cpus_in, num_online_cpus());
        atomic_set(&offline_in_nmi, 0);
        loops_per_usec = loops_per_jiffy / (TICK_NSEC / 1000);

        /*
         * Take a snapshot before the microcode update in order to compare and
         * check whether any bits changed after an update.
         */
        store_cpu_caps(&prev_info);

        if (microcode_ops->use_nmi)
                static_branch_enable_cpuslocked(&microcode_nmi_handler_enable);

        stop_machine_cpuslocked(load_cpus_stopped, NULL, cpu_online_mask);

        if (microcode_ops->use_nmi)
                static_branch_disable_cpuslocked(&microcode_nmi_handler_enable);

        /* Analyze the results */
        for_each_cpu_and(cpu, cpu_present_mask, &cpus_booted_once_mask) {
                switch (per_cpu(ucode_ctrl.result, cpu)) {
                case UCODE_UPDATED:     updated++;      break;
                case UCODE_TIMEOUT:     timedout++;     break;
                case UCODE_OK:          siblings++;     break;
                case UCODE_OFFLINE:     offline++;      break;
                default:                failed++;       break;
                }
        }

        if (microcode_ops->finalize_late_load)
                microcode_ops->finalize_late_load(!updated);

        if (!updated) {
                /* Nothing changed. */
                if (!failed && !timedout)
                        return 0;

                nr_offl = cpumask_weight(&cpu_offline_mask);
                if (offline < nr_offl) {
                        pr_warn("%u offline siblings did not respond.\n",
                                nr_offl - atomic_read(&offline_in_nmi));
                        return -EIO;
                }
                pr_err("update failed: %u CPUs failed %u CPUs timed out\n",
                       failed, timedout);
                return -EIO;
        }

        if (!is_safe || failed || timedout)
                add_taint(TAINT_CPU_OUT_OF_SPEC, LOCKDEP_STILL_OK);

        pr_info("load: updated on %u primary CPUs with %u siblings\n", updated, siblings);
        if (failed || timedout) {
                pr_err("load incomplete. %u CPUs timed out or failed\n",
                       num_online_cpus() - (updated + siblings));
        }
        pr_info("revision: 0x%x -> 0x%x\n", old_rev, boot_cpu_data.microcode);
        microcode_check(&prev_info);

        return updated + siblings == num_online_cpus() ? 0 : -EIO;
}

/*
 * This function does two things:
 *
 * 1) Ensure that all required CPUs which are present and have been booted
 *    once are online.
 *
 *    To pass this check, all primary threads must be online.
 *
 *    If the microcode load is not safe against NMI then all SMT threads
 *    must be online as well because they still react to NMIs when they are
 *    soft-offlined and parked in one of the play_dead() variants. So if a
 *    NMI hits while the primary thread updates the microcode the resulting
 *    behaviour is undefined. The default play_dead() implementation on
 *    modern CPUs uses MWAIT, which is also not guaranteed to be safe
 *    against a microcode update which affects MWAIT.
 *
 *    As soft-offlined CPUs still react on NMIs, the SMT sibling
 *    restriction can be lifted when the vendor driver signals to use NMI
 *    for rendezvous and the APIC provides a mechanism to send an NMI to a
 *    soft-offlined CPU. The soft-offlined CPUs are then able to
 *    participate in the rendezvous in a trivial stub handler.
 *
 * 2) Initialize the per CPU control structure and create a cpumask
 *    which contains "offline" secondary threads, so they can be handled
 *    correctly by a control CPU.
 */
static bool setup_cpus(void)
{
        struct microcode_ctrl ctrl = { .ctrl = SCTRL_WAIT, .result = -1, };
        bool allow_smt_offline;
        unsigned int cpu;

        allow_smt_offline = microcode_ops->nmi_safe ||
                (microcode_ops->use_nmi && apic->nmi_to_offline_cpu);

        cpumask_clear(&cpu_offline_mask);

        for_each_cpu_and(cpu, cpu_present_mask, &cpus_booted_once_mask) {
                /*
                 * Offline CPUs sit in one of the play_dead() functions
                 * with interrupts disabled, but they still react on NMIs
                 * and execute arbitrary code. Also MWAIT being updated
                 * while the offline CPU sits there is not necessarily safe
                 * on all CPU variants.
                 *
                 * Mark them in the offline_cpus mask which will be handled
                 * by CPU0 later in the update process.
                 *
                 * Ensure that the primary thread is online so that it is
                 * guaranteed that all cores are updated.
                 */
                if (!cpu_online(cpu)) {
                        if (topology_is_primary_thread(cpu) || !allow_smt_offline) {
                                pr_err("CPU %u not online, loading aborted\n", cpu);
                                return false;
                        }
                        cpumask_set_cpu(cpu, &cpu_offline_mask);
                        per_cpu(ucode_ctrl, cpu) = ctrl;
                        continue;
                }

                /*
                 * Initialize the per CPU state. This is core scope for now,
                 * but prepared to take package or system scope into account.
                 */
                ctrl.ctrl_cpu = cpumask_first(topology_sibling_cpumask(cpu));
                per_cpu(ucode_ctrl, cpu) = ctrl;
        }
        return true;
}

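/*
 * Request the microcode image from the firmware loader and, if a suitable
 * image was found, perform the late load. Called with cpus_read_lock() held.
 */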
static int load_late_locked(void)
{
        if (!setup_cpus())
                return -EBUSY;

        switch (microcode_ops->request_microcode_fw(0, &microcode_pdev->dev)) {
        case UCODE_NEW:
                return load_late_stop_cpus(false);
        case UCODE_NEW_SAFE:
                return load_late_stop_cpus(true);
        case UCODE_NFOUND:
                return -ENOENT;
        default:
                return -EBADFD;
        }
}

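/* Sysfs "reload" file: writing '1' triggers a late microcode load. */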
static ssize_t reload_store(struct device *dev,
                            struct device_attribute *attr,
                            const char *buf, size_t size)
{
        unsigned long val;
        ssize_t ret;

        ret = kstrtoul(buf, 0, &val);
        if (ret || val != 1)
                return -EINVAL;

        cpus_read_lock();
        ret = load_late_locked();
        cpus_read_unlock();

        return ret ? : size;
}

static DEVICE_ATTR_WO(reload);
#endif

static ssize_t version_show(struct device *dev,
                            struct device_attribute *attr, char *buf)
{
        struct ucode_cpu_info *uci = ucode_cpu_info + dev->id;

        return sprintf(buf, "0x%x\n", uci->cpu_sig.rev);
}

static ssize_t processor_flags_show(struct device *dev,
                                    struct device_attribute *attr, char *buf)
{
        struct ucode_cpu_info *uci = ucode_cpu_info + dev->id;

        return sprintf(buf, "0x%x\n", uci->cpu_sig.pf);
}

static DEVICE_ATTR_RO(version);
static DEVICE_ATTR_RO(processor_flags);

static struct attribute *mc_default_attrs[] = {
        &dev_attr_version.attr,
        &dev_attr_processor_flags.attr,
        NULL
};

static const struct attribute_group mc_attr_group = {
        .attrs  = mc_default_attrs,
        .name   = "microcode",
};

static void microcode_fini_cpu(int cpu)
{
        if (microcode_ops->microcode_fini_cpu)
                microcode_ops->microcode_fini_cpu(cpu);
}

/**
 * microcode_bsp_resume - Update boot CPU microcode during resume.
 */
void microcode_bsp_resume(void)
{
        int cpu = smp_processor_id();
        struct ucode_cpu_info *uci = ucode_cpu_info + cpu;

        if (uci->mc)
                microcode_ops->apply_microcode(cpu);
        else
                reload_early_microcode(cpu);
}

static struct syscore_ops mc_syscore_ops = {
        .resume = microcode_bsp_resume,
};

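/*
 * CPU hotplug online callback: collect the CPU's microcode signature and
 * create its sysfs "microcode" group.
 */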
static int mc_cpu_online(unsigned int cpu)
{
        struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
        struct device *dev = get_cpu_device(cpu);

        memset(uci, 0, sizeof(*uci));

        microcode_ops->collect_cpu_info(cpu, &uci->cpu_sig);
        cpu_data(cpu).microcode = uci->cpu_sig.rev;
        if (!cpu)
                boot_cpu_data.microcode = uci->cpu_sig.rev;

        if (sysfs_create_group(&dev->kobj, &mc_attr_group))
                pr_err("Failed to create group for CPU%d\n", cpu);
        return 0;
}

static int mc_cpu_down_prep(unsigned int cpu)
{
        struct device *dev = get_cpu_device(cpu);

        microcode_fini_cpu(cpu);
        sysfs_remove_group(&dev->kobj, &mc_attr_group);
        return 0;
}

static struct attribute *cpu_root_microcode_attrs[] = {
#ifdef CONFIG_MICROCODE_LATE_LOADING
        &dev_attr_reload.attr,
#endif
        NULL
};

static const struct attribute_group cpu_root_microcode_group = {
        .name  = "microcode",
        .attrs = cpu_root_microcode_attrs,
};

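/*
 * Driver initialization: pick the vendor specific ops, register the platform
 * device for request_firmware(), create the sysfs interface and set up the
 * CPU hotplug callbacks.
 */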
static int __init microcode_init(void)
{
        struct device *dev_root;
        struct cpuinfo_x86 *c = &boot_cpu_data;
        int error;

        if (dis_ucode_ldr)
                return -EINVAL;

        if (c->x86_vendor == X86_VENDOR_INTEL)
                microcode_ops = init_intel_microcode();
        else if (c->x86_vendor == X86_VENDOR_AMD)
                microcode_ops = init_amd_microcode();
        else
                pr_err("no support for this CPU vendor\n");

        if (!microcode_ops)
                return -ENODEV;

        pr_info_once("Current revision: 0x%08x\n", (early_data.new_rev ?: early_data.old_rev));

        if (early_data.new_rev)
                pr_info_once("Updated early from: 0x%08x\n", early_data.old_rev);

        microcode_pdev = platform_device_register_simple("microcode", -1, NULL, 0);
        if (IS_ERR(microcode_pdev))
                return PTR_ERR(microcode_pdev);

        dev_root = bus_get_dev_root(&cpu_subsys);
        if (dev_root) {
                error = sysfs_create_group(&dev_root->kobj, &cpu_root_microcode_group);
                put_device(dev_root);
                if (error) {
                        pr_err("Error creating microcode group!\n");
                        goto out_pdev;
                }
        }

        register_syscore_ops(&mc_syscore_ops);
        cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "x86/microcode:online",
                          mc_cpu_online, mc_cpu_down_prep);

        return 0;

out_pdev:
        platform_device_unregister(microcode_pdev);
        return error;

}
late_initcall(microcode_init);