1 | // SPDX-License-Identifier: GPL-2.0-only |
2 | /* cpu_feature_enabled() cannot be used this early */ |
3 | #define USE_EARLY_PGTABLE_L5 |
4 | |
5 | #include <linux/memblock.h> |
6 | #include <linux/linkage.h> |
7 | #include <linux/bitops.h> |
8 | #include <linux/kernel.h> |
9 | #include <linux/export.h> |
10 | #include <linux/percpu.h> |
11 | #include <linux/string.h> |
12 | #include <linux/ctype.h> |
13 | #include <linux/delay.h> |
14 | #include <linux/sched/mm.h> |
15 | #include <linux/sched/clock.h> |
16 | #include <linux/sched/task.h> |
17 | #include <linux/sched/smt.h> |
18 | #include <linux/init.h> |
19 | #include <linux/kprobes.h> |
20 | #include <linux/kgdb.h> |
21 | #include <linux/mem_encrypt.h> |
22 | #include <linux/smp.h> |
23 | #include <linux/cpu.h> |
24 | #include <linux/io.h> |
25 | #include <linux/syscore_ops.h> |
26 | #include <linux/pgtable.h> |
27 | #include <linux/stackprotector.h> |
28 | #include <linux/utsname.h> |
29 | |
30 | #include <asm/alternative.h> |
31 | #include <asm/cmdline.h> |
32 | #include <asm/perf_event.h> |
33 | #include <asm/mmu_context.h> |
34 | #include <asm/doublefault.h> |
35 | #include <asm/archrandom.h> |
36 | #include <asm/hypervisor.h> |
37 | #include <asm/processor.h> |
38 | #include <asm/tlbflush.h> |
39 | #include <asm/debugreg.h> |
40 | #include <asm/sections.h> |
41 | #include <asm/vsyscall.h> |
42 | #include <linux/topology.h> |
43 | #include <linux/cpumask.h> |
44 | #include <linux/atomic.h> |
45 | #include <asm/proto.h> |
46 | #include <asm/setup.h> |
47 | #include <asm/apic.h> |
48 | #include <asm/desc.h> |
49 | #include <asm/fpu/api.h> |
50 | #include <asm/mtrr.h> |
51 | #include <asm/hwcap2.h> |
52 | #include <linux/numa.h> |
53 | #include <asm/numa.h> |
54 | #include <asm/asm.h> |
55 | #include <asm/bugs.h> |
56 | #include <asm/cpu.h> |
57 | #include <asm/mce.h> |
58 | #include <asm/msr.h> |
59 | #include <asm/cacheinfo.h> |
60 | #include <asm/memtype.h> |
61 | #include <asm/microcode.h> |
62 | #include <asm/intel-family.h> |
63 | #include <asm/cpu_device_id.h> |
64 | #include <asm/uv/uv.h> |
65 | #include <asm/ia32.h> |
66 | #include <asm/set_memory.h> |
67 | #include <asm/traps.h> |
68 | #include <asm/sev.h> |
69 | |
70 | #include "cpu.h" |
71 | |
72 | u32 elf_hwcap2 __read_mostly; |
73 | |
74 | /* Number of siblings per CPU package */ |
75 | int smp_num_siblings = 1; |
76 | EXPORT_SYMBOL(smp_num_siblings); |
77 | |
78 | static struct ppin_info { |
79 | int feature; |
80 | int msr_ppin_ctl; |
81 | int msr_ppin; |
82 | } ppin_info[] = { |
83 | [X86_VENDOR_INTEL] = { |
84 | .feature = X86_FEATURE_INTEL_PPIN, |
85 | .msr_ppin_ctl = MSR_PPIN_CTL, |
86 | .msr_ppin = MSR_PPIN |
87 | }, |
88 | [X86_VENDOR_AMD] = { |
89 | .feature = X86_FEATURE_AMD_PPIN, |
90 | .msr_ppin_ctl = MSR_AMD_PPIN_CTL, |
91 | .msr_ppin = MSR_AMD_PPIN |
92 | }, |
93 | }; |
94 | |
95 | static const struct x86_cpu_id ppin_cpuids[] = { |
96 | X86_MATCH_FEATURE(X86_FEATURE_AMD_PPIN, &ppin_info[X86_VENDOR_AMD]), |
97 | X86_MATCH_FEATURE(X86_FEATURE_INTEL_PPIN, &ppin_info[X86_VENDOR_INTEL]), |
98 | |
99 | /* Legacy models without CPUID enumeration */ |
100 | X86_MATCH_INTEL_FAM6_MODEL(IVYBRIDGE_X, &ppin_info[X86_VENDOR_INTEL]), |
101 | X86_MATCH_INTEL_FAM6_MODEL(HASWELL_X, &ppin_info[X86_VENDOR_INTEL]), |
102 | X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_D, &ppin_info[X86_VENDOR_INTEL]), |
103 | X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_X, &ppin_info[X86_VENDOR_INTEL]), |
104 | X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_X, &ppin_info[X86_VENDOR_INTEL]), |
105 | X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_X, &ppin_info[X86_VENDOR_INTEL]), |
106 | X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_D, &ppin_info[X86_VENDOR_INTEL]), |
107 | X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X, &ppin_info[X86_VENDOR_INTEL]), |
108 | X86_MATCH_INTEL_FAM6_MODEL(EMERALDRAPIDS_X, &ppin_info[X86_VENDOR_INTEL]), |
109 | X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNL, &ppin_info[X86_VENDOR_INTEL]), |
110 | X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNM, &ppin_info[X86_VENDOR_INTEL]), |
111 | |
112 | {} |
113 | }; |
114 | |
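/*
 * Read the Protected Processor Inventory Number if it is enumerated and
 * readable.  In PPIN_CTL, bit 0 is the lock bit and bit 1 the enable bit:
 * if the MSR is locked with the enable bit clear the PPIN cannot be read,
 * otherwise try to set the enable bit before reading the PPIN itself.
 */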
115 | static void ppin_init(struct cpuinfo_x86 *c) |
116 | { |
117 | const struct x86_cpu_id *id; |
118 | unsigned long long val; |
119 | struct ppin_info *info; |
120 | |
	id = x86_match_cpu(ppin_cpuids);
	if (!id)
		return;

	/*
	 * Testing the presence of the MSR is not enough. Need to check
	 * that the PPIN_CTL allows reading of the PPIN.
	 */
	info = (struct ppin_info *)id->driver_data;

	if (rdmsrl_safe(info->msr_ppin_ctl, &val))
132 | goto clear_ppin; |
133 | |
134 | if ((val & 3UL) == 1UL) { |
135 | /* PPIN locked in disabled mode */ |
136 | goto clear_ppin; |
137 | } |
138 | |
139 | /* If PPIN is disabled, try to enable */ |
140 | if (!(val & 2UL)) { |
		wrmsrl_safe(info->msr_ppin_ctl, val | 2UL);
		rdmsrl_safe(info->msr_ppin_ctl, &val);
	}

	/* Is the enable bit set? */
	if (val & 2UL) {
		c->ppin = __rdmsr(info->msr_ppin);
		set_cpu_cap(c, info->feature);
		return;
	}

clear_ppin:
	clear_cpu_cap(c, info->feature);
154 | } |
155 | |
156 | static void default_init(struct cpuinfo_x86 *c) |
157 | { |
158 | #ifdef CONFIG_X86_64 |
159 | cpu_detect_cache_sizes(c); |
160 | #else |
161 | /* Not much we can do here... */ |
162 | /* Check if at least it has cpuid */ |
163 | if (c->cpuid_level == -1) { |
164 | /* No cpuid. It must be an ancient CPU */ |
165 | if (c->x86 == 4) |
			strcpy(c->x86_model_id, "486");
		else if (c->x86 == 3)
			strcpy(c->x86_model_id, "386");
169 | } |
170 | #endif |
171 | } |
172 | |
173 | static const struct cpu_dev default_cpu = { |
174 | .c_init = default_init, |
	.c_vendor = "Unknown",
176 | .c_x86_vendor = X86_VENDOR_UNKNOWN, |
177 | }; |
178 | |
179 | static const struct cpu_dev *this_cpu = &default_cpu; |
180 | |
181 | DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = { |
182 | #ifdef CONFIG_X86_64 |
183 | /* |
184 | * We need valid kernel segments for data and code in long mode too |
185 | * IRET will check the segment types kkeil 2000/10/28 |
186 | * Also sysret mandates a special GDT layout |
187 | * |
188 | * TLS descriptors are currently at a different place compared to i386. |
189 | * Hopefully nobody expects them at a fixed place (Wine?) |
190 | */ |
191 | [GDT_ENTRY_KERNEL32_CS] = GDT_ENTRY_INIT(0xc09b, 0, 0xfffff), |
192 | [GDT_ENTRY_KERNEL_CS] = GDT_ENTRY_INIT(0xa09b, 0, 0xfffff), |
193 | [GDT_ENTRY_KERNEL_DS] = GDT_ENTRY_INIT(0xc093, 0, 0xfffff), |
194 | [GDT_ENTRY_DEFAULT_USER32_CS] = GDT_ENTRY_INIT(0xc0fb, 0, 0xfffff), |
195 | [GDT_ENTRY_DEFAULT_USER_DS] = GDT_ENTRY_INIT(0xc0f3, 0, 0xfffff), |
196 | [GDT_ENTRY_DEFAULT_USER_CS] = GDT_ENTRY_INIT(0xa0fb, 0, 0xfffff), |
197 | #else |
198 | [GDT_ENTRY_KERNEL_CS] = GDT_ENTRY_INIT(0xc09a, 0, 0xfffff), |
199 | [GDT_ENTRY_KERNEL_DS] = GDT_ENTRY_INIT(0xc092, 0, 0xfffff), |
200 | [GDT_ENTRY_DEFAULT_USER_CS] = GDT_ENTRY_INIT(0xc0fa, 0, 0xfffff), |
201 | [GDT_ENTRY_DEFAULT_USER_DS] = GDT_ENTRY_INIT(0xc0f2, 0, 0xfffff), |
202 | /* |
203 | * Segments used for calling PnP BIOS have byte granularity. |
	 * The code segments and data segments have fixed 64k limits,
205 | * the transfer segment sizes are set at run time. |
206 | */ |
207 | /* 32-bit code */ |
208 | [GDT_ENTRY_PNPBIOS_CS32] = GDT_ENTRY_INIT(0x409a, 0, 0xffff), |
209 | /* 16-bit code */ |
210 | [GDT_ENTRY_PNPBIOS_CS16] = GDT_ENTRY_INIT(0x009a, 0, 0xffff), |
211 | /* 16-bit data */ |
212 | [GDT_ENTRY_PNPBIOS_DS] = GDT_ENTRY_INIT(0x0092, 0, 0xffff), |
213 | /* 16-bit data */ |
214 | [GDT_ENTRY_PNPBIOS_TS1] = GDT_ENTRY_INIT(0x0092, 0, 0), |
215 | /* 16-bit data */ |
216 | [GDT_ENTRY_PNPBIOS_TS2] = GDT_ENTRY_INIT(0x0092, 0, 0), |
217 | /* |
218 | * The APM segments have byte granularity and their bases |
219 | * are set at run time. All have 64k limits. |
220 | */ |
221 | /* 32-bit code */ |
222 | [GDT_ENTRY_APMBIOS_BASE] = GDT_ENTRY_INIT(0x409a, 0, 0xffff), |
223 | /* 16-bit code */ |
224 | [GDT_ENTRY_APMBIOS_BASE+1] = GDT_ENTRY_INIT(0x009a, 0, 0xffff), |
225 | /* data */ |
226 | [GDT_ENTRY_APMBIOS_BASE+2] = GDT_ENTRY_INIT(0x4092, 0, 0xffff), |
227 | |
228 | [GDT_ENTRY_ESPFIX_SS] = GDT_ENTRY_INIT(0xc092, 0, 0xfffff), |
229 | [GDT_ENTRY_PERCPU] = GDT_ENTRY_INIT(0xc092, 0, 0xfffff), |
230 | #endif |
231 | } }; |
232 | EXPORT_PER_CPU_SYMBOL_GPL(gdt_page); |
233 | |
234 | #ifdef CONFIG_X86_64 |
235 | static int __init x86_nopcid_setup(char *s) |
236 | { |
237 | /* nopcid doesn't accept parameters */ |
238 | if (s) |
239 | return -EINVAL; |
240 | |
241 | /* do not emit a message if the feature is not present */ |
242 | if (!boot_cpu_has(X86_FEATURE_PCID)) |
243 | return 0; |
244 | |
245 | setup_clear_cpu_cap(X86_FEATURE_PCID); |
	pr_info("nopcid: PCID feature disabled\n");
	return 0;
}
early_param("nopcid", x86_nopcid_setup);
250 | #endif |
251 | |
252 | static int __init x86_noinvpcid_setup(char *s) |
253 | { |
254 | /* noinvpcid doesn't accept parameters */ |
255 | if (s) |
256 | return -EINVAL; |
257 | |
258 | /* do not emit a message if the feature is not present */ |
259 | if (!boot_cpu_has(X86_FEATURE_INVPCID)) |
260 | return 0; |
261 | |
262 | setup_clear_cpu_cap(X86_FEATURE_INVPCID); |
	pr_info("noinvpcid: INVPCID feature disabled\n");
	return 0;
}
early_param("noinvpcid", x86_noinvpcid_setup);
267 | |
268 | #ifdef CONFIG_X86_32 |
269 | static int cachesize_override = -1; |
270 | static int disable_x86_serial_nr = 1; |
271 | |
272 | static int __init cachesize_setup(char *str) |
273 | { |
274 | get_option(&str, &cachesize_override); |
275 | return 1; |
276 | } |
__setup("cachesize=", cachesize_setup);
278 | |
279 | /* Standard macro to see if a specific flag is changeable */ |
280 | static inline int flag_is_changeable_p(u32 flag) |
281 | { |
282 | u32 f1, f2; |
283 | |
284 | /* |
285 | * Cyrix and IDT cpus allow disabling of CPUID |
286 | * so the code below may return different results |
287 | * when it is executed before and after enabling |
288 | * the CPUID. Add "volatile" to not allow gcc to |
289 | * optimize the subsequent calls to this function. |
290 | */ |
291 | asm volatile ("pushfl \n\t" |
292 | "pushfl \n\t" |
293 | "popl %0 \n\t" |
294 | "movl %0, %1 \n\t" |
295 | "xorl %2, %0 \n\t" |
296 | "pushl %0 \n\t" |
297 | "popfl \n\t" |
298 | "pushfl \n\t" |
299 | "popl %0 \n\t" |
300 | "popfl \n\t" |
301 | |
302 | : "=&r" (f1), "=&r" (f2) |
303 | : "ir" (flag)); |
304 | |
305 | return ((f1^f2) & flag) != 0; |
306 | } |
307 | |
308 | /* Probe for the CPUID instruction */ |
309 | int have_cpuid_p(void) |
310 | { |
311 | return flag_is_changeable_p(X86_EFLAGS_ID); |
312 | } |
313 | |
314 | static void squash_the_stupid_serial_number(struct cpuinfo_x86 *c) |
315 | { |
316 | unsigned long lo, hi; |
317 | |
318 | if (!cpu_has(c, X86_FEATURE_PN) || !disable_x86_serial_nr) |
319 | return; |
320 | |
321 | /* Disable processor serial number: */ |
322 | |
323 | rdmsr(MSR_IA32_BBL_CR_CTL, lo, hi); |
324 | lo |= 0x200000; |
325 | wrmsr(MSR_IA32_BBL_CR_CTL, lo, hi); |
326 | |
	pr_notice("CPU serial number disabled.\n");
328 | clear_cpu_cap(c, X86_FEATURE_PN); |
329 | |
330 | /* Disabling the serial number may affect the cpuid level */ |
331 | c->cpuid_level = cpuid_eax(0); |
332 | } |
333 | |
334 | static int __init x86_serial_nr_setup(char *s) |
335 | { |
336 | disable_x86_serial_nr = 0; |
337 | return 1; |
338 | } |
__setup("serialnumber", x86_serial_nr_setup);
340 | #else |
341 | static inline int flag_is_changeable_p(u32 flag) |
342 | { |
343 | return 1; |
344 | } |
345 | static inline void squash_the_stupid_serial_number(struct cpuinfo_x86 *c) |
346 | { |
347 | } |
348 | #endif |
349 | |
350 | static __always_inline void setup_smep(struct cpuinfo_x86 *c) |
351 | { |
352 | if (cpu_has(c, X86_FEATURE_SMEP)) |
353 | cr4_set_bits(X86_CR4_SMEP); |
354 | } |
355 | |
356 | static __always_inline void setup_smap(struct cpuinfo_x86 *c) |
357 | { |
358 | unsigned long eflags = native_save_fl(); |
359 | |
360 | /* This should have been cleared long ago */ |
361 | BUG_ON(eflags & X86_EFLAGS_AC); |
362 | |
363 | if (cpu_has(c, X86_FEATURE_SMAP)) |
364 | cr4_set_bits(X86_CR4_SMAP); |
365 | } |
366 | |
367 | static __always_inline void setup_umip(struct cpuinfo_x86 *c) |
368 | { |
369 | /* Check the boot processor, plus build option for UMIP. */ |
370 | if (!cpu_feature_enabled(X86_FEATURE_UMIP)) |
371 | goto out; |
372 | |
373 | /* Check the current processor's cpuid bits. */ |
374 | if (!cpu_has(c, X86_FEATURE_UMIP)) |
375 | goto out; |
376 | |
377 | cr4_set_bits(X86_CR4_UMIP); |
378 | |
	pr_info_once("x86/cpu: User Mode Instruction Prevention (UMIP) activated\n");
380 | |
381 | return; |
382 | |
383 | out: |
384 | /* |
385 | * Make sure UMIP is disabled in case it was enabled in a |
386 | * previous boot (e.g., via kexec). |
387 | */ |
388 | cr4_clear_bits(X86_CR4_UMIP); |
389 | } |
390 | |
391 | /* These bits should not change their value after CPU init is finished. */ |
392 | static const unsigned long cr4_pinned_mask = |
393 | X86_CR4_SMEP | X86_CR4_SMAP | X86_CR4_UMIP | |
394 | X86_CR4_FSGSBASE | X86_CR4_CET; |
395 | static DEFINE_STATIC_KEY_FALSE_RO(cr_pinning); |
396 | static unsigned long cr4_pinned_bits __ro_after_init; |
397 | |
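/*
 * CR0.WP is pinned once cr_pinning is enabled: if a write drops WP, the
 * bit is restored immediately and a one-time warning is emitted.
 */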
398 | void native_write_cr0(unsigned long val) |
399 | { |
400 | unsigned long bits_missing = 0; |
401 | |
402 | set_register: |
	asm volatile("mov %0,%%cr0" : "+r" (val) : : "memory");
404 | |
405 | if (static_branch_likely(&cr_pinning)) { |
406 | if (unlikely((val & X86_CR0_WP) != X86_CR0_WP)) { |
407 | bits_missing = X86_CR0_WP; |
408 | val |= bits_missing; |
409 | goto set_register; |
410 | } |
411 | /* Warn after we've set the missing bits. */ |
		WARN_ONCE(bits_missing, "CR0 WP bit went missing!?\n");
413 | } |
414 | } |
415 | EXPORT_SYMBOL(native_write_cr0); |
416 | |
417 | void __no_profile native_write_cr4(unsigned long val) |
418 | { |
419 | unsigned long bits_changed = 0; |
420 | |
421 | set_register: |
	asm volatile("mov %0,%%cr4" : "+r" (val) : : "memory");
423 | |
424 | if (static_branch_likely(&cr_pinning)) { |
425 | if (unlikely((val & cr4_pinned_mask) != cr4_pinned_bits)) { |
426 | bits_changed = (val & cr4_pinned_mask) ^ cr4_pinned_bits; |
427 | val = (val & ~cr4_pinned_mask) | cr4_pinned_bits; |
428 | goto set_register; |
429 | } |
430 | /* Warn after we've corrected the changed bits. */ |
		WARN_ONCE(bits_changed, "pinned CR4 bits changed: 0x%lx!?\n",
			  bits_changed);
433 | } |
434 | } |
435 | #if IS_MODULE(CONFIG_LKDTM) |
436 | EXPORT_SYMBOL_GPL(native_write_cr4); |
437 | #endif |
438 | |
439 | void cr4_update_irqsoff(unsigned long set, unsigned long clear) |
440 | { |
441 | unsigned long newval, cr4 = this_cpu_read(cpu_tlbstate.cr4); |
442 | |
443 | lockdep_assert_irqs_disabled(); |
444 | |
445 | newval = (cr4 & ~clear) | set; |
446 | if (newval != cr4) { |
447 | this_cpu_write(cpu_tlbstate.cr4, newval); |
		__write_cr4(newval);
449 | } |
450 | } |
451 | EXPORT_SYMBOL(cr4_update_irqsoff); |
452 | |
453 | /* Read the CR4 shadow. */ |
454 | unsigned long cr4_read_shadow(void) |
455 | { |
456 | return this_cpu_read(cpu_tlbstate.cr4); |
457 | } |
458 | EXPORT_SYMBOL_GPL(cr4_read_shadow); |
459 | |
460 | void cr4_init(void) |
461 | { |
462 | unsigned long cr4 = __read_cr4(); |
463 | |
464 | if (boot_cpu_has(X86_FEATURE_PCID)) |
465 | cr4 |= X86_CR4_PCIDE; |
466 | if (static_branch_likely(&cr_pinning)) |
467 | cr4 = (cr4 & ~cr4_pinned_mask) | cr4_pinned_bits; |
468 | |
	__write_cr4(cr4);
470 | |
471 | /* Initialize cr4 shadow for this CPU. */ |
472 | this_cpu_write(cpu_tlbstate.cr4, cr4); |
473 | } |
474 | |
475 | /* |
476 | * Once CPU feature detection is finished (and boot params have been |
477 | * parsed), record any of the sensitive CR bits that are set, and |
478 | * enable CR pinning. |
479 | */ |
480 | static void __init setup_cr_pinning(void) |
481 | { |
482 | cr4_pinned_bits = this_cpu_read(cpu_tlbstate.cr4) & cr4_pinned_mask; |
	static_key_enable(&cr_pinning.key);
484 | } |
485 | |
486 | static __init int x86_nofsgsbase_setup(char *arg) |
487 | { |
488 | /* Require an exact match without trailing characters. */ |
489 | if (strlen(arg)) |
490 | return 0; |
491 | |
492 | /* Do not emit a message if the feature is not present. */ |
493 | if (!boot_cpu_has(X86_FEATURE_FSGSBASE)) |
494 | return 1; |
495 | |
496 | setup_clear_cpu_cap(X86_FEATURE_FSGSBASE); |
	pr_info("FSGSBASE disabled via kernel command line\n");
	return 1;
}
__setup("nofsgsbase", x86_nofsgsbase_setup);
501 | |
502 | /* |
503 | * Protection Keys are not available in 32-bit mode. |
504 | */ |
505 | static bool pku_disabled; |
506 | |
507 | static __always_inline void setup_pku(struct cpuinfo_x86 *c) |
508 | { |
509 | if (c == &boot_cpu_data) { |
510 | if (pku_disabled || !cpu_feature_enabled(X86_FEATURE_PKU)) |
511 | return; |
512 | /* |
513 | * Setting CR4.PKE will cause the X86_FEATURE_OSPKE cpuid |
514 | * bit to be set. Enforce it. |
515 | */ |
516 | setup_force_cpu_cap(X86_FEATURE_OSPKE); |
517 | |
518 | } else if (!cpu_feature_enabled(X86_FEATURE_OSPKE)) { |
519 | return; |
520 | } |
521 | |
522 | cr4_set_bits(X86_CR4_PKE); |
523 | /* Load the default PKRU value */ |
524 | pkru_write_default(); |
525 | } |
526 | |
527 | #ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS |
528 | static __init int setup_disable_pku(char *arg) |
529 | { |
530 | /* |
531 | * Do not clear the X86_FEATURE_PKU bit. All of the |
532 | * runtime checks are against OSPKE so clearing the |
533 | * bit does nothing. |
534 | * |
535 | * This way, we will see "pku" in cpuinfo, but not |
536 | * "ospke", which is exactly what we want. It shows |
537 | * that the CPU has PKU, but the OS has not enabled it. |
538 | * This happens to be exactly how a system would look |
539 | * if we disabled the config option. |
540 | */ |
	pr_info("x86: 'nopku' specified, disabling Memory Protection Keys\n");
542 | pku_disabled = true; |
543 | return 1; |
544 | } |
__setup("nopku", setup_disable_pku);
546 | #endif |
547 | |
548 | #ifdef CONFIG_X86_KERNEL_IBT |
549 | |
550 | __noendbr u64 ibt_save(bool disable) |
551 | { |
552 | u64 msr = 0; |
553 | |
554 | if (cpu_feature_enabled(X86_FEATURE_IBT)) { |
555 | rdmsrl(MSR_IA32_S_CET, msr); |
556 | if (disable) |
			wrmsrl(MSR_IA32_S_CET, msr & ~CET_ENDBR_EN);
558 | } |
559 | |
560 | return msr; |
561 | } |
562 | |
563 | __noendbr void ibt_restore(u64 save) |
564 | { |
565 | u64 msr; |
566 | |
567 | if (cpu_feature_enabled(X86_FEATURE_IBT)) { |
568 | rdmsrl(MSR_IA32_S_CET, msr); |
569 | msr &= ~CET_ENDBR_EN; |
570 | msr |= (save & CET_ENDBR_EN); |
		wrmsrl(MSR_IA32_S_CET, msr);
572 | } |
573 | } |
574 | |
575 | #endif |
576 | |
577 | static __always_inline void setup_cet(struct cpuinfo_x86 *c) |
578 | { |
579 | bool user_shstk, kernel_ibt; |
580 | |
581 | if (!IS_ENABLED(CONFIG_X86_CET)) |
582 | return; |
583 | |
584 | kernel_ibt = HAS_KERNEL_IBT && cpu_feature_enabled(X86_FEATURE_IBT); |
585 | user_shstk = cpu_feature_enabled(X86_FEATURE_SHSTK) && |
586 | IS_ENABLED(CONFIG_X86_USER_SHADOW_STACK); |
587 | |
588 | if (!kernel_ibt && !user_shstk) |
589 | return; |
590 | |
591 | if (user_shstk) |
592 | set_cpu_cap(c, X86_FEATURE_USER_SHSTK); |
593 | |
594 | if (kernel_ibt) |
595 | wrmsrl(MSR_IA32_S_CET, CET_ENDBR_EN); |
596 | else |
		wrmsrl(MSR_IA32_S_CET, 0);

	cr4_set_bits(X86_CR4_CET);

	if (kernel_ibt && ibt_selftest()) {
		pr_err("IBT selftest: Failed!\n");
		wrmsrl(MSR_IA32_S_CET, 0);
604 | setup_clear_cpu_cap(X86_FEATURE_IBT); |
605 | } |
606 | } |
607 | |
608 | __noendbr void cet_disable(void) |
609 | { |
610 | if (!(cpu_feature_enabled(X86_FEATURE_IBT) || |
611 | cpu_feature_enabled(X86_FEATURE_SHSTK))) |
612 | return; |
613 | |
	wrmsrl(MSR_IA32_S_CET, 0);
	wrmsrl(MSR_IA32_U_CET, 0);
616 | } |
617 | |
618 | /* |
619 | * Some CPU features depend on higher CPUID levels, which may not always |
620 | * be available due to CPUID level capping or broken virtualization |
621 | * software. Add those features to this table to auto-disable them. |
622 | */ |
623 | struct cpuid_dependent_feature { |
624 | u32 feature; |
625 | u32 level; |
626 | }; |
627 | |
628 | static const struct cpuid_dependent_feature |
629 | cpuid_dependent_features[] = { |
630 | { X86_FEATURE_MWAIT, 0x00000005 }, |
631 | { X86_FEATURE_DCA, 0x00000009 }, |
632 | { X86_FEATURE_XSAVE, 0x0000000d }, |
633 | { 0, 0 } |
634 | }; |
635 | |
636 | static void filter_cpuid_features(struct cpuinfo_x86 *c, bool warn) |
637 | { |
638 | const struct cpuid_dependent_feature *df; |
639 | |
640 | for (df = cpuid_dependent_features; df->feature; df++) { |
641 | |
642 | if (!cpu_has(c, df->feature)) |
643 | continue; |
644 | /* |
645 | * Note: cpuid_level is set to -1 if unavailable, but |
		 * extended_cpuid_level is set to 0 if unavailable
647 | * and the legitimate extended levels are all negative |
648 | * when signed; hence the weird messing around with |
649 | * signs here... |
650 | */ |
651 | if (!((s32)df->level < 0 ? |
652 | (u32)df->level > (u32)c->extended_cpuid_level : |
653 | (s32)df->level > (s32)c->cpuid_level)) |
654 | continue; |
655 | |
		clear_cpu_cap(c, df->feature);
		if (!warn)
			continue;

		pr_warn("CPU: CPU feature " X86_CAP_FMT " disabled, no CPUID level 0x%x\n",
			x86_cap_flag(df->feature), df->level);
662 | } |
663 | } |
664 | |
665 | /* |
666 | * Naming convention should be: <Name> [(<Codename>)] |
 * This table is only used if init_<vendor>() below doesn't set it;
668 | * in particular, if CPUID levels 0x80000002..4 are supported, this |
669 | * isn't used |
670 | */ |
671 | |
672 | /* Look up CPU names by table lookup. */ |
673 | static const char *table_lookup_model(struct cpuinfo_x86 *c) |
674 | { |
675 | #ifdef CONFIG_X86_32 |
676 | const struct legacy_cpu_model_info *info; |
677 | |
678 | if (c->x86_model >= 16) |
679 | return NULL; /* Range check */ |
680 | |
681 | if (!this_cpu) |
682 | return NULL; |
683 | |
684 | info = this_cpu->legacy_models; |
685 | |
686 | while (info->family) { |
687 | if (info->family == c->x86) |
688 | return info->model_names[c->x86_model]; |
689 | info++; |
690 | } |
691 | #endif |
692 | return NULL; /* Not found */ |
693 | } |
694 | |
695 | /* Aligned to unsigned long to avoid split lock in atomic bitmap ops */ |
696 | __u32 cpu_caps_cleared[NCAPINTS + NBUGINTS] __aligned(sizeof(unsigned long)); |
697 | __u32 cpu_caps_set[NCAPINTS + NBUGINTS] __aligned(sizeof(unsigned long)); |
698 | |
699 | #ifdef CONFIG_X86_32 |
700 | /* The 32-bit entry code needs to find cpu_entry_area. */ |
701 | DEFINE_PER_CPU(struct cpu_entry_area *, cpu_entry_area); |
702 | #endif |
703 | |
704 | /* Load the original GDT from the per-cpu structure */ |
705 | void load_direct_gdt(int cpu) |
706 | { |
707 | struct desc_ptr gdt_descr; |
708 | |
709 | gdt_descr.address = (long)get_cpu_gdt_rw(cpu); |
710 | gdt_descr.size = GDT_SIZE - 1; |
	load_gdt(&gdt_descr);
712 | } |
713 | EXPORT_SYMBOL_GPL(load_direct_gdt); |
714 | |
715 | /* Load a fixmap remapping of the per-cpu GDT */ |
716 | void load_fixmap_gdt(int cpu) |
717 | { |
718 | struct desc_ptr gdt_descr; |
719 | |
720 | gdt_descr.address = (long)get_cpu_gdt_ro(cpu); |
721 | gdt_descr.size = GDT_SIZE - 1; |
	load_gdt(&gdt_descr);
723 | } |
724 | EXPORT_SYMBOL_GPL(load_fixmap_gdt); |
725 | |
726 | /** |
727 | * switch_gdt_and_percpu_base - Switch to direct GDT and runtime per CPU base |
728 | * @cpu: The CPU number for which this is invoked |
729 | * |
730 | * Invoked during early boot to switch from early GDT and early per CPU to |
731 | * the direct GDT and the runtime per CPU area. On 32-bit the percpu base |
 * switch is implicit by loading the direct GDT. On 64-bit this requires
 * updating GSBASE.
734 | */ |
735 | void __init switch_gdt_and_percpu_base(int cpu) |
736 | { |
737 | load_direct_gdt(cpu); |
738 | |
739 | #ifdef CONFIG_X86_64 |
740 | /* |
741 | * No need to load %gs. It is already correct. |
742 | * |
743 | * Writing %gs on 64bit would zero GSBASE which would make any per |
744 | * CPU operation up to the point of the wrmsrl() fault. |
745 | * |
746 | * Set GSBASE to the new offset. Until the wrmsrl() happens the |
747 | * early mapping is still valid. That means the GSBASE update will |
748 | * lose any prior per CPU data which was not copied over in |
749 | * setup_per_cpu_areas(). |
750 | * |
751 | * This works even with stackprotector enabled because the |
752 | * per CPU stack canary is 0 in both per CPU areas. |
753 | */ |
	wrmsrl(MSR_GS_BASE, cpu_kernelmode_gs_base(cpu));
755 | #else |
756 | /* |
757 | * %fs is already set to __KERNEL_PERCPU, but after switching GDT |
758 | * it is required to load FS again so that the 'hidden' part is |
759 | * updated from the new GDT. Up to this point the early per CPU |
760 | * translation is active. Any content of the early per CPU data |
761 | * which was not copied over in setup_per_cpu_areas() is lost. |
762 | */ |
763 | loadsegment(fs, __KERNEL_PERCPU); |
764 | #endif |
765 | } |
766 | |
767 | static const struct cpu_dev *cpu_devs[X86_VENDOR_NUM] = {}; |
768 | |
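/*
 * Fetch the 48-byte brand string from CPUID leaves 0x80000002..0x80000004
 * and strip leading and trailing whitespace in place.
 */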
769 | static void get_model_name(struct cpuinfo_x86 *c) |
770 | { |
771 | unsigned int *v; |
772 | char *p, *q, *s; |
773 | |
774 | if (c->extended_cpuid_level < 0x80000004) |
775 | return; |
776 | |
777 | v = (unsigned int *)c->x86_model_id; |
	cpuid(0x80000002, &v[0], &v[1], &v[2], &v[3]);
	cpuid(0x80000003, &v[4], &v[5], &v[6], &v[7]);
	cpuid(0x80000004, &v[8], &v[9], &v[10], &v[11]);
781 | c->x86_model_id[48] = 0; |
782 | |
783 | /* Trim whitespace */ |
784 | p = q = s = &c->x86_model_id[0]; |
785 | |
786 | while (*p == ' ') |
787 | p++; |
788 | |
789 | while (*p) { |
790 | /* Note the last non-whitespace index */ |
791 | if (!isspace(*p)) |
792 | s = q; |
793 | |
794 | *q++ = *p++; |
795 | } |
796 | |
797 | *(s + 1) = '\0'; |
798 | } |
799 | |
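/*
 * Core count from CPUID leaf 4: EAX[31:26] + 1, valid only when the leaf
 * reports a cache type in EAX[4:0].
 */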
800 | void detect_num_cpu_cores(struct cpuinfo_x86 *c) |
801 | { |
802 | unsigned int eax, ebx, ecx, edx; |
803 | |
804 | c->x86_max_cores = 1; |
805 | if (!IS_ENABLED(CONFIG_SMP) || c->cpuid_level < 4) |
806 | return; |
807 | |
	cpuid_count(4, 0, &eax, &ebx, &ecx, &edx);
809 | if (eax & 0x1f) |
810 | c->x86_max_cores = (eax >> 26) + 1; |
811 | } |
812 | |
813 | void cpu_detect_cache_sizes(struct cpuinfo_x86 *c) |
814 | { |
815 | unsigned int n, dummy, ebx, ecx, edx, l2size; |
816 | |
817 | n = c->extended_cpuid_level; |
818 | |
819 | if (n >= 0x80000005) { |
		cpuid(0x80000005, &dummy, &ebx, &ecx, &edx);
821 | c->x86_cache_size = (ecx>>24) + (edx>>24); |
822 | #ifdef CONFIG_X86_64 |
823 | /* On K8 L1 TLB is inclusive, so don't count it */ |
824 | c->x86_tlbsize = 0; |
825 | #endif |
826 | } |
827 | |
	if (n < 0x80000006)	/* Some chips just have a large L1. */
829 | return; |
830 | |
	cpuid(0x80000006, &dummy, &ebx, &ecx, &edx);
832 | l2size = ecx >> 16; |
833 | |
834 | #ifdef CONFIG_X86_64 |
835 | c->x86_tlbsize += ((ebx >> 16) & 0xfff) + (ebx & 0xfff); |
836 | #else |
837 | /* do processor-specific cache resizing */ |
838 | if (this_cpu->legacy_cache_size) |
839 | l2size = this_cpu->legacy_cache_size(c, l2size); |
840 | |
841 | /* Allow user to override all this if necessary. */ |
842 | if (cachesize_override != -1) |
843 | l2size = cachesize_override; |
844 | |
845 | if (l2size == 0) |
846 | return; /* Again, no L2 cache is possible */ |
847 | #endif |
848 | |
849 | c->x86_cache_size = l2size; |
850 | } |
851 | |
852 | u16 __read_mostly tlb_lli_4k[NR_INFO]; |
853 | u16 __read_mostly tlb_lli_2m[NR_INFO]; |
854 | u16 __read_mostly tlb_lli_4m[NR_INFO]; |
855 | u16 __read_mostly tlb_lld_4k[NR_INFO]; |
856 | u16 __read_mostly tlb_lld_2m[NR_INFO]; |
857 | u16 __read_mostly tlb_lld_4m[NR_INFO]; |
858 | u16 __read_mostly tlb_lld_1g[NR_INFO]; |
859 | |
860 | static void cpu_detect_tlb(struct cpuinfo_x86 *c) |
861 | { |
862 | if (this_cpu->c_detect_tlb) |
863 | this_cpu->c_detect_tlb(c); |
864 | |
	pr_info("Last level iTLB entries: 4KB %d, 2MB %d, 4MB %d\n",
		tlb_lli_4k[ENTRIES], tlb_lli_2m[ENTRIES],
		tlb_lli_4m[ENTRIES]);

	pr_info("Last level dTLB entries: 4KB %d, 2MB %d, 4MB %d, 1GB %d\n",
		tlb_lld_4k[ENTRIES], tlb_lld_2m[ENTRIES],
		tlb_lld_4m[ENTRIES], tlb_lld_1g[ENTRIES]);
872 | } |
873 | |
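/*
 * Early HT detection: the sibling count comes from CPUID leaf 1 EBX[23:16].
 * Returns -1 when the topology is enumerated elsewhere (no HT, CMP_LEGACY,
 * or extended topology leaves present).
 */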
874 | int detect_ht_early(struct cpuinfo_x86 *c) |
875 | { |
876 | #ifdef CONFIG_SMP |
877 | u32 eax, ebx, ecx, edx; |
878 | |
879 | if (!cpu_has(c, X86_FEATURE_HT)) |
880 | return -1; |
881 | |
882 | if (cpu_has(c, X86_FEATURE_CMP_LEGACY)) |
883 | return -1; |
884 | |
885 | if (cpu_has(c, X86_FEATURE_XTOPOLOGY)) |
886 | return -1; |
887 | |
	cpuid(1, &eax, &ebx, &ecx, &edx);
889 | |
890 | smp_num_siblings = (ebx & 0xff0000) >> 16; |
891 | if (smp_num_siblings == 1) |
		pr_info_once("CPU0: Hyper-Threading is disabled\n");
893 | #endif |
894 | return 0; |
895 | } |
896 | |
897 | void detect_ht(struct cpuinfo_x86 *c) |
898 | { |
899 | #ifdef CONFIG_SMP |
900 | int index_msb, core_bits; |
901 | |
902 | if (detect_ht_early(c) < 0) |
903 | return; |
904 | |
	index_msb = get_count_order(smp_num_siblings);
	c->topo.pkg_id = apic->phys_pkg_id(c->topo.initial_apicid, index_msb);

	smp_num_siblings = smp_num_siblings / c->x86_max_cores;

	index_msb = get_count_order(smp_num_siblings);

	core_bits = get_count_order(c->x86_max_cores);
913 | |
914 | c->topo.core_id = apic->phys_pkg_id(c->topo.initial_apicid, index_msb) & |
915 | ((1 << core_bits) - 1); |
916 | #endif |
917 | } |
918 | |
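/*
 * Match the 12-byte CPUID vendor string against the registered cpu_dev
 * entries; fall back to the generic "Unknown" vendor if nothing matches.
 */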
919 | static void get_cpu_vendor(struct cpuinfo_x86 *c) |
920 | { |
921 | char *v = c->x86_vendor_id; |
922 | int i; |
923 | |
924 | for (i = 0; i < X86_VENDOR_NUM; i++) { |
925 | if (!cpu_devs[i]) |
926 | break; |
927 | |
928 | if (!strcmp(v, cpu_devs[i]->c_ident[0]) || |
929 | (cpu_devs[i]->c_ident[1] && |
930 | !strcmp(v, cpu_devs[i]->c_ident[1]))) { |
931 | |
932 | this_cpu = cpu_devs[i]; |
933 | c->x86_vendor = this_cpu->c_x86_vendor; |
934 | return; |
935 | } |
936 | } |
937 | |
	pr_err_once("CPU: vendor_id '%s' unknown, using generic init.\n" \
		    "CPU: Your system may be unstable.\n", v);
940 | |
941 | c->x86_vendor = X86_VENDOR_UNKNOWN; |
942 | this_cpu = &default_cpu; |
943 | } |
944 | |
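/*
 * Minimal CPUID probe: vendor string from leaf 0, plus family, model,
 * stepping and CLFLUSH size from leaf 1.
 */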
945 | void cpu_detect(struct cpuinfo_x86 *c) |
946 | { |
947 | /* Get vendor name */ |
	cpuid(0x00000000, (unsigned int *)&c->cpuid_level,
	      (unsigned int *)&c->x86_vendor_id[0],
	      (unsigned int *)&c->x86_vendor_id[8],
	      (unsigned int *)&c->x86_vendor_id[4]);
952 | |
953 | c->x86 = 4; |
954 | /* Intel-defined flags: level 0x00000001 */ |
955 | if (c->cpuid_level >= 0x00000001) { |
956 | u32 junk, tfms, cap0, misc; |
957 | |
		cpuid(0x00000001, &tfms, &misc, &junk, &cap0);
		c->x86 = x86_family(tfms);
		c->x86_model = x86_model(tfms);
		c->x86_stepping = x86_stepping(tfms);
962 | |
963 | if (cap0 & (1<<19)) { |
964 | c->x86_clflush_size = ((misc >> 8) & 0xff) * 8; |
965 | c->x86_cache_alignment = c->x86_clflush_size; |
966 | } |
967 | } |
968 | } |
969 | |
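/*
 * Apply capability overrides accumulated via setup_clear_cpu_cap() and
 * setup_force_cpu_cap() to this CPU's capability words.
 */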
970 | static void apply_forced_caps(struct cpuinfo_x86 *c) |
971 | { |
972 | int i; |
973 | |
974 | for (i = 0; i < NCAPINTS + NBUGINTS; i++) { |
975 | c->x86_capability[i] &= ~cpu_caps_cleared[i]; |
976 | c->x86_capability[i] |= cpu_caps_set[i]; |
977 | } |
978 | } |
979 | |
980 | static void init_speculation_control(struct cpuinfo_x86 *c) |
981 | { |
982 | /* |
983 | * The Intel SPEC_CTRL CPUID bit implies IBRS and IBPB support, |
984 | * and they also have a different bit for STIBP support. Also, |
985 | * a hypervisor might have set the individual AMD bits even on |
986 | * Intel CPUs, for finer-grained selection of what's available. |
987 | */ |
988 | if (cpu_has(c, X86_FEATURE_SPEC_CTRL)) { |
989 | set_cpu_cap(c, X86_FEATURE_IBRS); |
990 | set_cpu_cap(c, X86_FEATURE_IBPB); |
991 | set_cpu_cap(c, X86_FEATURE_MSR_SPEC_CTRL); |
992 | } |
993 | |
994 | if (cpu_has(c, X86_FEATURE_INTEL_STIBP)) |
995 | set_cpu_cap(c, X86_FEATURE_STIBP); |
996 | |
997 | if (cpu_has(c, X86_FEATURE_SPEC_CTRL_SSBD) || |
998 | cpu_has(c, X86_FEATURE_VIRT_SSBD)) |
999 | set_cpu_cap(c, X86_FEATURE_SSBD); |
1000 | |
1001 | if (cpu_has(c, X86_FEATURE_AMD_IBRS)) { |
1002 | set_cpu_cap(c, X86_FEATURE_IBRS); |
1003 | set_cpu_cap(c, X86_FEATURE_MSR_SPEC_CTRL); |
1004 | } |
1005 | |
1006 | if (cpu_has(c, X86_FEATURE_AMD_IBPB)) |
1007 | set_cpu_cap(c, X86_FEATURE_IBPB); |
1008 | |
1009 | if (cpu_has(c, X86_FEATURE_AMD_STIBP)) { |
1010 | set_cpu_cap(c, X86_FEATURE_STIBP); |
1011 | set_cpu_cap(c, X86_FEATURE_MSR_SPEC_CTRL); |
1012 | } |
1013 | |
1014 | if (cpu_has(c, X86_FEATURE_AMD_SSBD)) { |
1015 | set_cpu_cap(c, X86_FEATURE_SSBD); |
1016 | set_cpu_cap(c, X86_FEATURE_MSR_SPEC_CTRL); |
1017 | clear_cpu_cap(c, X86_FEATURE_VIRT_SSBD); |
1018 | } |
1019 | } |
1020 | |
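/* Populate c->x86_capability[] from the standard and extended CPUID leaves. */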
1021 | void get_cpu_cap(struct cpuinfo_x86 *c) |
1022 | { |
1023 | u32 eax, ebx, ecx, edx; |
1024 | |
1025 | /* Intel-defined flags: level 0x00000001 */ |
1026 | if (c->cpuid_level >= 0x00000001) { |
		cpuid(0x00000001, &eax, &ebx, &ecx, &edx);

		c->x86_capability[CPUID_1_ECX] = ecx;
		c->x86_capability[CPUID_1_EDX] = edx;
	}

	/* Thermal and Power Management Leaf: level 0x00000006 (eax) */
	if (c->cpuid_level >= 0x00000006)
		c->x86_capability[CPUID_6_EAX] = cpuid_eax(0x00000006);

	/* Additional Intel-defined flags: level 0x00000007 */
	if (c->cpuid_level >= 0x00000007) {
		cpuid_count(0x00000007, 0, &eax, &ebx, &ecx, &edx);
1040 | c->x86_capability[CPUID_7_0_EBX] = ebx; |
1041 | c->x86_capability[CPUID_7_ECX] = ecx; |
1042 | c->x86_capability[CPUID_7_EDX] = edx; |
1043 | |
1044 | /* Check valid sub-leaf index before accessing it */ |
1045 | if (eax >= 1) { |
			cpuid_count(0x00000007, 1, &eax, &ebx, &ecx, &edx);
1047 | c->x86_capability[CPUID_7_1_EAX] = eax; |
1048 | } |
1049 | } |
1050 | |
1051 | /* Extended state features: level 0x0000000d */ |
1052 | if (c->cpuid_level >= 0x0000000d) { |
		cpuid_count(0x0000000d, 1, &eax, &ebx, &ecx, &edx);
1054 | |
1055 | c->x86_capability[CPUID_D_1_EAX] = eax; |
1056 | } |
1057 | |
1058 | /* AMD-defined flags: level 0x80000001 */ |
	eax = cpuid_eax(0x80000000);
	c->extended_cpuid_level = eax;

	if ((eax & 0xffff0000) == 0x80000000) {
		if (eax >= 0x80000001) {
			cpuid(0x80000001, &eax, &ebx, &ecx, &edx);
1065 | |
1066 | c->x86_capability[CPUID_8000_0001_ECX] = ecx; |
1067 | c->x86_capability[CPUID_8000_0001_EDX] = edx; |
1068 | } |
1069 | } |
1070 | |
	if (c->extended_cpuid_level >= 0x80000007) {
		cpuid(0x80000007, &eax, &ebx, &ecx, &edx);

		c->x86_capability[CPUID_8000_0007_EBX] = ebx;
		c->x86_power = edx;
	}

	if (c->extended_cpuid_level >= 0x80000008) {
		cpuid(0x80000008, &eax, &ebx, &ecx, &edx);
1080 | c->x86_capability[CPUID_8000_0008_EBX] = ebx; |
1081 | } |
1082 | |
	if (c->extended_cpuid_level >= 0x8000000a)
		c->x86_capability[CPUID_8000_000A_EDX] = cpuid_edx(0x8000000a);

	if (c->extended_cpuid_level >= 0x8000001f)
		c->x86_capability[CPUID_8000_001F_EAX] = cpuid_eax(0x8000001f);

	if (c->extended_cpuid_level >= 0x80000021)
		c->x86_capability[CPUID_8000_0021_EAX] = cpuid_eax(0x80000021);
1091 | |
1092 | init_scattered_cpuid_features(c); |
1093 | init_speculation_control(c); |
1094 | |
1095 | /* |
1096 | * Clear/Set all flags overridden by options, after probe. |
1097 | * This needs to happen each time we re-probe, which may happen |
1098 | * several times during CPU initialization. |
1099 | */ |
1100 | apply_forced_caps(c); |
1101 | } |
1102 | |
1103 | void get_cpu_address_sizes(struct cpuinfo_x86 *c) |
1104 | { |
1105 | u32 eax, ebx, ecx, edx; |
1106 | bool vp_bits_from_cpuid = true; |
1107 | |
1108 | if (!cpu_has(c, X86_FEATURE_CPUID) || |
1109 | (c->extended_cpuid_level < 0x80000008)) |
1110 | vp_bits_from_cpuid = false; |
1111 | |
1112 | if (vp_bits_from_cpuid) { |
		cpuid(0x80000008, &eax, &ebx, &ecx, &edx);
1114 | |
1115 | c->x86_virt_bits = (eax >> 8) & 0xff; |
1116 | c->x86_phys_bits = eax & 0xff; |
1117 | } else { |
1118 | if (IS_ENABLED(CONFIG_X86_64)) { |
1119 | c->x86_clflush_size = 64; |
1120 | c->x86_phys_bits = 36; |
1121 | c->x86_virt_bits = 48; |
1122 | } else { |
1123 | c->x86_clflush_size = 32; |
1124 | c->x86_virt_bits = 32; |
1125 | c->x86_phys_bits = 32; |
1126 | |
1127 | if (cpu_has(c, X86_FEATURE_PAE) || |
1128 | cpu_has(c, X86_FEATURE_PSE36)) |
1129 | c->x86_phys_bits = 36; |
1130 | } |
1131 | } |
1132 | c->x86_cache_bits = c->x86_phys_bits; |
1133 | c->x86_cache_alignment = c->x86_clflush_size; |
1134 | } |
1135 | |
1136 | static void identify_cpu_without_cpuid(struct cpuinfo_x86 *c) |
1137 | { |
1138 | #ifdef CONFIG_X86_32 |
1139 | int i; |
1140 | |
1141 | /* |
1142 | * First of all, decide if this is a 486 or higher |
1143 | * It's a 486 if we can modify the AC flag |
1144 | */ |
1145 | if (flag_is_changeable_p(X86_EFLAGS_AC)) |
1146 | c->x86 = 4; |
1147 | else |
1148 | c->x86 = 3; |
1149 | |
1150 | for (i = 0; i < X86_VENDOR_NUM; i++) |
1151 | if (cpu_devs[i] && cpu_devs[i]->c_identify) { |
1152 | c->x86_vendor_id[0] = 0; |
1153 | cpu_devs[i]->c_identify(c); |
1154 | if (c->x86_vendor_id[0]) { |
1155 | get_cpu_vendor(c); |
1156 | break; |
1157 | } |
1158 | } |
1159 | #endif |
1160 | } |
1161 | |
1162 | #define NO_SPECULATION BIT(0) |
1163 | #define NO_MELTDOWN BIT(1) |
1164 | #define NO_SSB BIT(2) |
1165 | #define NO_L1TF BIT(3) |
1166 | #define NO_MDS BIT(4) |
1167 | #define MSBDS_ONLY BIT(5) |
1168 | #define NO_SWAPGS BIT(6) |
1169 | #define NO_ITLB_MULTIHIT BIT(7) |
1170 | #define NO_SPECTRE_V2 BIT(8) |
1171 | #define NO_MMIO BIT(9) |
1172 | #define NO_EIBRS_PBRSB BIT(10) |
1173 | |
1174 | #define VULNWL(vendor, family, model, whitelist) \ |
1175 | X86_MATCH_VENDOR_FAM_MODEL(vendor, family, model, whitelist) |
1176 | |
1177 | #define VULNWL_INTEL(model, whitelist) \ |
1178 | VULNWL(INTEL, 6, INTEL_FAM6_##model, whitelist) |
1179 | |
1180 | #define VULNWL_AMD(family, whitelist) \ |
1181 | VULNWL(AMD, family, X86_MODEL_ANY, whitelist) |
1182 | |
1183 | #define VULNWL_HYGON(family, whitelist) \ |
1184 | VULNWL(HYGON, family, X86_MODEL_ANY, whitelist) |
1185 | |
1186 | static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = { |
1187 | VULNWL(ANY, 4, X86_MODEL_ANY, NO_SPECULATION), |
1188 | VULNWL(CENTAUR, 5, X86_MODEL_ANY, NO_SPECULATION), |
1189 | VULNWL(INTEL, 5, X86_MODEL_ANY, NO_SPECULATION), |
1190 | VULNWL(NSC, 5, X86_MODEL_ANY, NO_SPECULATION), |
1191 | VULNWL(VORTEX, 5, X86_MODEL_ANY, NO_SPECULATION), |
1192 | VULNWL(VORTEX, 6, X86_MODEL_ANY, NO_SPECULATION), |
1193 | |
1194 | /* Intel Family 6 */ |
1195 | VULNWL_INTEL(TIGERLAKE, NO_MMIO), |
1196 | VULNWL_INTEL(TIGERLAKE_L, NO_MMIO), |
1197 | VULNWL_INTEL(ALDERLAKE, NO_MMIO), |
1198 | VULNWL_INTEL(ALDERLAKE_L, NO_MMIO), |
1199 | |
1200 | VULNWL_INTEL(ATOM_SALTWELL, NO_SPECULATION | NO_ITLB_MULTIHIT), |
1201 | VULNWL_INTEL(ATOM_SALTWELL_TABLET, NO_SPECULATION | NO_ITLB_MULTIHIT), |
1202 | VULNWL_INTEL(ATOM_SALTWELL_MID, NO_SPECULATION | NO_ITLB_MULTIHIT), |
1203 | VULNWL_INTEL(ATOM_BONNELL, NO_SPECULATION | NO_ITLB_MULTIHIT), |
1204 | VULNWL_INTEL(ATOM_BONNELL_MID, NO_SPECULATION | NO_ITLB_MULTIHIT), |
1205 | |
1206 | VULNWL_INTEL(ATOM_SILVERMONT, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS | NO_ITLB_MULTIHIT), |
1207 | VULNWL_INTEL(ATOM_SILVERMONT_D, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS | NO_ITLB_MULTIHIT), |
1208 | VULNWL_INTEL(ATOM_SILVERMONT_MID, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS | NO_ITLB_MULTIHIT), |
1209 | VULNWL_INTEL(ATOM_AIRMONT, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS | NO_ITLB_MULTIHIT), |
1210 | VULNWL_INTEL(XEON_PHI_KNL, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS | NO_ITLB_MULTIHIT), |
1211 | VULNWL_INTEL(XEON_PHI_KNM, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS | NO_ITLB_MULTIHIT), |
1212 | |
1213 | VULNWL_INTEL(CORE_YONAH, NO_SSB), |
1214 | |
1215 | VULNWL_INTEL(ATOM_AIRMONT_MID, NO_L1TF | MSBDS_ONLY | NO_SWAPGS | NO_ITLB_MULTIHIT), |
1216 | VULNWL_INTEL(ATOM_AIRMONT_NP, NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT), |
1217 | |
1218 | VULNWL_INTEL(ATOM_GOLDMONT, NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO), |
1219 | VULNWL_INTEL(ATOM_GOLDMONT_D, NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO), |
1220 | VULNWL_INTEL(ATOM_GOLDMONT_PLUS, NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_EIBRS_PBRSB), |
1221 | |
1222 | /* |
1223 | * Technically, swapgs isn't serializing on AMD (despite it previously |
1224 | * being documented as such in the APM). But according to AMD, %gs is |
1225 | * updated non-speculatively, and the issuing of %gs-relative memory |
1226 | * operands will be blocked until the %gs update completes, which is |
1227 | * good enough for our purposes. |
1228 | */ |
1229 | |
1230 | VULNWL_INTEL(ATOM_TREMONT, NO_EIBRS_PBRSB), |
1231 | VULNWL_INTEL(ATOM_TREMONT_L, NO_EIBRS_PBRSB), |
1232 | VULNWL_INTEL(ATOM_TREMONT_D, NO_ITLB_MULTIHIT | NO_EIBRS_PBRSB), |
1233 | |
1234 | /* AMD Family 0xf - 0x12 */ |
1235 | VULNWL_AMD(0x0f, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO), |
1236 | VULNWL_AMD(0x10, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO), |
1237 | VULNWL_AMD(0x11, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO), |
1238 | VULNWL_AMD(0x12, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO), |
1239 | |
1240 | /* FAMILY_ANY must be last, otherwise 0x0f - 0x12 matches won't work */ |
1241 | VULNWL_AMD(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_EIBRS_PBRSB), |
1242 | VULNWL_HYGON(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_EIBRS_PBRSB), |
1243 | |
1244 | /* Zhaoxin Family 7 */ |
1245 | VULNWL(CENTAUR, 7, X86_MODEL_ANY, NO_SPECTRE_V2 | NO_SWAPGS | NO_MMIO), |
1246 | VULNWL(ZHAOXIN, 7, X86_MODEL_ANY, NO_SPECTRE_V2 | NO_SWAPGS | NO_MMIO), |
1247 | {} |
1248 | }; |
1249 | |
1250 | #define VULNBL(vendor, family, model, blacklist) \ |
1251 | X86_MATCH_VENDOR_FAM_MODEL(vendor, family, model, blacklist) |
1252 | |
1253 | #define VULNBL_INTEL_STEPPINGS(model, steppings, issues) \ |
1254 | X86_MATCH_VENDOR_FAM_MODEL_STEPPINGS_FEATURE(INTEL, 6, \ |
1255 | INTEL_FAM6_##model, steppings, \ |
1256 | X86_FEATURE_ANY, issues) |
1257 | |
1258 | #define VULNBL_AMD(family, blacklist) \ |
1259 | VULNBL(AMD, family, X86_MODEL_ANY, blacklist) |
1260 | |
1261 | #define VULNBL_HYGON(family, blacklist) \ |
1262 | VULNBL(HYGON, family, X86_MODEL_ANY, blacklist) |
1263 | |
1264 | #define SRBDS BIT(0) |
1265 | /* CPU is affected by X86_BUG_MMIO_STALE_DATA */ |
1266 | #define MMIO BIT(1) |
1267 | /* CPU is affected by Shared Buffers Data Sampling (SBDS), a variant of X86_BUG_MMIO_STALE_DATA */ |
1268 | #define MMIO_SBDS BIT(2) |
1269 | /* CPU is affected by RETbleed, speculating where you would not expect it */ |
1270 | #define RETBLEED BIT(3) |
1271 | /* CPU is affected by SMT (cross-thread) return predictions */ |
1272 | #define SMT_RSB BIT(4) |
1273 | /* CPU is affected by SRSO */ |
1274 | #define SRSO BIT(5) |
1275 | /* CPU is affected by GDS */ |
1276 | #define GDS BIT(6) |
1277 | |
1278 | static const struct x86_cpu_id cpu_vuln_blacklist[] __initconst = { |
1279 | VULNBL_INTEL_STEPPINGS(IVYBRIDGE, X86_STEPPING_ANY, SRBDS), |
1280 | VULNBL_INTEL_STEPPINGS(HASWELL, X86_STEPPING_ANY, SRBDS), |
1281 | VULNBL_INTEL_STEPPINGS(HASWELL_L, X86_STEPPING_ANY, SRBDS), |
1282 | VULNBL_INTEL_STEPPINGS(HASWELL_G, X86_STEPPING_ANY, SRBDS), |
1283 | VULNBL_INTEL_STEPPINGS(HASWELL_X, X86_STEPPING_ANY, MMIO), |
1284 | VULNBL_INTEL_STEPPINGS(BROADWELL_D, X86_STEPPING_ANY, MMIO), |
1285 | VULNBL_INTEL_STEPPINGS(BROADWELL_G, X86_STEPPING_ANY, SRBDS), |
1286 | VULNBL_INTEL_STEPPINGS(BROADWELL_X, X86_STEPPING_ANY, MMIO), |
1287 | VULNBL_INTEL_STEPPINGS(BROADWELL, X86_STEPPING_ANY, SRBDS), |
1288 | VULNBL_INTEL_STEPPINGS(SKYLAKE_X, X86_STEPPING_ANY, MMIO | RETBLEED | GDS), |
1289 | VULNBL_INTEL_STEPPINGS(SKYLAKE_L, X86_STEPPING_ANY, MMIO | RETBLEED | GDS | SRBDS), |
1290 | VULNBL_INTEL_STEPPINGS(SKYLAKE, X86_STEPPING_ANY, MMIO | RETBLEED | GDS | SRBDS), |
1291 | VULNBL_INTEL_STEPPINGS(KABYLAKE_L, X86_STEPPING_ANY, MMIO | RETBLEED | GDS | SRBDS), |
1292 | VULNBL_INTEL_STEPPINGS(KABYLAKE, X86_STEPPING_ANY, MMIO | RETBLEED | GDS | SRBDS), |
1293 | VULNBL_INTEL_STEPPINGS(CANNONLAKE_L, X86_STEPPING_ANY, RETBLEED), |
1294 | VULNBL_INTEL_STEPPINGS(ICELAKE_L, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RETBLEED | GDS), |
1295 | VULNBL_INTEL_STEPPINGS(ICELAKE_D, X86_STEPPING_ANY, MMIO | GDS), |
1296 | VULNBL_INTEL_STEPPINGS(ICELAKE_X, X86_STEPPING_ANY, MMIO | GDS), |
1297 | VULNBL_INTEL_STEPPINGS(COMETLAKE, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RETBLEED | GDS), |
1298 | VULNBL_INTEL_STEPPINGS(COMETLAKE_L, X86_STEPPINGS(0x0, 0x0), MMIO | RETBLEED), |
1299 | VULNBL_INTEL_STEPPINGS(COMETLAKE_L, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RETBLEED | GDS), |
1300 | VULNBL_INTEL_STEPPINGS(TIGERLAKE_L, X86_STEPPING_ANY, GDS), |
1301 | VULNBL_INTEL_STEPPINGS(TIGERLAKE, X86_STEPPING_ANY, GDS), |
1302 | VULNBL_INTEL_STEPPINGS(LAKEFIELD, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RETBLEED), |
1303 | VULNBL_INTEL_STEPPINGS(ROCKETLAKE, X86_STEPPING_ANY, MMIO | RETBLEED | GDS), |
1304 | VULNBL_INTEL_STEPPINGS(ATOM_TREMONT, X86_STEPPING_ANY, MMIO | MMIO_SBDS), |
1305 | VULNBL_INTEL_STEPPINGS(ATOM_TREMONT_D, X86_STEPPING_ANY, MMIO), |
1306 | VULNBL_INTEL_STEPPINGS(ATOM_TREMONT_L, X86_STEPPING_ANY, MMIO | MMIO_SBDS), |
1307 | |
1308 | VULNBL_AMD(0x15, RETBLEED), |
1309 | VULNBL_AMD(0x16, RETBLEED), |
1310 | VULNBL_AMD(0x17, RETBLEED | SMT_RSB | SRSO), |
1311 | VULNBL_HYGON(0x18, RETBLEED | SMT_RSB | SRSO), |
1312 | VULNBL_AMD(0x19, SRSO), |
1313 | {} |
1314 | }; |
1315 | |
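/*
 * True if the CPU matches an entry in @table and that entry's driver_data
 * has any of the bits in @which set.
 */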
1316 | static bool __init cpu_matches(const struct x86_cpu_id *table, unsigned long which) |
1317 | { |
	const struct x86_cpu_id *m = x86_match_cpu(table);
1319 | |
1320 | return m && !!(m->driver_data & which); |
1321 | } |
1322 | |
1323 | u64 x86_read_arch_cap_msr(void) |
1324 | { |
1325 | u64 ia32_cap = 0; |
1326 | |
1327 | if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES)) |
1328 | rdmsrl(MSR_IA32_ARCH_CAPABILITIES, ia32_cap); |
1329 | |
1330 | return ia32_cap; |
1331 | } |
1332 | |
1333 | static bool arch_cap_mmio_immune(u64 ia32_cap) |
1334 | { |
1335 | return (ia32_cap & ARCH_CAP_FBSDP_NO && |
1336 | ia32_cap & ARCH_CAP_PSDP_NO && |
1337 | ia32_cap & ARCH_CAP_SBDR_SSDP_NO); |
1338 | } |
1339 | |
1340 | static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) |
1341 | { |
1342 | u64 ia32_cap = x86_read_arch_cap_msr(); |
1343 | |
1344 | /* Set ITLB_MULTIHIT bug if cpu is not in the whitelist and not mitigated */ |
	if (!cpu_matches(cpu_vuln_whitelist, NO_ITLB_MULTIHIT) &&
	    !(ia32_cap & ARCH_CAP_PSCHANGE_MC_NO))
		setup_force_cpu_bug(X86_BUG_ITLB_MULTIHIT);

	if (cpu_matches(cpu_vuln_whitelist, NO_SPECULATION))
1350 | return; |
1351 | |
1352 | setup_force_cpu_bug(X86_BUG_SPECTRE_V1); |
1353 | |
	if (!cpu_matches(cpu_vuln_whitelist, NO_SPECTRE_V2))
		setup_force_cpu_bug(X86_BUG_SPECTRE_V2);

	if (!cpu_matches(cpu_vuln_whitelist, NO_SSB) &&
1358 | !(ia32_cap & ARCH_CAP_SSB_NO) && |
1359 | !cpu_has(c, X86_FEATURE_AMD_SSB_NO)) |
1360 | setup_force_cpu_bug(X86_BUG_SPEC_STORE_BYPASS); |
1361 | |
1362 | /* |
1363 | * AMD's AutoIBRS is equivalent to Intel's eIBRS - use the Intel feature |
1364 | * flag and protect from vendor-specific bugs via the whitelist. |
1365 | */ |
1366 | if ((ia32_cap & ARCH_CAP_IBRS_ALL) || cpu_has(c, X86_FEATURE_AUTOIBRS)) { |
1367 | setup_force_cpu_cap(X86_FEATURE_IBRS_ENHANCED); |
		if (!cpu_matches(cpu_vuln_whitelist, NO_EIBRS_PBRSB) &&
		    !(ia32_cap & ARCH_CAP_PBRSB_NO))
1370 | setup_force_cpu_bug(X86_BUG_EIBRS_PBRSB); |
1371 | } |
1372 | |
	if (!cpu_matches(cpu_vuln_whitelist, NO_MDS) &&
	    !(ia32_cap & ARCH_CAP_MDS_NO)) {
		setup_force_cpu_bug(X86_BUG_MDS);
		if (cpu_matches(cpu_vuln_whitelist, MSBDS_ONLY))
			setup_force_cpu_bug(X86_BUG_MSBDS_ONLY);
	}

	if (!cpu_matches(cpu_vuln_whitelist, NO_SWAPGS))
1381 | setup_force_cpu_bug(X86_BUG_SWAPGS); |
1382 | |
1383 | /* |
1384 | * When the CPU is not mitigated for TAA (TAA_NO=0) set TAA bug when: |
1385 | * - TSX is supported or |
1386 | * - TSX_CTRL is present |
1387 | * |
	 * The TSX_CTRL check is needed for cases when TSX could be disabled
	 * before the kernel boot, e.g. kexec.
	 * The TSX_CTRL check alone is not sufficient when the microcode
	 * update is not present or when running as a guest that doesn't get
	 * TSX_CTRL.
1392 | */ |
1393 | if (!(ia32_cap & ARCH_CAP_TAA_NO) && |
1394 | (cpu_has(c, X86_FEATURE_RTM) || |
1395 | (ia32_cap & ARCH_CAP_TSX_CTRL_MSR))) |
1396 | setup_force_cpu_bug(X86_BUG_TAA); |
1397 | |
1398 | /* |
1399 | * SRBDS affects CPUs which support RDRAND or RDSEED and are listed |
1400 | * in the vulnerability blacklist. |
1401 | * |
1402 | * Some of the implications and mitigation of Shared Buffers Data |
1403 | * Sampling (SBDS) are similar to SRBDS. Give SBDS same treatment as |
1404 | * SRBDS. |
1405 | */ |
1406 | if ((cpu_has(c, X86_FEATURE_RDRAND) || |
1407 | cpu_has(c, X86_FEATURE_RDSEED)) && |
	    cpu_matches(cpu_vuln_blacklist, SRBDS | MMIO_SBDS))
1409 | setup_force_cpu_bug(X86_BUG_SRBDS); |
1410 | |
1411 | /* |
1412 | * Processor MMIO Stale Data bug enumeration |
1413 | * |
	 * The affected CPU list is generally enough to enumerate the
	 * vulnerability, but for the virtualization case also check the
	 * ARCH_CAP MSR bits: the VMM may not want the guest to enumerate
	 * the bug.
1417 | * |
1418 | * Set X86_BUG_MMIO_UNKNOWN for CPUs that are neither in the blacklist, |
1419 | * nor in the whitelist and also don't enumerate MSR ARCH_CAP MMIO bits. |
1420 | */ |
	if (!arch_cap_mmio_immune(ia32_cap)) {
		if (cpu_matches(cpu_vuln_blacklist, MMIO))
			setup_force_cpu_bug(X86_BUG_MMIO_STALE_DATA);
		else if (!cpu_matches(cpu_vuln_whitelist, NO_MMIO))
			setup_force_cpu_bug(X86_BUG_MMIO_UNKNOWN);
	}

	if (!cpu_has(c, X86_FEATURE_BTC_NO)) {
		if (cpu_matches(cpu_vuln_blacklist, RETBLEED) || (ia32_cap & ARCH_CAP_RSBA))
			setup_force_cpu_bug(X86_BUG_RETBLEED);
	}

	if (cpu_matches(cpu_vuln_blacklist, SMT_RSB))
		setup_force_cpu_bug(X86_BUG_SMT_RSB);

	if (!cpu_has(c, X86_FEATURE_SRSO_NO)) {
		if (cpu_matches(cpu_vuln_blacklist, SRSO))
			setup_force_cpu_bug(X86_BUG_SRSO);
	}
1440 | |
1441 | /* |
1442 | * Check if CPU is vulnerable to GDS. If running in a virtual machine on |
1443 | * an affected processor, the VMM may have disabled the use of GATHER by |
1444 | * disabling AVX2. The only way to do this in HW is to clear XCR0[2], |
1445 | * which means that AVX will be disabled. |
1446 | */ |
	if (cpu_matches(cpu_vuln_blacklist, GDS) && !(ia32_cap & ARCH_CAP_GDS_NO) &&
	    boot_cpu_has(X86_FEATURE_AVX))
		setup_force_cpu_bug(X86_BUG_GDS);

	if (cpu_matches(cpu_vuln_whitelist, NO_MELTDOWN))
		return;

	/* Rogue Data Cache Load? No! */
	if (ia32_cap & ARCH_CAP_RDCL_NO)
		return;

	setup_force_cpu_bug(X86_BUG_CPU_MELTDOWN);

	if (cpu_matches(cpu_vuln_whitelist, NO_L1TF))
1461 | return; |
1462 | |
1463 | setup_force_cpu_bug(X86_BUG_L1TF); |
1464 | } |
1465 | |
1466 | /* |
1467 | * The NOPL instruction is supposed to exist on all CPUs of family >= 6; |
1468 | * unfortunately, that's not true in practice because of early VIA |
1469 | * chips and (more importantly) broken virtualizers that are not easy |
1470 | * to detect. In the latter case it doesn't even *fail* reliably, so |
1471 | * probing for it doesn't even work. Disable it completely on 32-bit |
1472 | * unless we can find a reliable way to detect all the broken cases. |
1473 | * Enable it explicitly on 64-bit for non-constant inputs of cpu_has(). |
1474 | */ |
1475 | static void detect_nopl(void) |
1476 | { |
1477 | #ifdef CONFIG_X86_32 |
1478 | setup_clear_cpu_cap(X86_FEATURE_NOPL); |
1479 | #else |
1480 | setup_force_cpu_cap(X86_FEATURE_NOPL); |
1481 | #endif |
1482 | } |
1483 | |
1484 | /* |
1485 | * We parse cpu parameters early because fpu__init_system() is executed |
1486 | * before parse_early_param(). |
1487 | */ |
1488 | static void __init cpu_parse_early_param(void) |
1489 | { |
1490 | char arg[128]; |
1491 | char *argptr = arg, *opt; |
1492 | int arglen, taint = 0; |
1493 | |
1494 | #ifdef CONFIG_X86_32 |
	if (cmdline_find_option_bool(boot_command_line, "no387"))
#ifdef CONFIG_MATH_EMULATION
		setup_clear_cpu_cap(X86_FEATURE_FPU);
#else
		pr_err("Option 'no387' requires CONFIG_MATH_EMULATION enabled.\n");
#endif

	if (cmdline_find_option_bool(boot_command_line, "nofxsr"))
1503 | setup_clear_cpu_cap(X86_FEATURE_FXSR); |
1504 | #endif |
1505 | |
	if (cmdline_find_option_bool(boot_command_line, "noxsave"))
		setup_clear_cpu_cap(X86_FEATURE_XSAVE);

	if (cmdline_find_option_bool(boot_command_line, "noxsaveopt"))
		setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT);

	if (cmdline_find_option_bool(boot_command_line, "noxsaves"))
		setup_clear_cpu_cap(X86_FEATURE_XSAVES);

	if (cmdline_find_option_bool(boot_command_line, "nousershstk"))
		setup_clear_cpu_cap(X86_FEATURE_USER_SHSTK);

	arglen = cmdline_find_option(boot_command_line, "clearcpuid", arg, sizeof(arg));
1519 | if (arglen <= 0) |
1520 | return; |
1521 | |
	pr_info("Clearing CPUID bits:");

	while (argptr) {
		bool found __maybe_unused = false;
		unsigned int bit;

		opt = strsep(&argptr, ",");

		/*
		 * Handle naked numbers first for feature flags which don't
		 * have names.
		 */
		if (!kstrtouint(opt, 10, &bit)) {
1535 | if (bit < NCAPINTS * 32) { |
1536 | |
1537 | /* empty-string, i.e., ""-defined feature flags */ |
1538 | if (!x86_cap_flags[bit]) |
1539 | pr_cont(" " X86_CAP_FMT_NUM, x86_cap_flag_num(bit)); |
1540 | else |
1541 | pr_cont(" " X86_CAP_FMT, x86_cap_flag(bit)); |
1542 | |
1543 | setup_clear_cpu_cap(bit); |
1544 | taint++; |
1545 | } |
1546 | /* |
1547 | * The assumption is that there are no feature names with only |
			 * numbers in the name, thus go to the next argument.
1549 | */ |
1550 | continue; |
1551 | } |
1552 | |
1553 | for (bit = 0; bit < 32 * NCAPINTS; bit++) { |
1554 | if (!x86_cap_flag(bit)) |
1555 | continue; |
1556 | |
1557 | if (strcmp(x86_cap_flag(bit), opt)) |
1558 | continue; |
1559 | |
			pr_cont(" %s", opt);
			setup_clear_cpu_cap(bit);
			taint++;
			found = true;
			break;
		}

		if (!found)
			pr_cont(" (unknown: %s)", opt);
	}
	pr_cont("\n");
1571 | |
1572 | if (taint) |
1573 | add_taint(TAINT_CPU_OUT_OF_SPEC, LOCKDEP_STILL_OK); |
1574 | } |
1575 | |
1576 | /* |
1577 | * Do minimum CPU detection early. |
1578 | * Fields really needed: vendor, cpuid_level, family, model, mask, |
1579 | * cache alignment. |
1580 | * The others are not touched to avoid unwanted side effects. |
1581 | * |
1582 | * WARNING: this function is only called on the boot CPU. Don't add code |
1583 | * here that is supposed to run on all CPUs. |
1584 | */ |
1585 | static void __init early_identify_cpu(struct cpuinfo_x86 *c) |
1586 | { |
1587 | memset(&c->x86_capability, 0, sizeof(c->x86_capability)); |
1588 | c->extended_cpuid_level = 0; |
1589 | |
1590 | if (!have_cpuid_p()) |
1591 | identify_cpu_without_cpuid(c); |
1592 | |
	/* cyrix could have cpuid enabled via c_identify() */
1594 | if (have_cpuid_p()) { |
1595 | cpu_detect(c); |
1596 | get_cpu_vendor(c); |
1597 | get_cpu_cap(c); |
1598 | setup_force_cpu_cap(X86_FEATURE_CPUID); |
1599 | cpu_parse_early_param(); |
1600 | |
1601 | if (this_cpu->c_early_init) |
1602 | this_cpu->c_early_init(c); |
1603 | |
1604 | c->cpu_index = 0; |
		filter_cpuid_features(c, false);
1606 | |
1607 | if (this_cpu->c_bsp_init) |
1608 | this_cpu->c_bsp_init(c); |
1609 | } else { |
1610 | setup_clear_cpu_cap(X86_FEATURE_CPUID); |
1611 | } |
1612 | |
1613 | get_cpu_address_sizes(c); |
1614 | |
1615 | setup_force_cpu_cap(X86_FEATURE_ALWAYS); |
1616 | |
1617 | cpu_set_bug_bits(c); |
1618 | |
1619 | sld_setup(c); |
1620 | |
1621 | #ifdef CONFIG_X86_32 |
1622 | /* |
1623 | * Regardless of whether PCID is enumerated, the SDM says |
1624 | * that it can't be enabled in 32-bit mode. |
1625 | */ |
1626 | setup_clear_cpu_cap(X86_FEATURE_PCID); |
1627 | #endif |
1628 | |
1629 | /* |
1630 | * Later in the boot process pgtable_l5_enabled() relies on |
1631 | * cpu_feature_enabled(X86_FEATURE_LA57). If 5-level paging is not |
1632 | * enabled by this point we need to clear the feature bit to avoid |
1633 | * false-positives at the later stage. |
1634 | * |
1635 | * pgtable_l5_enabled() can be false here for several reasons: |
1636 | * - 5-level paging is disabled compile-time; |
1637 | * - it's 32-bit kernel; |
1638 | * - machine doesn't support 5-level paging; |
1639 | * - user specified 'no5lvl' in kernel command line. |
1640 | */ |
1641 | if (!pgtable_l5_enabled()) |
1642 | setup_clear_cpu_cap(X86_FEATURE_LA57); |
1643 | |
1644 | detect_nopl(); |
1645 | } |
1646 | |
1647 | void __init early_cpu_init(void) |
1648 | { |
1649 | const struct cpu_dev *const *cdev; |
1650 | int count = 0; |
1651 | |
1652 | #ifdef CONFIG_PROCESSOR_SELECT |
1653 | pr_info("KERNEL supported cpus:\n" ); |
1654 | #endif |
1655 | |
1656 | for (cdev = __x86_cpu_dev_start; cdev < __x86_cpu_dev_end; cdev++) { |
1657 | const struct cpu_dev *cpudev = *cdev; |
1658 | |
1659 | if (count >= X86_VENDOR_NUM) |
1660 | break; |
1661 | cpu_devs[count] = cpudev; |
1662 | count++; |
1663 | |
1664 | #ifdef CONFIG_PROCESSOR_SELECT |
1665 | { |
1666 | unsigned int j; |
1667 | |
1668 | for (j = 0; j < 2; j++) { |
1669 | if (!cpudev->c_ident[j]) |
1670 | continue; |
1671 | pr_info(" %s %s\n" , cpudev->c_vendor, |
1672 | cpudev->c_ident[j]); |
1673 | } |
1674 | } |
1675 | #endif |
1676 | } |
1677 | early_identify_cpu(c: &boot_cpu_data); |
1678 | } |
1679 | |
1680 | static bool detect_null_seg_behavior(void) |
1681 | { |
1682 | /* |
1683 | * Empirically, writing zero to a segment selector on AMD does |
1684 | * not clear the base, whereas writing zero to a segment |
1685 | * selector on Intel does clear the base. Intel's behavior |
1686 | * allows slightly faster context switches in the common case |
1687 | * where GS is unused by the prev and next threads. |
1688 | * |
1689 | * Since neither vendor documents this anywhere that I can see, |
1690 | * detect it directly instead of hard-coding the choice by |
1691 | * vendor. |
1692 | * |
1693 | * I've designated AMD's behavior as the "bug" because it's |
1694 | * counterintuitive and less friendly. |
1695 | */ |
1696 | |
1697 | unsigned long old_base, tmp; |
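
	/*
	 * Probe directly: set a non-zero FS base, load a NULL selector into
	 * FS and check whether the base was cleared by the write.
	 */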
1698 | rdmsrl(MSR_FS_BASE, old_base); |
	wrmsrl(MSR_FS_BASE, 1);
	loadsegment(fs, 0);
	rdmsrl(MSR_FS_BASE, tmp);
	wrmsrl(MSR_FS_BASE, old_base);
1703 | return tmp == 0; |
1704 | } |
1705 | |
1706 | void check_null_seg_clears_base(struct cpuinfo_x86 *c) |
1707 | { |
	/* BUG_NULL_SEG is only relevant with 64-bit userspace */
1709 | if (!IS_ENABLED(CONFIG_X86_64)) |
1710 | return; |
1711 | |
1712 | if (cpu_has(c, X86_FEATURE_NULL_SEL_CLR_BASE)) |
1713 | return; |
1714 | |
1715 | /* |
1716 | * CPUID bit above wasn't set. If this kernel is still running |
	 * as a HV guest, then the HV has decided not to advertise
1718 | * that CPUID bit for whatever reason. For example, one |
1719 | * member of the migration pool might be vulnerable. Which |
1720 | * means, the bug is present: set the BUG flag and return. |
1721 | */ |
1722 | if (cpu_has(c, X86_FEATURE_HYPERVISOR)) { |
1723 | set_cpu_bug(c, X86_BUG_NULL_SEG); |
1724 | return; |
1725 | } |
1726 | |
1727 | /* |
1728 | * Zen2 CPUs also have this behaviour, but no CPUID bit. |
1729 | * 0x18 is the respective family for Hygon. |
1730 | */ |
1731 | if ((c->x86 == 0x17 || c->x86 == 0x18) && |
1732 | detect_null_seg_behavior()) |
1733 | return; |
1734 | |
1735 | /* All the remaining ones are affected */ |
1736 | set_cpu_bug(c, X86_BUG_NULL_SEG); |
1737 | } |
1738 | |
1739 | static void generic_identify(struct cpuinfo_x86 *c) |
1740 | { |
1741 | c->extended_cpuid_level = 0; |
1742 | |
1743 | if (!have_cpuid_p()) |
1744 | identify_cpu_without_cpuid(c); |
1745 | |
	/* cyrix could have cpuid enabled via c_identify() */
1747 | if (!have_cpuid_p()) |
1748 | return; |
1749 | |
1750 | cpu_detect(c); |
1751 | |
1752 | get_cpu_vendor(c); |
1753 | |
1754 | get_cpu_cap(c); |
1755 | |
1756 | get_cpu_address_sizes(c); |
1757 | |
1758 | if (c->cpuid_level >= 0x00000001) { |
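		/* CPUID.01H:EBX[31:24] holds the initial local APIC ID. */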
		c->topo.initial_apicid = (cpuid_ebx(1) >> 24) & 0xFF;
1760 | #ifdef CONFIG_X86_32 |
1761 | # ifdef CONFIG_SMP |
1762 | c->topo.apicid = apic->phys_pkg_id(c->topo.initial_apicid, 0); |
1763 | # else |
1764 | c->topo.apicid = c->topo.initial_apicid; |
1765 | # endif |
1766 | #endif |
1767 | c->topo.pkg_id = c->topo.initial_apicid; |
1768 | } |
1769 | |
1770 | get_model_name(c); /* Default name */ |
1771 | |
1772 | /* |
1773 | * ESPFIX is a strange bug. All real CPUs have it. Paravirt |
1774 | * systems that run Linux at CPL > 0 may or may not have the |
1775 | * issue, but, even if they have the issue, there's absolutely |
1776 | * nothing we can do about it because we can't use the real IRET |
1777 | * instruction. |
1778 | * |
1779 | * NB: For the time being, only 32-bit kernels support |
1780 | * X86_BUG_ESPFIX as such. 64-bit kernels directly choose |
1781 | * whether to apply espfix using paravirt hooks. If any |
1782 | * non-paravirt system ever shows up that does *not* have the |
1783 | * ESPFIX issue, we can change this. |
1784 | */ |
1785 | #ifdef CONFIG_X86_32 |
1786 | set_cpu_bug(c, X86_BUG_ESPFIX); |
1787 | #endif |
1788 | } |
1789 | |
1790 | /* |
1791 | * Validate that ACPI/mptables have the same information about the |
1792 | * effective APIC id and update the package map. |
1793 | */ |
1794 | static void validate_apic_and_package_id(struct cpuinfo_x86 *c) |
1795 | { |
1796 | #ifdef CONFIG_SMP |
1797 | unsigned int cpu = smp_processor_id(); |
1798 | u32 apicid; |
1799 | |
1800 | apicid = apic->cpu_present_to_apicid(cpu); |
1801 | |
1802 | if (apicid != c->topo.apicid) { |
1803 | pr_err(FW_BUG "CPU%u: APIC id mismatch. Firmware: %x APIC: %x\n" , |
1804 | cpu, apicid, c->topo.initial_apicid); |
1805 | } |
1806 | BUG_ON(topology_update_package_map(c->topo.pkg_id, cpu)); |
1807 | BUG_ON(topology_update_die_map(c->topo.die_id, cpu)); |
1808 | #else |
1809 | c->topo.logical_pkg_id = 0; |
1810 | #endif |
1811 | } |
1812 | |
1813 | /* |
1814 | * This does the hard work of actually picking apart the CPU stuff... |
1815 | */ |
1816 | static void identify_cpu(struct cpuinfo_x86 *c) |
1817 | { |
1818 | int i; |
1819 | |
1820 | c->loops_per_jiffy = loops_per_jiffy; |
1821 | c->x86_cache_size = 0; |
1822 | c->x86_vendor = X86_VENDOR_UNKNOWN; |
1823 | c->x86_model = c->x86_stepping = 0; /* So far unknown... */ |
1824 | c->x86_vendor_id[0] = '\0'; /* Unset */ |
1825 | c->x86_model_id[0] = '\0'; /* Unset */ |
1826 | c->x86_max_cores = 1; |
1827 | c->x86_coreid_bits = 0; |
1828 | c->topo.cu_id = 0xff; |
1829 | c->topo.llc_id = BAD_APICID; |
1830 | c->topo.l2c_id = BAD_APICID; |
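	/*
	 * Conservative defaults below; get_cpu_address_sizes() refines the
	 * physical/virtual address widths from CPUID (leaf 0x80000008 where
	 * available).
	 */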
1831 | #ifdef CONFIG_X86_64 |
1832 | c->x86_clflush_size = 64; |
1833 | c->x86_phys_bits = 36; |
1834 | c->x86_virt_bits = 48; |
1835 | #else |
1836 | c->cpuid_level = -1; /* CPUID not detected */ |
1837 | c->x86_clflush_size = 32; |
1838 | c->x86_phys_bits = 32; |
1839 | c->x86_virt_bits = 32; |
1840 | #endif |
1841 | c->x86_cache_alignment = c->x86_clflush_size; |
1842 | memset(&c->x86_capability, 0, sizeof(c->x86_capability)); |
1843 | #ifdef CONFIG_X86_VMX_FEATURE_NAMES |
1844 | memset(&c->vmx_capability, 0, sizeof(c->vmx_capability)); |
1845 | #endif |
1846 | |
1847 | generic_identify(c); |
1848 | |
1849 | if (this_cpu->c_identify) |
1850 | this_cpu->c_identify(c); |
1851 | |
1852 | /* Clear/Set all flags overridden by options, after probe */ |
1853 | apply_forced_caps(c); |
1854 | |
1855 | #ifdef CONFIG_X86_64 |
1856 | c->topo.apicid = apic->phys_pkg_id(c->topo.initial_apicid, 0); |
1857 | #endif |
1858 | |
1859 | /* |
1860 | * Vendor-specific initialization. In this section we |
1861 | * canonicalize the feature flags, meaning if there are |
1862 | * features a certain CPU supports which CPUID doesn't |
1863 | * tell us, CPUID claiming incorrect flags, or other bugs, |
1864 | * we handle them here. |
1865 | * |
1866 | * At the end of this section, c->x86_capability better |
1867 | * indicate the features this CPU genuinely supports! |
1868 | */ |
1869 | if (this_cpu->c_init) |
1870 | this_cpu->c_init(c); |
1871 | |
1872 | /* Disable the PN if appropriate */ |
1873 | squash_the_stupid_serial_number(c); |
1874 | |
1875 | /* Set up SMEP/SMAP/UMIP */ |
1876 | setup_smep(c); |
1877 | setup_smap(c); |
1878 | setup_umip(c); |
1879 | |
1880 | /* Enable FSGSBASE instructions if available. */ |
1881 | if (cpu_has(c, X86_FEATURE_FSGSBASE)) { |
1882 | cr4_set_bits(X86_CR4_FSGSBASE); |
1883 | elf_hwcap2 |= HWCAP2_FSGSBASE; |
1884 | } |
1885 | |
1886 | /* |
1887 | * The vendor-specific functions might have changed features. |
1888 | * Now we do "generic changes." |
1889 | */ |
1890 | |
1891 | /* Filter out anything that depends on CPUID levels we don't have */ |
	filter_cpuid_features(c, true);
1893 | |
1894 | /* If the model name is still unset, do table lookup. */ |
1895 | if (!c->x86_model_id[0]) { |
1896 | const char *p; |
1897 | p = table_lookup_model(c); |
1898 | if (p) |
			strcpy(c->x86_model_id, p);
		else
			/* Last resort... */
			sprintf(c->x86_model_id, "%02x/%02x",
				c->x86, c->x86_model);
1904 | } |
1905 | |
1906 | #ifdef CONFIG_X86_64 |
1907 | detect_ht(c); |
1908 | #endif |
1909 | |
1910 | x86_init_rdrand(c); |
1911 | setup_pku(c); |
1912 | setup_cet(c); |
1913 | |
1914 | /* |
1915 | * Clear/Set all flags overridden by options, need do it |
1916 | * before following smp all cpus cap AND. |
1917 | */ |
1918 | apply_forced_caps(c); |
1919 | |
1920 | /* |
1921 | * On SMP, boot_cpu_data holds the common feature set between |
1922 | * all CPUs; so make sure that we indicate which features are |
1923 | * common between the CPUs. The first time this routine gets |
1924 | * executed, c == &boot_cpu_data. |
1925 | */ |
1926 | if (c != &boot_cpu_data) { |
1927 | /* AND the already accumulated flags with these */ |
1928 | for (i = 0; i < NCAPINTS; i++) |
1929 | boot_cpu_data.x86_capability[i] &= c->x86_capability[i]; |
1930 | |
1931 | /* OR, i.e. replicate the bug flags */ |
1932 | for (i = NCAPINTS; i < NCAPINTS + NBUGINTS; i++) |
1933 | c->x86_capability[i] |= boot_cpu_data.x86_capability[i]; |
1934 | } |
1935 | |
1936 | ppin_init(c); |
1937 | |
1938 | /* Init Machine Check Exception if available. */ |
1939 | mcheck_cpu_init(c); |
1940 | |
1941 | select_idle_routine(c); |
1942 | |
1943 | #ifdef CONFIG_NUMA |
1944 | numa_add_cpu(smp_processor_id()); |
1945 | #endif |
1946 | } |
1947 | |
1948 | /* |
1949 | * Set up the CPU state needed to execute SYSENTER/SYSEXIT instructions |
1950 | * on 32-bit kernels: |
1951 | */ |
1952 | #ifdef CONFIG_X86_32 |
1953 | void enable_sep_cpu(void) |
1954 | { |
1955 | struct tss_struct *tss; |
1956 | int cpu; |
1957 | |
1958 | if (!boot_cpu_has(X86_FEATURE_SEP)) |
1959 | return; |
1960 | |
1961 | cpu = get_cpu(); |
1962 | tss = &per_cpu(cpu_tss_rw, cpu); |
1963 | |
1964 | /* |
1965 | * We cache MSR_IA32_SYSENTER_CS's value in the TSS's ss1 field -- |
1966 | * see the big comment in struct x86_hw_tss's definition. |
1967 | */ |
1968 | |
1969 | tss->x86_tss.ss1 = __KERNEL_CS; |
1970 | wrmsr(MSR_IA32_SYSENTER_CS, tss->x86_tss.ss1, 0); |
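	/*
	 * cpu_entry_stack(cpu) + 1 points just past the entry stack, i.e. at
	 * its top: SYSENTER starts on an empty kernel stack.
	 */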
1971 | wrmsr(MSR_IA32_SYSENTER_ESP, (unsigned long)(cpu_entry_stack(cpu) + 1), 0); |
1972 | wrmsr(MSR_IA32_SYSENTER_EIP, (unsigned long)entry_SYSENTER_32, 0); |
1973 | |
1974 | put_cpu(); |
1975 | } |
1976 | #endif |
1977 | |
1978 | static __init void identify_boot_cpu(void) |
1979 | { |
1980 | identify_cpu(c: &boot_cpu_data); |
1981 | if (HAS_KERNEL_IBT && cpu_feature_enabled(X86_FEATURE_IBT)) |
1982 | pr_info("CET detected: Indirect Branch Tracking enabled\n" ); |
1983 | #ifdef CONFIG_X86_32 |
1984 | enable_sep_cpu(); |
1985 | #endif |
	cpu_detect_tlb(&boot_cpu_data);
1987 | setup_cr_pinning(); |
1988 | |
1989 | tsx_init(); |
1990 | lkgs_init(); |
1991 | } |
1992 | |
1993 | void identify_secondary_cpu(struct cpuinfo_x86 *c) |
1994 | { |
1995 | BUG_ON(c == &boot_cpu_data); |
1996 | identify_cpu(c); |
1997 | #ifdef CONFIG_X86_32 |
1998 | enable_sep_cpu(); |
1999 | #endif |
2000 | validate_apic_and_package_id(c); |
2001 | x86_spec_ctrl_setup_ap(); |
2002 | update_srbds_msr(); |
2003 | if (boot_cpu_has_bug(X86_BUG_GDS)) |
2004 | update_gds_msr(); |
2005 | |
2006 | tsx_ap_init(); |
2007 | } |
2008 | |
2009 | void print_cpu_info(struct cpuinfo_x86 *c) |
2010 | { |
2011 | const char *vendor = NULL; |
2012 | |
2013 | if (c->x86_vendor < X86_VENDOR_NUM) { |
2014 | vendor = this_cpu->c_vendor; |
2015 | } else { |
2016 | if (c->cpuid_level >= 0) |
2017 | vendor = c->x86_vendor_id; |
2018 | } |
2019 | |
2020 | if (vendor && !strstr(c->x86_model_id, vendor)) |
2021 | pr_cont("%s " , vendor); |
2022 | |
2023 | if (c->x86_model_id[0]) |
2024 | pr_cont("%s" , c->x86_model_id); |
2025 | else |
2026 | pr_cont("%d86" , c->x86); |
2027 | |
2028 | pr_cont(" (family: 0x%x, model: 0x%x" , c->x86, c->x86_model); |
2029 | |
2030 | if (c->x86_stepping || c->cpuid_level >= 0) |
2031 | pr_cont(", stepping: 0x%x)\n" , c->x86_stepping); |
2032 | else |
2033 | pr_cont(")\n" ); |
2034 | } |
2035 | |
2036 | /* |
2037 | * clearcpuid= was already parsed in cpu_parse_early_param(). This dummy |
2038 | * function prevents it from becoming an environment variable for init. |
2039 | */ |
2040 | static __init int setup_clearcpuid(char *arg) |
2041 | { |
2042 | return 1; |
2043 | } |
2044 | __setup("clearcpuid=" , setup_clearcpuid); |
2045 | |
2046 | DEFINE_PER_CPU_ALIGNED(struct pcpu_hot, pcpu_hot) = { |
2047 | .current_task = &init_task, |
2048 | .preempt_count = INIT_PREEMPT_COUNT, |
2049 | .top_of_stack = TOP_OF_INIT_STACK, |
2050 | }; |
2051 | EXPORT_PER_CPU_SYMBOL(pcpu_hot); |
2052 | |
2053 | #ifdef CONFIG_X86_64 |
2054 | DEFINE_PER_CPU_FIRST(struct fixed_percpu_data, |
2055 | fixed_percpu_data) __aligned(PAGE_SIZE) __visible; |
2056 | EXPORT_PER_CPU_SYMBOL_GPL(fixed_percpu_data); |
2057 | |
2058 | static void wrmsrl_cstar(unsigned long val) |
2059 | { |
2060 | /* |
2061 | * Intel CPUs do not support 32-bit SYSCALL. Writing to MSR_CSTAR |
2062 | * is so far ignored by the CPU, but raises a #VE trap in a TDX |
2063 | * guest. Avoid the pointless write on all Intel CPUs. |
2064 | */ |
2065 | if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) |
2066 | wrmsrl(MSR_CSTAR, val); |
2067 | } |
2068 | |
2069 | /* May not be marked __init: used by software suspend */ |
2070 | void syscall_init(void) |
2071 | { |
2072 | wrmsr(MSR_STAR, 0, (__USER32_CS << 16) | __KERNEL_CS); |
	wrmsrl(MSR_LSTAR, (unsigned long)entry_SYSCALL_64);
2074 | |
2075 | if (ia32_enabled()) { |
		wrmsrl_cstar((unsigned long)entry_SYSCALL_compat);
2077 | /* |
2078 | * This only works on Intel CPUs. |
2079 | * On AMD CPUs these MSRs are 32-bit, CPU truncates MSR_IA32_SYSENTER_EIP. |
2080 | * This does not cause SYSENTER to jump to the wrong location, because |
2081 | * AMD doesn't allow SYSENTER in long mode (either 32- or 64-bit). |
2082 | */ |
		wrmsrl_safe(MSR_IA32_SYSENTER_CS, (u64)__KERNEL_CS);
		wrmsrl_safe(MSR_IA32_SYSENTER_ESP,
			    (unsigned long)(cpu_entry_stack(smp_processor_id()) + 1));
		wrmsrl_safe(MSR_IA32_SYSENTER_EIP, (u64)entry_SYSENTER_compat);
2087 | } else { |
		wrmsrl_cstar((unsigned long)entry_SYSCALL32_ignore);
		wrmsrl_safe(MSR_IA32_SYSENTER_CS, (u64)GDT_ENTRY_INVALID_SEG);
		wrmsrl_safe(MSR_IA32_SYSENTER_ESP, 0ULL);
		wrmsrl_safe(MSR_IA32_SYSENTER_EIP, 0ULL);
2092 | } |
2093 | |
2094 | /* |
2095 | * Flags to clear on syscall; clear as much as possible |
2096 | * to minimize user space-kernel interference. |
2097 | */ |
2098 | wrmsrl(MSR_SYSCALL_MASK, |
2099 | X86_EFLAGS_CF|X86_EFLAGS_PF|X86_EFLAGS_AF| |
2100 | X86_EFLAGS_ZF|X86_EFLAGS_SF|X86_EFLAGS_TF| |
2101 | X86_EFLAGS_IF|X86_EFLAGS_DF|X86_EFLAGS_OF| |
2102 | X86_EFLAGS_IOPL|X86_EFLAGS_NT|X86_EFLAGS_RF| |
2103 | X86_EFLAGS_AC|X86_EFLAGS_ID); |
2104 | } |
2105 | |
2106 | #else /* CONFIG_X86_64 */ |
2107 | |
2108 | #ifdef CONFIG_STACKPROTECTOR |
2109 | DEFINE_PER_CPU(unsigned long, __stack_chk_guard); |
2110 | EXPORT_PER_CPU_SYMBOL(__stack_chk_guard); |
2111 | #endif |
2112 | |
2113 | #endif /* CONFIG_X86_64 */ |
2114 | |
2115 | /* |
2116 | * Clear all 6 debug registers: |
2117 | */ |
2118 | static void clear_all_debug_regs(void) |
2119 | { |
2120 | int i; |
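
	/*
	 * DR4 and DR5 are legacy aliases for DR6/DR7 (and raise #UD when
	 * CR4.DE is set), so only DR0-DR3, DR6 and DR7 are written.
	 */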
2121 | |
2122 | for (i = 0; i < 8; i++) { |
2123 | /* Ignore db4, db5 */ |
2124 | if ((i == 4) || (i == 5)) |
2125 | continue; |
2126 | |
		set_debugreg(0, i);
2128 | } |
2129 | } |
2130 | |
2131 | #ifdef CONFIG_KGDB |
2132 | /* |
2133 | * Restore debug regs if using kgdbwait and you have a kernel debugger |
2134 | * connection established. |
2135 | */ |
2136 | static void dbg_restore_debug_regs(void) |
2137 | { |
2138 | if (unlikely(kgdb_connected && arch_kgdb_ops.correct_hw_break)) |
2139 | arch_kgdb_ops.correct_hw_break(); |
2140 | } |
2141 | #else /* ! CONFIG_KGDB */ |
2142 | #define dbg_restore_debug_regs() |
2143 | #endif /* ! CONFIG_KGDB */ |
2144 | |
2145 | static inline void setup_getcpu(int cpu) |
2146 | { |
	unsigned long cpudata = vdso_encode_cpunode(cpu, early_cpu_to_node(cpu));
2148 | struct desc_struct d = { }; |
2149 | |
2150 | if (boot_cpu_has(X86_FEATURE_RDTSCP) || boot_cpu_has(X86_FEATURE_RDPID)) |
2151 | wrmsr(MSR_TSC_AUX, cpudata, 0); |
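
	/*
	 * Userspace can read the encoded CPU/node back either via
	 * RDTSCP/RDPID (which return MSR_TSC_AUX) or, in the vDSO, via LSL
	 * on the GDT_ENTRY_CPUNODE segment limit set up below.
	 */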
2152 | |
2153 | /* Store CPU and node number in limit. */ |
2154 | d.limit0 = cpudata; |
2155 | d.limit1 = cpudata >> 16; |
2156 | |
2157 | d.type = 5; /* RO data, expand down, accessed */ |
2158 | d.dpl = 3; /* Visible to user code */ |
2159 | d.s = 1; /* Not a system segment */ |
2160 | d.p = 1; /* Present */ |
2161 | d.d = 1; /* 32-bit */ |
2162 | |
	write_gdt_entry(get_cpu_gdt_rw(cpu), GDT_ENTRY_CPUNODE, &d, DESCTYPE_S);
2164 | } |
2165 | |
2166 | #ifdef CONFIG_X86_64 |
2167 | static inline void tss_setup_ist(struct tss_struct *tss) |
2168 | { |
2169 | /* Set up the per-CPU TSS IST stacks */ |
2170 | tss->x86_tss.ist[IST_INDEX_DF] = __this_cpu_ist_top_va(DF); |
2171 | tss->x86_tss.ist[IST_INDEX_NMI] = __this_cpu_ist_top_va(NMI); |
2172 | tss->x86_tss.ist[IST_INDEX_DB] = __this_cpu_ist_top_va(DB); |
2173 | tss->x86_tss.ist[IST_INDEX_MCE] = __this_cpu_ist_top_va(MCE); |
2174 | /* Only mapped when SEV-ES is active */ |
2175 | tss->x86_tss.ist[IST_INDEX_VC] = __this_cpu_ist_top_va(VC); |
2176 | } |
2177 | #else /* CONFIG_X86_64 */ |
2178 | static inline void tss_setup_ist(struct tss_struct *tss) { } |
2179 | #endif /* !CONFIG_X86_64 */ |
2180 | |
2181 | static inline void tss_setup_io_bitmap(struct tss_struct *tss) |
2182 | { |
2183 | tss->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET_INVALID; |
2184 | |
2185 | #ifdef CONFIG_X86_IOPL_IOPERM |
2186 | tss->io_bitmap.prev_max = 0; |
2187 | tss->io_bitmap.prev_sequence = 0; |
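	/*
	 * A set bit in the TSS I/O bitmap means "deny"; start out with all
	 * ports denied.
	 */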
2188 | memset(tss->io_bitmap.bitmap, 0xff, sizeof(tss->io_bitmap.bitmap)); |
2189 | /* |
2190 | * Invalidate the extra array entry past the end of the all |
2191 | * permission bitmap as required by the hardware. |
2192 | */ |
2193 | tss->io_bitmap.mapall[IO_BITMAP_LONGS] = ~0UL; |
2194 | #endif |
2195 | } |
2196 | |
2197 | /* |
2198 | * Setup everything needed to handle exceptions from the IDT, including the IST |
2199 | * exceptions which use paranoid_entry(). |
2200 | */ |
2201 | void cpu_init_exception_handling(void) |
2202 | { |
2203 | struct tss_struct *tss = this_cpu_ptr(&cpu_tss_rw); |
2204 | int cpu = raw_smp_processor_id(); |
2205 | |
2206 | /* paranoid_entry() gets the CPU number from the GDT */ |
2207 | setup_getcpu(cpu); |
2208 | |
2209 | /* IST vectors need TSS to be set up. */ |
2210 | tss_setup_ist(tss); |
2211 | tss_setup_io_bitmap(tss); |
2212 | set_tss_desc(cpu, &get_cpu_entry_area(cpu)->tss.x86_tss); |
2213 | |
2214 | load_TR_desc(); |
2215 | |
2216 | /* GHCB needs to be setup to handle #VC. */ |
2217 | setup_ghcb(); |
2218 | |
2219 | /* Finally load the IDT */ |
2220 | load_current_idt(); |
2221 | } |
2222 | |
2223 | /* |
2224 | * cpu_init() initializes state that is per-CPU. Some data is already |
2225 | * initialized (naturally) in the bootstrap process, such as the GDT. We |
 * reload it nevertheless; this function acts as a 'CPU state barrier':
 * nothing should get across.
2228 | */ |
2229 | void cpu_init(void) |
2230 | { |
2231 | struct task_struct *cur = current; |
2232 | int cpu = raw_smp_processor_id(); |
2233 | |
2234 | #ifdef CONFIG_NUMA |
2235 | if (this_cpu_read(numa_node) == 0 && |
2236 | early_cpu_to_node(cpu) != NUMA_NO_NODE) |
2237 | set_numa_node(early_cpu_to_node(cpu)); |
2238 | #endif |
2239 | pr_debug("Initializing CPU#%d\n" , cpu); |
2240 | |
2241 | if (IS_ENABLED(CONFIG_X86_64) || cpu_feature_enabled(X86_FEATURE_VME) || |
2242 | boot_cpu_has(X86_FEATURE_TSC) || boot_cpu_has(X86_FEATURE_DE)) |
2243 | cr4_clear_bits(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE); |
2244 | |
2245 | if (IS_ENABLED(CONFIG_X86_64)) { |
2246 | loadsegment(fs, 0); |
2247 | memset(cur->thread.tls_array, 0, GDT_ENTRY_TLS_ENTRIES * 8); |
2248 | syscall_init(); |
2249 | |
		wrmsrl(MSR_FS_BASE, 0);
		wrmsrl(MSR_KERNEL_GS_BASE, 0);
2252 | barrier(); |
2253 | |
2254 | x2apic_setup(); |
2255 | } |
2256 | |
2257 | mmgrab(mm: &init_mm); |
2258 | cur->active_mm = &init_mm; |
2259 | BUG_ON(cur->mm); |
2260 | initialize_tlbstate_and_flush(); |
2261 | enter_lazy_tlb(mm: &init_mm, tsk: cur); |
2262 | |
2263 | /* |
2264 | * sp0 points to the entry trampoline stack regardless of what task |
2265 | * is running. |
2266 | */ |
	load_sp0((unsigned long)(cpu_entry_stack(cpu) + 1));

	load_mm_ldt(&init_mm);
2270 | |
2271 | clear_all_debug_regs(); |
2272 | dbg_restore_debug_regs(); |
2273 | |
2274 | doublefault_init_cpu_tss(); |
2275 | |
2276 | if (is_uv_system()) |
2277 | uv_cpu_init(); |
2278 | |
2279 | load_fixmap_gdt(cpu); |
2280 | } |
2281 | |
2282 | #ifdef CONFIG_MICROCODE_LATE_LOADING |
2283 | /** |
2284 | * store_cpu_caps() - Store a snapshot of CPU capabilities |
2285 | * @curr_info: Pointer where to store it |
2286 | * |
2287 | * Returns: None |
2288 | */ |
2289 | void store_cpu_caps(struct cpuinfo_x86 *curr_info) |
2290 | { |
2291 | /* Reload CPUID max function as it might've changed. */ |
	curr_info->cpuid_level = cpuid_eax(0);
2293 | |
2294 | /* Copy all capability leafs and pick up the synthetic ones. */ |
2295 | memcpy(&curr_info->x86_capability, &boot_cpu_data.x86_capability, |
2296 | sizeof(curr_info->x86_capability)); |
2297 | |
2298 | /* Get the hardware CPUID leafs */ |
	get_cpu_cap(curr_info);
2300 | } |
2301 | |
2302 | /** |
2303 | * microcode_check() - Check if any CPU capabilities changed after an update. |
2304 | * @prev_info: CPU capabilities stored before an update. |
2305 | * |
2306 | * The microcode loader calls this upon late microcode load to recheck features, |
 * only when microcode has been updated. The caller holds the CPU hotplug lock.
2308 | * |
2309 | * Return: None |
2310 | */ |
2311 | void microcode_check(struct cpuinfo_x86 *prev_info) |
2312 | { |
2313 | struct cpuinfo_x86 curr_info; |
2314 | |
2315 | perf_check_microcode(); |
2316 | |
2317 | amd_check_microcode(); |
2318 | |
	store_cpu_caps(&curr_info);
2320 | |
	if (!memcmp(&prev_info->x86_capability, &curr_info.x86_capability,
		    sizeof(prev_info->x86_capability)))
2323 | return; |
2324 | |
2325 | pr_warn("x86/CPU: CPU features have changed after loading microcode, but might not take effect.\n" ); |
2326 | pr_warn("x86/CPU: Please consider either early loading through initrd/built-in or a potential BIOS update.\n" ); |
2327 | } |
2328 | #endif |
2329 | |
2330 | /* |
2331 | * Invoked from core CPU hotplug code after hotplug operations |
2332 | */ |
2333 | void arch_smt_update(void) |
2334 | { |
2335 | /* Handle the speculative execution misfeatures */ |
2336 | cpu_bugs_smt_update(); |
2337 | /* Check whether IPI broadcasting can be enabled */ |
2338 | apic_smt_update(); |
2339 | } |
2340 | |
2341 | void __init arch_cpu_finalize_init(void) |
2342 | { |
2343 | identify_boot_cpu(); |
2344 | |
2345 | /* |
2346 | * identify_boot_cpu() initialized SMT support information, let the |
2347 | * core code know. |
2348 | */ |
	cpu_smt_set_num_threads(smp_num_siblings, smp_num_siblings);
2350 | |
2351 | if (!IS_ENABLED(CONFIG_SMP)) { |
2352 | pr_info("CPU: " ); |
2353 | print_cpu_info(c: &boot_cpu_data); |
2354 | } |
2355 | |
2356 | cpu_select_mitigations(); |
2357 | |
2358 | arch_smt_update(); |
2359 | |
2360 | if (IS_ENABLED(CONFIG_X86_32)) { |
2361 | /* |
		 * Check whether this is a real i386, which is no longer
		 * supported, and fix up the utsname.
		 */
		if (boot_cpu_data.x86 < 4)
			panic("Kernel requires i486+ for 'invlpg' and other features");
2367 | |
2368 | init_utsname()->machine[1] = |
2369 | '0' + (boot_cpu_data.x86 > 6 ? 6 : boot_cpu_data.x86); |
2370 | } |
2371 | |
2372 | /* |
2373 | * Must be before alternatives because it might set or clear |
2374 | * feature bits. |
2375 | */ |
2376 | fpu__init_system(); |
2377 | fpu__init_cpu(); |
2378 | |
2379 | alternative_instructions(); |
2380 | |
2381 | if (IS_ENABLED(CONFIG_X86_64)) { |
2382 | /* |
		 * Make sure the first 2MB area is not mapped by huge pages.
		 * There are typically fixed size MTRRs in there and overlapping
		 * MTRRs into large pages causes slowdowns.
2386 | * |
2387 | * Right now we don't do that with gbpages because there seems |
2388 | * very little benefit for that case. |
2389 | */ |
2390 | if (!direct_gbpages) |
			set_memory_4k((unsigned long)__va(0), 1);
2392 | } else { |
2393 | fpu__init_check_bugs(); |
2394 | } |
2395 | |
2396 | /* |
2397 | * This needs to be called before any devices perform DMA |
2398 | * operations that might use the SWIOTLB bounce buffers. It will |
2399 | * mark the bounce buffers as decrypted so that their usage will |
2400 | * not cause "plain-text" data to be decrypted when accessed. It |
2401 | * must be called after late_time_init() so that Hyper-V x86/x64 |
2402 | * hypercalls work when the SWIOTLB bounce buffers are decrypted. |
2403 | */ |
2404 | mem_encrypt_init(); |
2405 | } |
2406 | |