// SPDX-License-Identifier: GPL-2.0
#include <linux/kernel.h>
#include <linux/pgtable.h>

#include <linux/string.h>
#include <linux/bitops.h>
#include <linux/smp.h>
#include <linux/sched.h>
#include <linux/sched/clock.h>
#include <linux/semaphore.h>
#include <linux/thread_info.h>
#include <linux/init.h>
#include <linux/uaccess.h>
#include <linux/workqueue.h>
#include <linux/delay.h>
#include <linux/cpuhotplug.h>

#include <asm/cpufeature.h>
#include <asm/msr.h>
#include <asm/bugs.h>
#include <asm/cpu.h>
#include <asm/intel-family.h>
#include <asm/microcode.h>
#include <asm/hwcap2.h>
#include <asm/elf.h>
#include <asm/cpu_device_id.h>
#include <asm/cmdline.h>
#include <asm/traps.h>
#include <asm/resctrl.h>
#include <asm/numa.h>
#include <asm/thermal.h>

#ifdef CONFIG_X86_64
#include <linux/topology.h>
#endif

#include "cpu.h"

#ifdef CONFIG_X86_LOCAL_APIC
#include <asm/mpspec.h>
#include <asm/apic.h>
#endif

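/*
 * Split lock detection modes (see sld_state_show() for the user-visible
 * summary):
 *   sld_off       - detection disabled
 *   sld_warn      - warn user-space offenders, then briefly disable
 *                   detection on the affected CPU so the task can progress
 *   sld_fatal     - send SIGBUS to user-space offenders instead of warning
 *   sld_ratelimit - no #AC; throttle bus locks via the #DB handler instead
 */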
enum split_lock_detect_state {
	sld_off = 0,
	sld_warn,
	sld_fatal,
	sld_ratelimit,
};

/*
 * Default to sld_off because most systems do not support split lock detection.
 * sld_state_setup() will switch this to sld_warn on systems that support
 * split lock/bus lock detect, unless there is a command line override.
 */
static enum split_lock_detect_state sld_state __ro_after_init = sld_off;
static u64 msr_test_ctrl_cache __ro_after_init;

/*
 * With a name like MSR_TEST_CTL it should go without saying, but don't touch
 * MSR_TEST_CTL unless the CPU is one of the whitelisted models. Writing it
 * on CPUs that do not support SLD can cause fireworks, even when writing '0'.
 */
static bool cpu_model_supports_sld __ro_after_init;

/*
 * Processors which have self-snooping capability can handle conflicting
 * memory types across CPUs by snooping their own cache. However, there
 * exist CPU models in which having conflicting memory types still leads
 * to unpredictable behavior, machine check errors, or hangs. Clear this
 * feature to prevent its use on machines with known errata.
 */
static void check_memory_type_self_snoop_errata(struct cpuinfo_x86 *c)
{
	switch (c->x86_model) {
	case INTEL_FAM6_CORE_YONAH:
	case INTEL_FAM6_CORE2_MEROM:
	case INTEL_FAM6_CORE2_MEROM_L:
	case INTEL_FAM6_CORE2_PENRYN:
	case INTEL_FAM6_CORE2_DUNNINGTON:
	case INTEL_FAM6_NEHALEM:
	case INTEL_FAM6_NEHALEM_G:
	case INTEL_FAM6_NEHALEM_EP:
	case INTEL_FAM6_NEHALEM_EX:
	case INTEL_FAM6_WESTMERE:
	case INTEL_FAM6_WESTMERE_EP:
	case INTEL_FAM6_SANDYBRIDGE:
		setup_clear_cpu_cap(X86_FEATURE_SELFSNOOP);
	}
}

static bool ring3mwait_disabled __read_mostly;

static int __init ring3mwait_disable(char *__unused)
{
	ring3mwait_disabled = true;
	return 1;
}
__setup("ring3mwait=disable", ring3mwait_disable);

static void probe_xeon_phi_r3mwait(struct cpuinfo_x86 *c)
{
	/*
	 * The Ring 3 MONITOR/MWAIT feature cannot be detected without
	 * a CPU model and family comparison.
	 */
	if (c->x86 != 6)
		return;
	switch (c->x86_model) {
	case INTEL_FAM6_XEON_PHI_KNL:
	case INTEL_FAM6_XEON_PHI_KNM:
		break;
	default:
		return;
	}

	if (ring3mwait_disabled)
		return;

	set_cpu_cap(c, X86_FEATURE_RING3MWAIT);
	this_cpu_or(msr_misc_features_shadow,
		    1UL << MSR_MISC_FEATURES_ENABLES_RING3MWAIT_BIT);

	if (c == &boot_cpu_data)
		ELF_HWCAP2 |= HWCAP2_RING3MWAIT;
}

/*
 * Early microcode releases for the Spectre v2 mitigation were broken.
 * Information taken from:
 * - https://newsroom.intel.com/wp-content/uploads/sites/11/2018/03/microcode-update-guidance.pdf
 * - https://kb.vmware.com/s/article/52345
 * - Microcode revisions observed in the wild
 * - Release note from 20180108 microcode release
 */
struct sku_microcode {
	u8 model;
	u8 stepping;
	u32 microcode;
};
static const struct sku_microcode spectre_bad_microcodes[] = {
	{ INTEL_FAM6_KABYLAKE,		0x0B,	0x80 },
	{ INTEL_FAM6_KABYLAKE,		0x0A,	0x80 },
	{ INTEL_FAM6_KABYLAKE,		0x09,	0x80 },
	{ INTEL_FAM6_KABYLAKE_L,	0x0A,	0x80 },
	{ INTEL_FAM6_KABYLAKE_L,	0x09,	0x80 },
	{ INTEL_FAM6_SKYLAKE_X,		0x03,	0x0100013e },
	{ INTEL_FAM6_SKYLAKE_X,		0x04,	0x0200003c },
	{ INTEL_FAM6_BROADWELL,		0x04,	0x28 },
	{ INTEL_FAM6_BROADWELL_G,	0x01,	0x1b },
	{ INTEL_FAM6_BROADWELL_D,	0x02,	0x14 },
	{ INTEL_FAM6_BROADWELL_D,	0x03,	0x07000011 },
	{ INTEL_FAM6_BROADWELL_X,	0x01,	0x0b000025 },
	{ INTEL_FAM6_HASWELL_L,		0x01,	0x21 },
	{ INTEL_FAM6_HASWELL_G,		0x01,	0x18 },
	{ INTEL_FAM6_HASWELL,		0x03,	0x23 },
	{ INTEL_FAM6_HASWELL_X,		0x02,	0x3b },
	{ INTEL_FAM6_HASWELL_X,		0x04,	0x10 },
	{ INTEL_FAM6_IVYBRIDGE_X,	0x04,	0x42a },
	/* Observed in the wild */
	{ INTEL_FAM6_SANDYBRIDGE_X,	0x06,	0x61b },
	{ INTEL_FAM6_SANDYBRIDGE_X,	0x07,	0x712 },
};

static bool bad_spectre_microcode(struct cpuinfo_x86 *c)
{
	int i;

	/*
	 * We know that hypervisors lie to us about the microcode version,
	 * so we may as well hope that they are running the correct version.
	 */
	if (cpu_has(c, X86_FEATURE_HYPERVISOR))
		return false;

	if (c->x86 != 6)
		return false;

	for (i = 0; i < ARRAY_SIZE(spectre_bad_microcodes); i++) {
		if (c->x86_model == spectre_bad_microcodes[i].model &&
		    c->x86_stepping == spectre_bad_microcodes[i].stepping)
			return (c->microcode <= spectre_bad_microcodes[i].microcode);
	}
	return false;
}

#define MSR_IA32_TME_ACTIVATE		0x982

/* Helpers to access TME_ACTIVATE MSR */
#define TME_ACTIVATE_LOCKED(x)		(x & 0x1)
#define TME_ACTIVATE_ENABLED(x)		(x & 0x2)

#define TME_ACTIVATE_POLICY(x)		((x >> 4) & 0xf)	/* Bits 7:4 */
#define TME_ACTIVATE_POLICY_AES_XTS_128	0

#define TME_ACTIVATE_KEYID_BITS(x)	((x >> 32) & 0xf)	/* Bits 35:32 */

#define TME_ACTIVATE_CRYPTO_ALGS(x)	((x >> 48) & 0xffff)	/* Bits 63:48 */
#define TME_ACTIVATE_CRYPTO_AES_XTS_128	1
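/*
 * Illustrative decode (made-up MSR value, not from real hardware):
 * tme_activate == 0x0001000400000003 would mean LOCKED=1, ENABLED=1,
 * POLICY=0 (AES-XTS-128), KEYID_BITS=4 (so 15 MKTME KeyIDs) and
 * CRYPTO_ALGS=0x1 (AES-XTS-128 supported); detect_tme_early() would
 * then subtract 4 from x86_phys_bits.
 */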

/* Values for mktme_status (SW only construct) */
#define MKTME_ENABLED			0
#define MKTME_DISABLED			1
#define MKTME_UNINITIALIZED		2
static int mktme_status = MKTME_UNINITIALIZED;

static void detect_tme_early(struct cpuinfo_x86 *c)
{
	u64 tme_activate, tme_policy, tme_crypto_algs;
	int keyid_bits = 0, nr_keyids = 0;
	static u64 tme_activate_cpu0 = 0;

	rdmsrl(MSR_IA32_TME_ACTIVATE, tme_activate);

	if (mktme_status != MKTME_UNINITIALIZED) {
		if (tme_activate != tme_activate_cpu0) {
			/* Broken BIOS? */
			pr_err_once("x86/tme: configuration is inconsistent between CPUs\n");
			pr_err_once("x86/tme: MKTME is not usable\n");
			mktme_status = MKTME_DISABLED;

			/* Proceed. We may need to exclude bits from x86_phys_bits. */
		}
	} else {
		tme_activate_cpu0 = tme_activate;
	}

	if (!TME_ACTIVATE_LOCKED(tme_activate) || !TME_ACTIVATE_ENABLED(tme_activate)) {
		pr_info_once("x86/tme: not enabled by BIOS\n");
		mktme_status = MKTME_DISABLED;
		return;
	}

	if (mktme_status != MKTME_UNINITIALIZED)
		goto detect_keyid_bits;

	pr_info("x86/tme: enabled by BIOS\n");

	tme_policy = TME_ACTIVATE_POLICY(tme_activate);
	if (tme_policy != TME_ACTIVATE_POLICY_AES_XTS_128)
		pr_warn("x86/tme: Unknown policy is active: %#llx\n", tme_policy);

	tme_crypto_algs = TME_ACTIVATE_CRYPTO_ALGS(tme_activate);
	if (!(tme_crypto_algs & TME_ACTIVATE_CRYPTO_AES_XTS_128)) {
		pr_err("x86/mktme: No known encryption algorithm is supported: %#llx\n",
		       tme_crypto_algs);
		mktme_status = MKTME_DISABLED;
	}
detect_keyid_bits:
	keyid_bits = TME_ACTIVATE_KEYID_BITS(tme_activate);
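	/* KeyID 0 is reserved for TME itself; only the rest are MKTME KeyIDs. */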
	nr_keyids = (1UL << keyid_bits) - 1;
	if (nr_keyids) {
		pr_info_once("x86/mktme: enabled by BIOS\n");
		pr_info_once("x86/mktme: %d KeyIDs available\n", nr_keyids);
	} else {
		pr_info_once("x86/mktme: disabled by BIOS\n");
	}

	if (mktme_status == MKTME_UNINITIALIZED) {
		/* MKTME is usable */
		mktme_status = MKTME_ENABLED;
	}

	/*
	 * KeyID bits effectively lower the number of physical address
	 * bits. Update cpuinfo_x86::x86_phys_bits accordingly.
	 */
	c->x86_phys_bits -= keyid_bits;
}

static void early_init_intel(struct cpuinfo_x86 *c)
{
	u64 misc_enable;

	/* Unmask CPUID levels if masked: */
	if (c->x86 > 6 || (c->x86 == 6 && c->x86_model >= 0xd)) {
		if (msr_clear_bit(MSR_IA32_MISC_ENABLE,
				  MSR_IA32_MISC_ENABLE_LIMIT_CPUID_BIT) > 0) {
			c->cpuid_level = cpuid_eax(0);
			get_cpu_cap(c);
		}
	}

	if ((c->x86 == 0xf && c->x86_model >= 0x03) ||
	    (c->x86 == 0x6 && c->x86_model >= 0x0e))
		set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);

	if (c->x86 >= 6 && !cpu_has(c, X86_FEATURE_IA64))
		c->microcode = intel_get_microcode_revision();

	/* Now if any of them are set, check the blacklist and clear the lot */
	if ((cpu_has(c, X86_FEATURE_SPEC_CTRL) ||
	     cpu_has(c, X86_FEATURE_INTEL_STIBP) ||
	     cpu_has(c, X86_FEATURE_IBRS) || cpu_has(c, X86_FEATURE_IBPB) ||
	     cpu_has(c, X86_FEATURE_STIBP)) && bad_spectre_microcode(c)) {
		pr_warn("Intel Spectre v2 broken microcode detected; disabling Speculation Control\n");
		setup_clear_cpu_cap(X86_FEATURE_IBRS);
		setup_clear_cpu_cap(X86_FEATURE_IBPB);
		setup_clear_cpu_cap(X86_FEATURE_STIBP);
		setup_clear_cpu_cap(X86_FEATURE_SPEC_CTRL);
		setup_clear_cpu_cap(X86_FEATURE_MSR_SPEC_CTRL);
		setup_clear_cpu_cap(X86_FEATURE_INTEL_STIBP);
		setup_clear_cpu_cap(X86_FEATURE_SSBD);
		setup_clear_cpu_cap(X86_FEATURE_SPEC_CTRL_SSBD);
	}

	/*
	 * Atom erratum AAE44/AAF40/AAG38/AAH41:
	 *
	 * A race condition between speculative fetches and invalidating
	 * a large page. This is worked around in microcode, but we
	 * need the microcode to have already been loaded... so if it is
	 * not, recommend a BIOS update and disable large pages.
	 */
	if (c->x86 == 6 && c->x86_model == 0x1c && c->x86_stepping <= 2 &&
	    c->microcode < 0x20e) {
		pr_warn("Atom PSE erratum detected, BIOS microcode update recommended\n");
		clear_cpu_cap(c, X86_FEATURE_PSE);
	}

#ifdef CONFIG_X86_64
	set_cpu_cap(c, X86_FEATURE_SYSENTER32);
#else
	/* Netburst reports 64 bytes clflush size, but does IO in 128 bytes */
	if (c->x86 == 15 && c->x86_cache_alignment == 64)
		c->x86_cache_alignment = 128;
#endif

	/* CPUID workaround for 0F33/0F34 CPU */
	if (c->x86 == 0xF && c->x86_model == 0x3 &&
	    (c->x86_stepping == 0x3 || c->x86_stepping == 0x4))
		c->x86_phys_bits = 36;

	/*
	 * c->x86_power is 8000_0007 edx. Bit 8 is TSC runs at constant rate
	 * with P/T states and does not stop in deep C-states.
	 *
	 * It is also reliable across cores and sockets. (but not across
	 * cabinets - we turn it off in that case explicitly.)
	 */
	if (c->x86_power & (1 << 8)) {
		set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
		set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC);
	}

	/* Penwell and Cloverview have the TSC which doesn't sleep on S3 */
	if (c->x86 == 6) {
		switch (c->x86_model) {
		case INTEL_FAM6_ATOM_SALTWELL_MID:
		case INTEL_FAM6_ATOM_SALTWELL_TABLET:
		case INTEL_FAM6_ATOM_SILVERMONT_MID:
		case INTEL_FAM6_ATOM_AIRMONT_NP:
			set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC_S3);
			break;
		default:
			break;
		}
	}

	/*
	 * There is a known erratum on Pentium III and Core Solo
	 * and Core Duo CPUs.
	 * "Page with PAT set to WC while associated MTRR is UC
	 *  may consolidate to UC"
	 * Because of this erratum, it is better to stick with
	 * setting WC in MTRR rather than using PAT on these CPUs.
	 *
	 * Enable PAT WC only on P4, Core 2 or later CPUs.
	 */
	if (c->x86 == 6 && c->x86_model < 15)
		clear_cpu_cap(c, X86_FEATURE_PAT);

	/*
	 * If fast string is not enabled in IA32_MISC_ENABLE for any reason,
	 * clear the fast string and enhanced fast string CPU capabilities.
	 */
	if (c->x86 > 6 || (c->x86 == 6 && c->x86_model >= 0xd)) {
		rdmsrl(MSR_IA32_MISC_ENABLE, misc_enable);
		if (!(misc_enable & MSR_IA32_MISC_ENABLE_FAST_STRING)) {
			pr_info("Disabled fast string operations\n");
			setup_clear_cpu_cap(X86_FEATURE_REP_GOOD);
			setup_clear_cpu_cap(X86_FEATURE_ERMS);
		}
	}

	/*
	 * Intel Quark Core DevMan_001.pdf section 6.4.11
	 * "The operating system also is required to invalidate (i.e., flush)
	 *  the TLB when any changes are made to any of the page table entries.
	 *  The operating system must reload CR3 to cause the TLB to be flushed"
	 *
	 * As a result, boot_cpu_has(X86_FEATURE_PGE) in arch/x86/include/asm/tlbflush.h
	 * should be false so that __flush_tlb_all() causes CR3 instead of CR4.PGE
	 * to be modified.
	 */
	if (c->x86 == 5 && c->x86_model == 9) {
		pr_info("Disabling PGE capability bit\n");
		setup_clear_cpu_cap(X86_FEATURE_PGE);
	}

	check_memory_type_self_snoop_errata(c);

	/*
	 * Adjust the number of physical bits early because it affects the
	 * valid bits of the MTRR mask registers.
	 */
	if (cpu_has(c, X86_FEATURE_TME))
		detect_tme_early(c);
}

static void bsp_init_intel(struct cpuinfo_x86 *c)
{
	resctrl_cpu_detect(c);
}

#ifdef CONFIG_X86_32
/*
 * Early probe support logic for ppro memory erratum #50
 *
 * This is called before we do cpu ident work
 */

int ppro_with_ram_bug(void)
{
	/* Uses data from early_cpu_detect now */
	if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL &&
	    boot_cpu_data.x86 == 6 &&
	    boot_cpu_data.x86_model == 1 &&
	    boot_cpu_data.x86_stepping < 8) {
		pr_info("Pentium Pro with Errata#50 detected. Taking evasive action.\n");
		return 1;
	}
	return 0;
}

static void intel_smp_check(struct cpuinfo_x86 *c)
{
	/* Is this call from identify_secondary_cpu()? */
	if (!c->cpu_index)
		return;

	/*
	 * Mask B, Pentium, but not Pentium MMX
	 */
	if (c->x86 == 5 &&
	    c->x86_stepping >= 1 && c->x86_stepping <= 4 &&
	    c->x86_model <= 3) {
		/*
		 * Remember we have B step Pentia with bugs
		 */
		WARN_ONCE(1, "WARNING: SMP operation may be unreliable "
			     "with B stepping processors.\n");
	}
}

static int forcepae;
static int __init forcepae_setup(char *__unused)
{
	forcepae = 1;
	return 1;
}
__setup("forcepae", forcepae_setup);

static void intel_workarounds(struct cpuinfo_x86 *c)
{
#ifdef CONFIG_X86_F00F_BUG
	/*
	 * All models of Pentium and Pentium with MMX technology CPUs
	 * have the F0 0F bug, which lets nonprivileged users lock up the
	 * system. Announce that the fault handler will be checking for it.
	 * The Quark is also family 5, but does not have the same bug.
	 */
	clear_cpu_bug(c, X86_BUG_F00F);
	if (c->x86 == 5 && c->x86_model < 9) {
		static int f00f_workaround_enabled;

		set_cpu_bug(c, X86_BUG_F00F);
		if (!f00f_workaround_enabled) {
			pr_notice("Intel Pentium with F0 0F bug - workaround enabled.\n");
			f00f_workaround_enabled = 1;
		}
	}
#endif

	/*
	 * SEP CPUID bug: Pentium Pro reports SEP but doesn't have it until
	 * model 3 mask 3
	 */
	if ((c->x86<<8 | c->x86_model<<4 | c->x86_stepping) < 0x633)
		clear_cpu_cap(c, X86_FEATURE_SEP);

	/*
	 * PAE CPUID issue: many Pentium M report no PAE but may have a
	 * functionally usable PAE implementation.
	 * Forcefully enable PAE if kernel parameter "forcepae" is present.
	 */
	if (forcepae) {
		pr_warn("PAE forced!\n");
		set_cpu_cap(c, X86_FEATURE_PAE);
		add_taint(TAINT_CPU_OUT_OF_SPEC, LOCKDEP_NOW_UNRELIABLE);
	}

	/*
	 * P4 Xeon erratum 037 workaround.
	 * Hardware prefetcher may cause stale data to be loaded into the cache.
	 */
	if ((c->x86 == 15) && (c->x86_model == 1) && (c->x86_stepping == 1)) {
		if (msr_set_bit(MSR_IA32_MISC_ENABLE,
				MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE_BIT) > 0) {
			pr_info("CPU: C0 stepping P4 Xeon detected.\n");
			pr_info("CPU: Disabling hardware prefetching (Erratum 037)\n");
		}
	}

	/*
	 * See if we have a good local APIC by checking for buggy Pentia,
	 * i.e. all B steppings and the C2 stepping of P54C when using their
	 * integrated APIC (see 11AP erratum in "Pentium Processor
	 * Specification Update").
	 */
	if (boot_cpu_has(X86_FEATURE_APIC) && (c->x86<<8 | c->x86_model<<4) == 0x520 &&
	    (c->x86_stepping < 0x6 || c->x86_stepping == 0xb))
		set_cpu_bug(c, X86_BUG_11AP);

#ifdef CONFIG_X86_INTEL_USERCOPY
	/*
	 * Set up the preferred alignment for movsl bulk memory moves
	 */
	switch (c->x86) {
	case 4:		/* 486: untested */
		break;
	case 5:		/* Old Pentia: untested */
		break;
	case 6:		/* PII/PIII only like movsl with 8-byte alignment */
		movsl_mask.mask = 7;
		break;
	case 15:	/* P4 is OK down to 8-byte alignment */
		movsl_mask.mask = 7;
		break;
	}
#endif

	intel_smp_check(c);
}
#else
static void intel_workarounds(struct cpuinfo_x86 *c)
{
}
#endif

static void srat_detect_node(struct cpuinfo_x86 *c)
{
#ifdef CONFIG_NUMA
	unsigned node;
	int cpu = smp_processor_id();

	/*
	 * Don't do the funky fallback heuristics the AMD version
	 * employs for now.
	 */
	node = numa_cpu_node(cpu);
	if (node == NUMA_NO_NODE || !node_online(node)) {
		/* reuse the value from init_cpu_to_node() */
		node = cpu_to_node(cpu);
	}
	numa_set_node(cpu, node);
#endif
}

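/*
 * CPUID faulting, enumerated in MSR_PLATFORM_INFO and enabled via
 * MSR_MISC_FEATURES_ENABLES, makes CPUID raise #GP when executed from
 * user mode; arch_prctl(ARCH_SET_CPUID) builds on this capability.
 */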
static void init_cpuid_fault(struct cpuinfo_x86 *c)
{
	u64 msr;

	if (!rdmsrl_safe(MSR_PLATFORM_INFO, &msr)) {
		if (msr & MSR_PLATFORM_INFO_CPUID_FAULT)
			set_cpu_cap(c, X86_FEATURE_CPUID_FAULT);
	}
}

static void init_intel_misc_features(struct cpuinfo_x86 *c)
{
	u64 msr;

	if (rdmsrl_safe(MSR_MISC_FEATURES_ENABLES, &msr))
		return;

	/* Clear all MISC features */
	this_cpu_write(msr_misc_features_shadow, 0);

	/* Check features and update capabilities and shadow control bits */
	init_cpuid_fault(c);
	probe_xeon_phi_r3mwait(c);

	msr = this_cpu_read(msr_misc_features_shadow);
	wrmsrl(MSR_MISC_FEATURES_ENABLES, msr);
}

static void split_lock_init(void);
static void bus_lock_init(void);

static void init_intel(struct cpuinfo_x86 *c)
{
	early_init_intel(c);

	intel_workarounds(c);

	init_intel_cacheinfo(c);

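	/*
	 * CPUID leaf 0xa enumerates architectural perfmon: EAX[7:0] is the
	 * version ID and EAX[15:8] the number of general-purpose counters.
	 */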
	if (c->cpuid_level > 9) {
		unsigned eax = cpuid_eax(10);
		/* Check for version and the number of counters */
		if ((eax & 0xff) && (((eax>>8) & 0xff) > 1))
			set_cpu_cap(c, X86_FEATURE_ARCH_PERFMON);
	}

	if (cpu_has(c, X86_FEATURE_XMM2))
		set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC);

	if (boot_cpu_has(X86_FEATURE_DS)) {
		unsigned int l1, l2;

		rdmsr(MSR_IA32_MISC_ENABLE, l1, l2);
		if (!(l1 & MSR_IA32_MISC_ENABLE_BTS_UNAVAIL))
			set_cpu_cap(c, X86_FEATURE_BTS);
		if (!(l1 & MSR_IA32_MISC_ENABLE_PEBS_UNAVAIL))
			set_cpu_cap(c, X86_FEATURE_PEBS);
	}

	if (c->x86 == 6 && boot_cpu_has(X86_FEATURE_CLFLUSH) &&
	    (c->x86_model == 29 || c->x86_model == 46 || c->x86_model == 47))
		set_cpu_bug(c, X86_BUG_CLFLUSH_MONITOR);

	if (c->x86 == 6 && boot_cpu_has(X86_FEATURE_MWAIT) &&
	    c->x86_model == INTEL_FAM6_ATOM_GOLDMONT)
		set_cpu_bug(c, X86_BUG_MONITOR);

#ifdef CONFIG_X86_64
	if (c->x86 == 15)
		c->x86_cache_alignment = c->x86_clflush_size * 2;
	if (c->x86 == 6)
		set_cpu_cap(c, X86_FEATURE_REP_GOOD);
#else
	/*
	 * Names for the Pentium II/Celeron processors
	 * detectable only by also checking the cache size.
	 * Dixon is NOT a Celeron.
	 */
	if (c->x86 == 6) {
		unsigned int l2 = c->x86_cache_size;
		char *p = NULL;

		switch (c->x86_model) {
		case 5:
			if (l2 == 0)
				p = "Celeron (Covington)";
			else if (l2 == 256)
				p = "Mobile Pentium II (Dixon)";
			break;

		case 6:
			if (l2 == 128)
				p = "Celeron (Mendocino)";
			else if (c->x86_stepping == 0 || c->x86_stepping == 5)
				p = "Celeron-A";
			break;

		case 8:
			if (l2 == 128)
				p = "Celeron (Coppermine)";
			break;
		}

		if (p)
			strcpy(c->x86_model_id, p);
	}

	if (c->x86 == 15)
		set_cpu_cap(c, X86_FEATURE_P4);
	if (c->x86 == 6)
		set_cpu_cap(c, X86_FEATURE_P3);
#endif

	/* Work around errata */
	srat_detect_node(c);

	init_ia32_feat_ctl(c);

	init_intel_misc_features(c);

	split_lock_init();
	bus_lock_init();

	intel_init_thermal(c);
}

#ifdef CONFIG_X86_32
static unsigned int intel_size_cache(struct cpuinfo_x86 *c, unsigned int size)
{
	/*
	 * Intel PIII Tualatin. This comes in two flavours.
	 * One has 256kb of cache, the other 512. We have no way
	 * to determine which, so we use a boottime override
	 * for the 512kb model, and assume 256 otherwise.
	 */
	if ((c->x86 == 6) && (c->x86_model == 11) && (size == 0))
		size = 256;

	/*
	 * Intel Quark SoC X1000 contains a 4-way set associative
	 * 16K cache with a 16 byte cache line and 256 lines per tag
	 */
	if ((c->x86 == 5) && (c->x86_model == 9))
		size = 16;
	return size;
}
#endif

#define TLB_INST_4K		0x01
#define TLB_INST_4M		0x02
#define TLB_INST_2M_4M		0x03

#define TLB_INST_ALL		0x05
#define TLB_INST_1G		0x06

#define TLB_DATA_4K		0x11
#define TLB_DATA_4M		0x12
#define TLB_DATA_2M_4M		0x13
#define TLB_DATA_4K_4M		0x14

#define TLB_DATA_1G		0x16

#define TLB_DATA0_4K		0x21
#define TLB_DATA0_4M		0x22
#define TLB_DATA0_2M_4M		0x23

#define STLB_4K			0x41
#define STLB_4K_2M		0x42
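/*
 * The high nibble groups the TLB class: 0x0x instruction, 0x1x data,
 * 0x2x first-level data (DATA0), 0x4x shared second-level (STLB).
 */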

static const struct _tlb_table intel_tlb_table[] = {
	{ 0x01, TLB_INST_4K,		32,	" TLB_INST 4 KByte pages, 4-way set associative" },
	{ 0x02, TLB_INST_4M,		2,	" TLB_INST 4 MByte pages, fully associative" },
	{ 0x03, TLB_DATA_4K,		64,	" TLB_DATA 4 KByte pages, 4-way set associative" },
	{ 0x04, TLB_DATA_4M,		8,	" TLB_DATA 4 MByte pages, 4-way set associative" },
	{ 0x05, TLB_DATA_4M,		32,	" TLB_DATA 4 MByte pages, 4-way set associative" },
	{ 0x0b, TLB_INST_4M,		4,	" TLB_INST 4 MByte pages, 4-way set associative" },
	{ 0x4f, TLB_INST_4K,		32,	" TLB_INST 4 KByte pages" },
	{ 0x50, TLB_INST_ALL,		64,	" TLB_INST 4 KByte and 2-MByte or 4-MByte pages" },
	{ 0x51, TLB_INST_ALL,		128,	" TLB_INST 4 KByte and 2-MByte or 4-MByte pages" },
	{ 0x52, TLB_INST_ALL,		256,	" TLB_INST 4 KByte and 2-MByte or 4-MByte pages" },
	{ 0x55, TLB_INST_2M_4M,		7,	" TLB_INST 2-MByte or 4-MByte pages, fully associative" },
	{ 0x56, TLB_DATA0_4M,		16,	" TLB_DATA0 4 MByte pages, 4-way set associative" },
	{ 0x57, TLB_DATA0_4K,		16,	" TLB_DATA0 4 KByte pages, 4-way associative" },
	{ 0x59, TLB_DATA0_4K,		16,	" TLB_DATA0 4 KByte pages, fully associative" },
	{ 0x5a, TLB_DATA0_2M_4M,	32,	" TLB_DATA0 2-MByte or 4 MByte pages, 4-way set associative" },
	{ 0x5b, TLB_DATA_4K_4M,		64,	" TLB_DATA 4 KByte and 4 MByte pages" },
	{ 0x5c, TLB_DATA_4K_4M,		128,	" TLB_DATA 4 KByte and 4 MByte pages" },
	{ 0x5d, TLB_DATA_4K_4M,		256,	" TLB_DATA 4 KByte and 4 MByte pages" },
	{ 0x61, TLB_INST_4K,		48,	" TLB_INST 4 KByte pages, fully associative" },
	{ 0x63, TLB_DATA_1G,		4,	" TLB_DATA 1 GByte pages, 4-way set associative" },
	{ 0x6b, TLB_DATA_4K,		256,	" TLB_DATA 4 KByte pages, 8-way associative" },
	{ 0x6c, TLB_DATA_2M_4M,		128,	" TLB_DATA 2 MByte or 4 MByte pages, 8-way associative" },
	{ 0x6d, TLB_DATA_1G,		16,	" TLB_DATA 1 GByte pages, fully associative" },
	{ 0x76, TLB_INST_2M_4M,		8,	" TLB_INST 2-MByte or 4-MByte pages, fully associative" },
	{ 0xb0, TLB_INST_4K,		128,	" TLB_INST 4 KByte pages, 4-way set associative" },
	{ 0xb1, TLB_INST_2M_4M,		4,	" TLB_INST 2M pages, 4-way, 8 entries or 4M pages, 4-way, 4 entries" },
	{ 0xb2, TLB_INST_4K,		64,	" TLB_INST 4 KByte pages, 4-way set associative" },
	{ 0xb3, TLB_DATA_4K,		128,	" TLB_DATA 4 KByte pages, 4-way set associative" },
	{ 0xb4, TLB_DATA_4K,		256,	" TLB_DATA 4 KByte pages, 4-way associative" },
	{ 0xb5, TLB_INST_4K,		64,	" TLB_INST 4 KByte pages, 8-way set associative" },
	{ 0xb6, TLB_INST_4K,		128,	" TLB_INST 4 KByte pages, 8-way set associative" },
	{ 0xba, TLB_DATA_4K,		64,	" TLB_DATA 4 KByte pages, 4-way associative" },
	{ 0xc0, TLB_DATA_4K_4M,		8,	" TLB_DATA 4 KByte and 4 MByte pages, 4-way associative" },
	{ 0xc1, STLB_4K_2M,		1024,	" STLB 4 KByte and 2 MByte pages, 8-way associative" },
	{ 0xc2, TLB_DATA_2M_4M,		16,	" TLB_DATA 2 MByte/4MByte pages, 4-way associative" },
	{ 0xca, STLB_4K,		512,	" STLB 4 KByte pages, 4-way associative" },
	{ 0x00, 0, 0 }
};

static void intel_tlb_lookup(const unsigned char desc)
{
	unsigned char k;

	if (desc == 0)
		return;

	/* look up this descriptor in the table */
	for (k = 0; intel_tlb_table[k].descriptor != desc &&
	     intel_tlb_table[k].descriptor != 0; k++)
		;

	if (intel_tlb_table[k].tlb_type == 0)
		return;

	switch (intel_tlb_table[k].tlb_type) {
	case STLB_4K:
		if (tlb_lli_4k[ENTRIES] < intel_tlb_table[k].entries)
			tlb_lli_4k[ENTRIES] = intel_tlb_table[k].entries;
		if (tlb_lld_4k[ENTRIES] < intel_tlb_table[k].entries)
			tlb_lld_4k[ENTRIES] = intel_tlb_table[k].entries;
		break;
	case STLB_4K_2M:
		if (tlb_lli_4k[ENTRIES] < intel_tlb_table[k].entries)
			tlb_lli_4k[ENTRIES] = intel_tlb_table[k].entries;
		if (tlb_lld_4k[ENTRIES] < intel_tlb_table[k].entries)
			tlb_lld_4k[ENTRIES] = intel_tlb_table[k].entries;
		if (tlb_lli_2m[ENTRIES] < intel_tlb_table[k].entries)
			tlb_lli_2m[ENTRIES] = intel_tlb_table[k].entries;
		if (tlb_lld_2m[ENTRIES] < intel_tlb_table[k].entries)
			tlb_lld_2m[ENTRIES] = intel_tlb_table[k].entries;
		if (tlb_lli_4m[ENTRIES] < intel_tlb_table[k].entries)
			tlb_lli_4m[ENTRIES] = intel_tlb_table[k].entries;
		if (tlb_lld_4m[ENTRIES] < intel_tlb_table[k].entries)
			tlb_lld_4m[ENTRIES] = intel_tlb_table[k].entries;
		break;
	case TLB_INST_ALL:
		if (tlb_lli_4k[ENTRIES] < intel_tlb_table[k].entries)
			tlb_lli_4k[ENTRIES] = intel_tlb_table[k].entries;
		if (tlb_lli_2m[ENTRIES] < intel_tlb_table[k].entries)
			tlb_lli_2m[ENTRIES] = intel_tlb_table[k].entries;
		if (tlb_lli_4m[ENTRIES] < intel_tlb_table[k].entries)
			tlb_lli_4m[ENTRIES] = intel_tlb_table[k].entries;
		break;
	case TLB_INST_4K:
		if (tlb_lli_4k[ENTRIES] < intel_tlb_table[k].entries)
			tlb_lli_4k[ENTRIES] = intel_tlb_table[k].entries;
		break;
	case TLB_INST_4M:
		if (tlb_lli_4m[ENTRIES] < intel_tlb_table[k].entries)
			tlb_lli_4m[ENTRIES] = intel_tlb_table[k].entries;
		break;
	case TLB_INST_2M_4M:
		if (tlb_lli_2m[ENTRIES] < intel_tlb_table[k].entries)
			tlb_lli_2m[ENTRIES] = intel_tlb_table[k].entries;
		if (tlb_lli_4m[ENTRIES] < intel_tlb_table[k].entries)
			tlb_lli_4m[ENTRIES] = intel_tlb_table[k].entries;
		break;
	case TLB_DATA_4K:
	case TLB_DATA0_4K:
		if (tlb_lld_4k[ENTRIES] < intel_tlb_table[k].entries)
			tlb_lld_4k[ENTRIES] = intel_tlb_table[k].entries;
		break;
	case TLB_DATA_4M:
	case TLB_DATA0_4M:
		if (tlb_lld_4m[ENTRIES] < intel_tlb_table[k].entries)
			tlb_lld_4m[ENTRIES] = intel_tlb_table[k].entries;
		break;
	case TLB_DATA_2M_4M:
	case TLB_DATA0_2M_4M:
		if (tlb_lld_2m[ENTRIES] < intel_tlb_table[k].entries)
			tlb_lld_2m[ENTRIES] = intel_tlb_table[k].entries;
		if (tlb_lld_4m[ENTRIES] < intel_tlb_table[k].entries)
			tlb_lld_4m[ENTRIES] = intel_tlb_table[k].entries;
		break;
	case TLB_DATA_4K_4M:
		if (tlb_lld_4k[ENTRIES] < intel_tlb_table[k].entries)
			tlb_lld_4k[ENTRIES] = intel_tlb_table[k].entries;
		if (tlb_lld_4m[ENTRIES] < intel_tlb_table[k].entries)
			tlb_lld_4m[ENTRIES] = intel_tlb_table[k].entries;
		break;
	case TLB_DATA_1G:
		if (tlb_lld_1g[ENTRIES] < intel_tlb_table[k].entries)
			tlb_lld_1g[ENTRIES] = intel_tlb_table[k].entries;
		break;
	}
}

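/*
 * CPUID leaf 2 packs up to 15 one-byte descriptors per invocation into
 * EAX..EDX (byte 0 of EAX is the iteration count, and a set bit 31 marks
 * a register as invalid). For example, a descriptor byte of 0x63 maps to
 * "TLB_DATA 1 GByte pages, 4-way set associative" in the table above.
 */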
static void intel_detect_tlb(struct cpuinfo_x86 *c)
{
	int i, j, n;
	unsigned int regs[4];
	unsigned char *desc = (unsigned char *)regs;

	if (c->cpuid_level < 2)
		return;

	/* Number of times to iterate */
	n = cpuid_eax(2) & 0xFF;

	for (i = 0 ; i < n ; i++) {
		cpuid(2, &regs[0], &regs[1], &regs[2], &regs[3]);

		/* If bit 31 is set, this is an unknown format */
		for (j = 0 ; j < 3 ; j++)
			if (regs[j] & (1 << 31))
				regs[j] = 0;

		/* Byte 0 is level count, not a descriptor */
		for (j = 1 ; j < 16 ; j++)
			intel_tlb_lookup(desc[j]);
	}
}

static const struct cpu_dev intel_cpu_dev = {
	.c_vendor	= "Intel",
	.c_ident	= { "GenuineIntel" },
#ifdef CONFIG_X86_32
	.legacy_models = {
		{ .family = 4, .model_names =
		  {
			  [0] = "486 DX-25/33",
			  [1] = "486 DX-50",
			  [2] = "486 SX",
			  [3] = "486 DX/2",
			  [4] = "486 SL",
			  [5] = "486 SX/2",
			  [7] = "486 DX/2-WB",
			  [8] = "486 DX/4",
			  [9] = "486 DX/4-WB"
		  }
		},
		{ .family = 5, .model_names =
		  {
			  [0] = "Pentium 60/66 A-step",
			  [1] = "Pentium 60/66",
			  [2] = "Pentium 75 - 200",
			  [3] = "OverDrive PODP5V83",
			  [4] = "Pentium MMX",
			  [7] = "Mobile Pentium 75 - 200",
			  [8] = "Mobile Pentium MMX",
			  [9] = "Quark SoC X1000",
		  }
		},
		{ .family = 6, .model_names =
		  {
			  [0] = "Pentium Pro A-step",
			  [1] = "Pentium Pro",
			  [3] = "Pentium II (Klamath)",
			  [4] = "Pentium II (Deschutes)",
			  [5] = "Pentium II (Deschutes)",
			  [6] = "Mobile Pentium II",
			  [7] = "Pentium III (Katmai)",
			  [8] = "Pentium III (Coppermine)",
			  [10] = "Pentium III (Cascades)",
			  [11] = "Pentium III (Tualatin)",
		  }
		},
		{ .family = 15, .model_names =
		  {
			  [0] = "Pentium 4 (Unknown)",
			  [1] = "Pentium 4 (Willamette)",
			  [2] = "Pentium 4 (Northwood)",
			  [4] = "Pentium 4 (Foster)",
			  [5] = "Pentium 4 (Foster)",
		  }
		},
	},
	.legacy_cache_size = intel_size_cache,
#endif
	.c_detect_tlb	= intel_detect_tlb,
	.c_early_init	= early_init_intel,
	.c_bsp_init	= bsp_init_intel,
	.c_init		= init_intel,
	.c_x86_vendor	= X86_VENDOR_INTEL,
};

cpu_dev_register(intel_cpu_dev);

#undef pr_fmt
#define pr_fmt(fmt) "x86/split lock detection: " fmt

static const struct {
	const char			*option;
	enum split_lock_detect_state	state;
} sld_options[] __initconst = {
	{ "off",	sld_off   },
	{ "warn",	sld_warn  },
	{ "fatal",	sld_fatal },
	{ "ratelimit:",	sld_ratelimit },
};
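
/*
 * These are selected by the "split_lock_detect=" kernel parameter; for
 * example, "split_lock_detect=ratelimit:10" allows at most 10 bus locks
 * per second system wide. The N in "ratelimit:N" must be in [1, 1000],
 * see match_option() below.
 */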

static struct ratelimit_state bld_ratelimit;

static unsigned int sysctl_sld_mitigate = 1;
static DEFINE_SEMAPHORE(buslock_sem, 1);

#ifdef CONFIG_PROC_SYSCTL
static struct ctl_table sld_sysctls[] = {
	{
		.procname	= "split_lock_mitigate",
		.data		= &sysctl_sld_mitigate,
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= proc_douintvec_minmax,
		.extra1		= SYSCTL_ZERO,
		.extra2		= SYSCTL_ONE,
	},
};

static int __init sld_mitigate_sysctl_init(void)
{
	register_sysctl_init("kernel", sld_sysctls);
	return 0;
}

late_initcall(sld_mitigate_sysctl_init);
#endif

static inline bool match_option(const char *arg, int arglen, const char *opt)
{
	int len = strlen(opt), ratelimit;

	if (strncmp(arg, opt, len))
		return false;

	/*
	 * Min ratelimit is 1 bus lock/sec.
	 * Max ratelimit is 1000 bus locks/sec.
	 */
	if (sscanf(arg, "ratelimit:%d", &ratelimit) == 1 &&
	    ratelimit > 0 && ratelimit <= 1000) {
		ratelimit_state_init(&bld_ratelimit, HZ, ratelimit);
		ratelimit_set_flags(&bld_ratelimit, RATELIMIT_MSG_ON_RELEASE);
		return true;
	}

	return len == arglen;
}

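/*
 * Probe whether MSR_TEST_CTRL is writable by toggling the split lock
 * detect bit and reading the value back; returns true only when the
 * write took effect.
 */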
static bool split_lock_verify_msr(bool on)
{
	u64 ctrl, tmp;

	if (rdmsrl_safe(MSR_TEST_CTRL, &ctrl))
		return false;
	if (on)
		ctrl |= MSR_TEST_CTRL_SPLIT_LOCK_DETECT;
	else
		ctrl &= ~MSR_TEST_CTRL_SPLIT_LOCK_DETECT;
	if (wrmsrl_safe(MSR_TEST_CTRL, ctrl))
		return false;
	rdmsrl(MSR_TEST_CTRL, tmp);
	return ctrl == tmp;
}

static void __init sld_state_setup(void)
{
	enum split_lock_detect_state state = sld_warn;
	char arg[20];
	int i, ret;

	if (!boot_cpu_has(X86_FEATURE_SPLIT_LOCK_DETECT) &&
	    !boot_cpu_has(X86_FEATURE_BUS_LOCK_DETECT))
		return;

	ret = cmdline_find_option(boot_command_line, "split_lock_detect",
				  arg, sizeof(arg));
	if (ret >= 0) {
		for (i = 0; i < ARRAY_SIZE(sld_options); i++) {
			if (match_option(arg, ret, sld_options[i].option)) {
				state = sld_options[i].state;
				break;
			}
		}
	}
	sld_state = state;
}

static void __init __split_lock_setup(void)
{
	if (!split_lock_verify_msr(false)) {
		pr_info("MSR access failed: Disabled\n");
		return;
	}

	rdmsrl(MSR_TEST_CTRL, msr_test_ctrl_cache);

	if (!split_lock_verify_msr(true)) {
		pr_info("MSR access failed: Disabled\n");
		return;
	}

	/* Restore the MSR to its cached value. */
	wrmsrl(MSR_TEST_CTRL, msr_test_ctrl_cache);

	setup_force_cpu_cap(X86_FEATURE_SPLIT_LOCK_DETECT);
}

/*
 * MSR_TEST_CTRL is per core, but we treat it like a per CPU MSR. Locking
 * is not implemented as one thread could undo the setting of the other
 * thread immediately after dropping the lock anyway.
 */
static void sld_update_msr(bool on)
{
	u64 test_ctrl_val = msr_test_ctrl_cache;

	if (on)
		test_ctrl_val |= MSR_TEST_CTRL_SPLIT_LOCK_DETECT;

	wrmsrl(MSR_TEST_CTRL, test_ctrl_val);
}

static void split_lock_init(void)
{
	/*
	 * Ratelimiting is handled by #DB for bus locks; disable #AC for
	 * split locks in that case.
	 */
	if (sld_state == sld_ratelimit) {
		split_lock_verify_msr(false);
		return;
	}

	if (cpu_model_supports_sld)
		split_lock_verify_msr(sld_state != sld_off);
}

static void __split_lock_reenable_unlock(struct work_struct *work)
{
	sld_update_msr(true);
	up(&buslock_sem);
}

static DECLARE_DELAYED_WORK(sl_reenable_unlock, __split_lock_reenable_unlock);

static void __split_lock_reenable(struct work_struct *work)
{
	sld_update_msr(true);
}
static DECLARE_DELAYED_WORK(sl_reenable, __split_lock_reenable);

/*
 * If a CPU goes offline with pending delayed work to re-enable split lock
 * detection then the delayed work will be executed on some other CPU. That
 * handles releasing the buslock_sem, but because it executes on a
 * different CPU probably won't re-enable split lock detection. This is a
 * problem on HT systems since the sibling CPU on the same core may then be
 * left running with split lock detection disabled.
 *
 * Unconditionally re-enable detection here.
 */
static int splitlock_cpu_offline(unsigned int cpu)
{
	sld_update_msr(true);

	return 0;
}

static void split_lock_warn(unsigned long ip)
{
	struct delayed_work *work;
	int cpu;

	if (!current->reported_split_lock)
		pr_warn_ratelimited("#AC: %s/%d took a split_lock trap at address: 0x%lx\n",
				    current->comm, current->pid, ip);
	current->reported_split_lock = 1;

	if (sysctl_sld_mitigate) {
		/*
		 * Misery factor #1:
		 * sleep 10ms before trying to execute split lock.
		 */
		if (msleep_interruptible(10) > 0)
			return;
		/*
		 * Misery factor #2:
		 * only allow one buslocked disabled core at a time.
		 */
		if (down_interruptible(&buslock_sem) == -EINTR)
			return;
		work = &sl_reenable_unlock;
	} else {
		work = &sl_reenable;
	}

	cpu = get_cpu();
	schedule_delayed_work_on(cpu, work, 2);

	/* Disable split lock detection on this CPU to make progress */
	sld_update_msr(false);
	put_cpu();
}

bool handle_guest_split_lock(unsigned long ip)
{
	if (sld_state == sld_warn) {
		split_lock_warn(ip);
		return true;
	}

	pr_warn_once("#AC: %s/%d %s split_lock trap at address: 0x%lx\n",
		     current->comm, current->pid,
		     sld_state == sld_fatal ? "fatal" : "bogus", ip);

	current->thread.error_code = 0;
	current->thread.trap_nr = X86_TRAP_AC;
	force_sig_fault(SIGBUS, BUS_ADRALN, NULL);
	return false;
}
EXPORT_SYMBOL_GPL(handle_guest_split_lock);

static void bus_lock_init(void)
{
	u64 val;

	if (!boot_cpu_has(X86_FEATURE_BUS_LOCK_DETECT))
		return;

	rdmsrl(MSR_IA32_DEBUGCTLMSR, val);

	if ((boot_cpu_has(X86_FEATURE_SPLIT_LOCK_DETECT) &&
	    (sld_state == sld_warn || sld_state == sld_fatal)) ||
	    sld_state == sld_off) {
		/*
		 * Warn and fatal are handled by #AC for split lock if #AC for
		 * split lock is supported.
		 */
		val &= ~DEBUGCTLMSR_BUS_LOCK_DETECT;
	} else {
		val |= DEBUGCTLMSR_BUS_LOCK_DETECT;
	}

	wrmsrl(MSR_IA32_DEBUGCTLMSR, val);
}

bool handle_user_split_lock(struct pt_regs *regs, long error_code)
{
	if ((regs->flags & X86_EFLAGS_AC) || sld_state == sld_fatal)
		return false;
	split_lock_warn(regs->ip);
	return true;
}

void handle_bus_lock(struct pt_regs *regs)
{
	switch (sld_state) {
	case sld_off:
		break;
	case sld_ratelimit:
		/* Enforce no more than bld_ratelimit bus locks/sec. */
		while (!__ratelimit(&bld_ratelimit))
			msleep(20);
		/* Warn on the bus lock. */
		fallthrough;
	case sld_warn:
		pr_warn_ratelimited("#DB: %s/%d took a bus_lock trap at address: 0x%lx\n",
				    current->comm, current->pid, regs->ip);
		break;
	case sld_fatal:
		force_sig_fault(SIGBUS, BUS_ADRALN, NULL);
		break;
	}
}

/*
 * CPU models that are known to have the per-core split-lock detection
 * feature even though they do not enumerate IA32_CORE_CAPABILITIES.
 */
static const struct x86_cpu_id split_lock_cpu_ids[] __initconst = {
	X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_X,	0),
	X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_L,	0),
	X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_D,	0),
	{}
};

static void __init split_lock_setup(struct cpuinfo_x86 *c)
{
	const struct x86_cpu_id *m;
	u64 ia32_core_caps;

	if (boot_cpu_has(X86_FEATURE_HYPERVISOR))
		return;

	/* Check for CPUs that have support but do not enumerate it: */
	m = x86_match_cpu(split_lock_cpu_ids);
	if (m)
		goto supported;

	if (!cpu_has(c, X86_FEATURE_CORE_CAPABILITIES))
		return;

	/*
	 * Not all bits in MSR_IA32_CORE_CAPS are architectural, but
	 * MSR_IA32_CORE_CAPS_SPLIT_LOCK_DETECT is. All CPUs that set
	 * it have split lock detection.
	 */
	rdmsrl(MSR_IA32_CORE_CAPS, ia32_core_caps);
	if (ia32_core_caps & MSR_IA32_CORE_CAPS_SPLIT_LOCK_DETECT)
		goto supported;

	/* CPU is not in the model list and does not have the MSR bit: */
	return;

supported:
	cpu_model_supports_sld = true;
	__split_lock_setup();
}

static void sld_state_show(void)
{
	if (!boot_cpu_has(X86_FEATURE_BUS_LOCK_DETECT) &&
	    !boot_cpu_has(X86_FEATURE_SPLIT_LOCK_DETECT))
		return;

	switch (sld_state) {
	case sld_off:
		pr_info("disabled\n");
		break;
	case sld_warn:
		if (boot_cpu_has(X86_FEATURE_SPLIT_LOCK_DETECT)) {
			pr_info("#AC: crashing the kernel on kernel split_locks and warning on user-space split_locks\n");
			if (cpuhp_setup_state(CPUHP_AP_ONLINE_DYN,
					      "x86/splitlock", NULL,
					      splitlock_cpu_offline) < 0)
				pr_warn("No splitlock CPU offline handler\n");
		} else if (boot_cpu_has(X86_FEATURE_BUS_LOCK_DETECT)) {
			pr_info("#DB: warning on user-space bus_locks\n");
		}
		break;
	case sld_fatal:
		if (boot_cpu_has(X86_FEATURE_SPLIT_LOCK_DETECT)) {
			pr_info("#AC: crashing the kernel on kernel split_locks and sending SIGBUS on user-space split_locks\n");
		} else if (boot_cpu_has(X86_FEATURE_BUS_LOCK_DETECT)) {
			pr_info("#DB: sending SIGBUS on user-space bus_locks%s\n",
				boot_cpu_has(X86_FEATURE_SPLIT_LOCK_DETECT) ?
				" from non-WB" : "");
		}
		break;
	case sld_ratelimit:
		if (boot_cpu_has(X86_FEATURE_BUS_LOCK_DETECT))
			pr_info("#DB: setting system wide bus lock rate limit to %u/sec\n", bld_ratelimit.burst);
		break;
	}
}

void __init sld_setup(struct cpuinfo_x86 *c)
{
	split_lock_setup(c);
	sld_state_setup();
	sld_state_show();
}

#define X86_HYBRID_CPU_TYPE_ID_SHIFT	24
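/*
 * Per the SDM, CPUID leaf 0x1a reports a core type of 0x20 for Atom
 * (efficiency) cores and 0x40 for Core (performance) cores.
 */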

/**
 * get_this_hybrid_cpu_type() - Get the type of this hybrid CPU
 *
 * Returns the CPU type [31:24] (i.e., Atom or Core) of a CPU in
 * a hybrid processor. If the processor is not hybrid, returns 0.
 */
u8 get_this_hybrid_cpu_type(void)
{
	if (!cpu_feature_enabled(X86_FEATURE_HYBRID_CPU))
		return 0;

	return cpuid_eax(0x0000001a) >> X86_HYBRID_CPU_TYPE_ID_SHIFT;
}