/*  Generic MTRR (Memory Type Range Register) driver.

    Copyright (C) 1997-2000  Richard Gooch
    Copyright (c) 2002       Patrick Mochel

    This library is free software; you can redistribute it and/or
    modify it under the terms of the GNU Library General Public
    License as published by the Free Software Foundation; either
    version 2 of the License, or (at your option) any later version.

    This library is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    Library General Public License for more details.

    You should have received a copy of the GNU Library General Public
    License along with this library; if not, write to the Free
    Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.

    Richard Gooch may be reached by email at rgooch@atnf.csiro.au
    The postal address is:
      Richard Gooch, c/o ATNF, P. O. Box 76, Epping, N.S.W., 2121, Australia.

    Source: "Pentium Pro Family Developer's Manual, Volume 3:
    Operating System Writer's Guide" (Intel document number 242692),
    section 11.11.7

    This was cleaned and made readable by Patrick Mochel <mochel@osdl.org>
    on 6-7 March 2002.
    Source: Intel Architecture Software Developers Manual, Volume 3:
    System Programming Guide; Section 9.11. (1997 edition - PPro).
*/

#include <linux/types.h> /* FIXME: kvm_para.h needs this */

#include <linux/stop_machine.h>
#include <linux/kvm_para.h>
#include <linux/uaccess.h>
#include <linux/export.h>
#include <linux/mutex.h>
#include <linux/init.h>
#include <linux/sort.h>
#include <linux/cpu.h>
#include <linux/pci.h>
#include <linux/smp.h>
#include <linux/syscore_ops.h>
#include <linux/rcupdate.h>

#include <asm/cacheinfo.h>
#include <asm/cpufeature.h>
#include <asm/e820/api.h>
#include <asm/mtrr.h>
#include <asm/msr.h>
#include <asm/memtype.h>

#include "mtrr.h"

/* arch_phys_wc_add returns an MTRR register index plus this offset. */
#define MTRR_TO_PHYS_WC_OFFSET 1000

u32 num_var_ranges;

unsigned int mtrr_usage_table[MTRR_MAX_VAR_RANGES];
DEFINE_MUTEX(mtrr_mutex);

const struct mtrr_ops *mtrr_if;

/* Returns non-zero if we have the write-combining memory type */
static int have_wrcomb(void)
{
	struct pci_dev *dev;

	dev = pci_get_class(PCI_CLASS_BRIDGE_HOST << 8, NULL);
	if (dev != NULL) {
		/*
		 * ServerWorks LE chipsets < rev 6 have problems with
		 * write-combining. Don't allow it and leave room for other
		 * chipsets to be tagged.
		 */
		if (dev->vendor == PCI_VENDOR_ID_SERVERWORKS &&
		    dev->device == PCI_DEVICE_ID_SERVERWORKS_LE &&
		    dev->revision <= 5) {
			pr_info("Serverworks LE rev < 6 detected. Write-combining disabled.\n");
			pci_dev_put(dev);
			return 0;
		}
		/*
		 * Intel 450NX errata # 23. Non-ascending cacheline evictions
		 * to write-combining memory may result in data corruption.
		 */
		if (dev->vendor == PCI_VENDOR_ID_INTEL &&
		    dev->device == PCI_DEVICE_ID_INTEL_82451NX) {
			pr_info("Intel 450NX MMC detected. Write-combining disabled.\n");
			pci_dev_put(dev);
			return 0;
		}
		pci_dev_put(dev);
	}
	return mtrr_if->have_wrcomb ? mtrr_if->have_wrcomb() : 0;
}

static void __init init_table(void)
{
	int i, max;

	max = num_var_ranges;
	for (i = 0; i < max; i++)
		mtrr_usage_table[i] = 1;
}

struct set_mtrr_data {
	unsigned long	smp_base;
	unsigned long	smp_size;
	unsigned int	smp_reg;
	mtrr_type	smp_type;
};

/**
 * mtrr_rendezvous_handler - Work done in the synchronization handler. Executed
 * by all the CPUs.
 * @info: pointer to mtrr configuration data
 *
 * Returns zero; the return value is required by the stop_machine() interface.
 */
static int mtrr_rendezvous_handler(void *info)
{
	struct set_mtrr_data *data = info;

	mtrr_if->set(data->smp_reg, data->smp_base,
		     data->smp_size, data->smp_type);
	return 0;
}

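/*
 * Two MTRR types may overlap only if the effective memory type is well
 * defined: UC combined with anything yields UC, and WT combined with WB
 * yields WT.  Any other overlap is rejected by mtrr_add_page().
 */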
static inline int types_compatible(mtrr_type type1, mtrr_type type2)
{
	return type1 == MTRR_TYPE_UNCACHABLE ||
	       type2 == MTRR_TYPE_UNCACHABLE ||
	       (type1 == MTRR_TYPE_WRTHROUGH && type2 == MTRR_TYPE_WRBACK) ||
	       (type1 == MTRR_TYPE_WRBACK && type2 == MTRR_TYPE_WRTHROUGH);
}

/**
 * set_mtrr - update mtrrs on all processors
 * @reg: mtrr in question
 * @base: mtrr base
 * @size: mtrr size
 * @type: mtrr type
 *
 * This is kinda tricky, but fortunately, Intel spelled it out for us cleanly:
 *
 * 1. Queue work to do the following on all processors:
 * 2. Disable Interrupts
 * 3. Wait for all procs to do so
 * 4. Enter no-fill cache mode
 * 5. Flush caches
 * 6. Clear PGE bit
 * 7. Flush all TLBs
 * 8. Disable all range registers
 * 9. Update the MTRRs
 * 10. Enable all range registers
 * 11. Flush all TLBs and caches again
 * 12. Enter normal cache mode and reenable caching
 * 13. Set PGE
 * 14. Wait for buddies to catch up
 * 15. Enable interrupts.
 *
 * What does that mean for us? Well, stop_machine() will ensure that
 * the rendezvous handler is started on each CPU. And in lockstep they
 * do the state transition of disabling interrupts, updating MTRRs
 * (the CPU vendors may each do it differently, so we call the mtrr_if->set()
 * callback and let them take care of it) and enabling interrupts.
 *
 * Note that the mechanism is the same for UP systems, too; all the SMP stuff
 * becomes nops.
 */
static void set_mtrr(unsigned int reg, unsigned long base, unsigned long size,
		     mtrr_type type)
{
	struct set_mtrr_data data = { .smp_reg = reg,
				      .smp_base = base,
				      .smp_size = size,
				      .smp_type = type
				    };

	stop_machine_cpuslocked(mtrr_rendezvous_handler, &data, cpu_online_mask);

	generic_rebuild_map();
}

/**
 * mtrr_add_page - Add a memory type region
 * @base: Physical base address of region in pages (in units of 4 kB!)
 * @size: Physical size of region in pages (4 kB)
 * @type: Type of MTRR desired
 * @increment: If this is true do usage counting on the region
 *
 * Memory type region registers control the caching on newer Intel and
 * non-Intel processors. This function allows drivers to request that an
 * MTRR be added. The details and hardware specifics of each processor's
 * implementation are hidden from the caller, but nevertheless the
 * caller should expect to need to provide a power of two size on an
 * equivalent power of two boundary.
 *
 * If the region cannot be added either because all regions are in use
 * or the CPU cannot support it a negative value is returned. On success
 * the register number for this entry is returned, but should be treated
 * as a cookie only.
 *
 * On a multiprocessor machine the changes are made to all processors.
 * This is required on x86 by the Intel processors.
 *
 * The available types are
 *
 * %MTRR_TYPE_UNCACHABLE - No caching
 *
 * %MTRR_TYPE_WRBACK - Write data back in bursts whenever possible
 *
 * %MTRR_TYPE_WRCOMB - Write data back soon but allow bursts
 *
 * %MTRR_TYPE_WRTHROUGH - Cache reads but not writes
 *
 * BUGS: Needs a quiet flag for the cases where drivers do not mind
 * failures and do not wish system log messages to be sent.
 */
int mtrr_add_page(unsigned long base, unsigned long size,
		  unsigned int type, bool increment)
{
	unsigned long lbase, lsize;
	int i, replace, error;
	mtrr_type ltype;

	if (!mtrr_enabled())
		return -ENXIO;

	error = mtrr_if->validate_add_page(base, size, type);
	if (error)
		return error;

	if (type >= MTRR_NUM_TYPES) {
		pr_warn("type: %u invalid\n", type);
		return -EINVAL;
	}

	/* If the type is WC, check that this processor supports it */
	if ((type == MTRR_TYPE_WRCOMB) && !have_wrcomb()) {
		pr_warn("your processor doesn't support write-combining\n");
		return -ENOSYS;
	}

	if (!size) {
		pr_warn("zero sized request\n");
		return -EINVAL;
	}

	if ((base | (base + size - 1)) >>
	    (boot_cpu_data.x86_phys_bits - PAGE_SHIFT)) {
		pr_warn("base or size exceeds the MTRR width\n");
		return -EINVAL;
	}

	error = -EINVAL;
	replace = -1;

	/* No CPU hotplug when we change MTRR entries */
	cpus_read_lock();

	/* Search for existing MTRR */
	mutex_lock(&mtrr_mutex);
	for (i = 0; i < num_var_ranges; ++i) {
		mtrr_if->get(i, &lbase, &lsize, &ltype);
		if (!lsize || base > lbase + lsize - 1 ||
		    base + size - 1 < lbase)
			continue;
		/*
		 * At this point we know there is some kind of
		 * overlap/enclosure
		 */
		if (base < lbase || base + size - 1 > lbase + lsize - 1) {
			if (base <= lbase &&
			    base + size - 1 >= lbase + lsize - 1) {
				/* New region encloses an existing region */
				if (type == ltype) {
					replace = replace == -1 ? i : -2;
					continue;
				} else if (types_compatible(type, ltype))
					continue;
			}
			pr_warn("0x%lx000,0x%lx000 overlaps existing 0x%lx000,0x%lx000\n",
				base, size, lbase, lsize);
			goto out;
		}
		/* New region is enclosed by an existing region */
		if (ltype != type) {
			if (types_compatible(type, ltype))
				continue;
			pr_warn("type mismatch for %lx000,%lx000 old: %s new: %s\n",
				base, size, mtrr_attrib_to_str(ltype),
				mtrr_attrib_to_str(type));
			goto out;
		}
		if (increment)
			++mtrr_usage_table[i];
		error = i;
		goto out;
	}
	/* Search for an empty MTRR */
	i = mtrr_if->get_free_region(base, size, replace);
	if (i >= 0) {
		set_mtrr(i, base, size, type);
		if (likely(replace < 0)) {
			mtrr_usage_table[i] = 1;
		} else {
			mtrr_usage_table[i] = mtrr_usage_table[replace];
			if (increment)
				mtrr_usage_table[i]++;
			if (unlikely(replace != i)) {
				set_mtrr(replace, 0, 0, 0);
				mtrr_usage_table[replace] = 0;
			}
		}
	} else {
		pr_info("no more MTRRs available\n");
	}
	error = i;
 out:
	mutex_unlock(&mtrr_mutex);
	cpus_read_unlock();
	return error;
}
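/*
 * Illustrative sketch (not part of this driver): a caller that already
 * works in page frames can use mtrr_add_page() directly.  The base
 * address and size below are made-up values; real callers would take
 * them from their device.
 *
 *	unsigned long base_pfn = 0xd0000000UL >> PAGE_SHIFT;
 *	unsigned long nr_pages = (32UL << 20) >> PAGE_SHIFT;
 *	int reg;
 *
 *	reg = mtrr_add_page(base_pfn, nr_pages, MTRR_TYPE_WRCOMB, true);
 *	if (reg < 0)
 *		pr_warn("WC MTRR request failed: %d\n", reg);
 *	...
 *	if (reg >= 0)
 *		mtrr_del_page(reg, base_pfn, nr_pages);
 *
 * The 32 MiB size is a power of two and 0xd0000000 is aligned to it,
 * matching the power-of-two requirement documented above.
 */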

static int mtrr_check(unsigned long base, unsigned long size)
{
	if ((base & (PAGE_SIZE - 1)) || (size & (PAGE_SIZE - 1))) {
		pr_warn("size and base must be multiples of 4 kiB\n");
		Dprintk("size: 0x%lx base: 0x%lx\n", size, base);
		dump_stack();
		return -1;
	}
	return 0;
}

/**
 * mtrr_add - Add a memory type region
 * @base: Physical base address of region
 * @size: Physical size of region
 * @type: Type of MTRR desired
 * @increment: If this is true do usage counting on the region
 *
 * Memory type region registers control the caching on newer Intel and
 * non-Intel processors. This function allows drivers to request that an
 * MTRR be added. The details and hardware specifics of each processor's
 * implementation are hidden from the caller, but nevertheless the
 * caller should expect to need to provide a power of two size on an
 * equivalent power of two boundary.
 *
 * If the region cannot be added either because all regions are in use
 * or the CPU cannot support it a negative value is returned. On success
 * the register number for this entry is returned, but should be treated
 * as a cookie only.
 *
 * On a multiprocessor machine the changes are made to all processors.
 * This is required on x86 by the Intel processors.
 *
 * The available types are
 *
 * %MTRR_TYPE_UNCACHABLE - No caching
 *
 * %MTRR_TYPE_WRBACK - Write data back in bursts whenever possible
 *
 * %MTRR_TYPE_WRCOMB - Write data back soon but allow bursts
 *
 * %MTRR_TYPE_WRTHROUGH - Cache reads but not writes
 *
 * BUGS: Needs a quiet flag for the cases where drivers do not mind
 * failures and do not wish system log messages to be sent.
 */
int mtrr_add(unsigned long base, unsigned long size, unsigned int type,
	     bool increment)
{
	if (!mtrr_enabled())
		return -ENODEV;
	if (mtrr_check(base, size))
		return -EINVAL;
	return mtrr_add_page(base >> PAGE_SHIFT, size >> PAGE_SHIFT, type,
			     increment);
}

/**
 * mtrr_del_page - delete a memory type region
 * @reg: Register returned by mtrr_add
 * @base: Physical base address
 * @size: Size of region
 *
 * If register is supplied then base and size are ignored. This is
 * how drivers should call it.
 *
 * Releases an MTRR region. If the usage count drops to zero the
 * register is freed and the region returns to default state.
 * On success the register is returned, on failure a negative error
 * code.
 */
int mtrr_del_page(int reg, unsigned long base, unsigned long size)
{
	int i, max;
	mtrr_type ltype;
	unsigned long lbase, lsize;
	int error = -EINVAL;

	if (!mtrr_enabled())
		return -ENODEV;

	max = num_var_ranges;
	/* No CPU hotplug when we change MTRR entries */
	cpus_read_lock();
	mutex_lock(&mtrr_mutex);
	if (reg < 0) {
		/* Search for existing MTRR */
		for (i = 0; i < max; ++i) {
			mtrr_if->get(i, &lbase, &lsize, &ltype);
			if (lbase == base && lsize == size) {
				reg = i;
				break;
			}
		}
		if (reg < 0) {
			Dprintk("no MTRR for %lx000,%lx000 found\n", base, size);
			goto out;
		}
	}
	if (reg >= max) {
		pr_warn("register: %d too big\n", reg);
		goto out;
	}
	mtrr_if->get(reg, &lbase, &lsize, &ltype);
	if (lsize < 1) {
		pr_warn("MTRR %d not used\n", reg);
		goto out;
	}
	if (mtrr_usage_table[reg] < 1) {
		pr_warn("reg: %d has count=0\n", reg);
		goto out;
	}
	if (--mtrr_usage_table[reg] < 1)
		set_mtrr(reg, 0, 0, 0);
	error = reg;
 out:
	mutex_unlock(&mtrr_mutex);
	cpus_read_unlock();
	return error;
}

/**
 * mtrr_del - delete a memory type region
 * @reg: Register returned by mtrr_add
 * @base: Physical base address
 * @size: Size of region
 *
 * If register is supplied then base and size are ignored. This is
 * how drivers should call it.
 *
 * Releases an MTRR region. If the usage count drops to zero the
 * register is freed and the region returns to default state.
 * On success the register is returned, on failure a negative error
 * code.
 */
int mtrr_del(int reg, unsigned long base, unsigned long size)
{
	if (!mtrr_enabled())
		return -ENODEV;
	if (mtrr_check(base, size))
		return -EINVAL;
	return mtrr_del_page(reg, base >> PAGE_SHIFT, size >> PAGE_SHIFT);
}
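/*
 * Illustrative sketch (not part of this driver): a legacy framebuffer
 * driver might map its aperture write-combined via the byte-based
 * interface.  fb_base and fb_len are made-up names standing in for the
 * device aperture; modern drivers should prefer arch_phys_wc_add()
 * further below, which does nothing when PAT already provides WC.
 *
 *	int reg = mtrr_add(fb_base, fb_len, MTRR_TYPE_WRCOMB, true);
 *
 *	if (reg < 0)
 *		dev_warn(dev, "no WC MTRR, framebuffer may be slow\n");
 *	...
 *	if (reg >= 0)
 *		mtrr_del(reg, fb_base, fb_len);
 *
 * Passing the register cookie back to mtrr_del() is the preferred form;
 * reg == -1 with the original base and size would make mtrr_del() search
 * for the matching entry instead.
 */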

/**
 * arch_phys_wc_add - add a WC MTRR and handle errors if PAT is unavailable
 * @base: Physical base address
 * @size: Size of region
 *
 * If PAT is available, this does nothing.  If PAT is unavailable, it
 * attempts to add a WC MTRR covering size bytes starting at base and
 * logs an error if this fails.
 *
 * The caller should provide a power of two size on an equivalent
 * power of two boundary.
 *
 * Drivers must store the return value to pass to arch_phys_wc_del(),
 * but drivers should not try to interpret that return value.
 */
int arch_phys_wc_add(unsigned long base, unsigned long size)
{
	int ret;

	if (pat_enabled() || !mtrr_enabled())
		return 0;  /* Success! (We don't need to do anything.) */

	ret = mtrr_add(base, size, MTRR_TYPE_WRCOMB, true);
	if (ret < 0) {
		pr_warn("Failed to add WC MTRR for [%p-%p]; performance may suffer.",
			(void *)base, (void *)(base + size - 1));
		return ret;
	}
	return ret + MTRR_TO_PHYS_WC_OFFSET;
}
EXPORT_SYMBOL(arch_phys_wc_add);
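/*
 * Illustrative sketch (not part of this driver): the usual pattern for a
 * driver ioremapping a prefetchable BAR.  pdev, bar and wc_cookie are the
 * caller's own names; the cookie is stored only for the teardown path.
 *
 *	wc_cookie = arch_phys_wc_add(pci_resource_start(pdev, bar),
 *				     pci_resource_len(pdev, bar));
 *	...
 *	arch_phys_wc_del(wc_cookie);
 *
 * Because arch_phys_wc_add() returns 0 when PAT makes an MTRR unnecessary
 * and a negative value on failure, and arch_phys_wc_del() ignores both,
 * the caller never has to inspect the cookie.
 */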

/*
 * arch_phys_wc_del - undoes arch_phys_wc_add
 * @handle: Return value from arch_phys_wc_add
 *
 * This cleans up after arch_phys_wc_add().
 *
 * The API guarantees that arch_phys_wc_del(error code) and
 * arch_phys_wc_del(0) do nothing.
 */
void arch_phys_wc_del(int handle)
{
	if (handle >= 1) {
		WARN_ON(handle < MTRR_TO_PHYS_WC_OFFSET);
		mtrr_del(handle - MTRR_TO_PHYS_WC_OFFSET, 0, 0);
	}
}
EXPORT_SYMBOL(arch_phys_wc_del);

/*
 * arch_phys_wc_index - translates arch_phys_wc_add's return value
 * @handle: Return value from arch_phys_wc_add
 *
 * This will turn the return value from arch_phys_wc_add into an mtrr
 * index suitable for debugging.
 *
 * Note: There is no legitimate use for this function, except possibly
 * in a printk line.  Alas there is an illegitimate use in some ancient
 * drm ioctls.
 */
int arch_phys_wc_index(int handle)
{
	if (handle < MTRR_TO_PHYS_WC_OFFSET)
		return -1;
	else
		return handle - MTRR_TO_PHYS_WC_OFFSET;
}
EXPORT_SYMBOL_GPL(arch_phys_wc_index);
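/*
 * Illustrative sketch (not part of this driver): the handle encoding is
 * simply "MTRR register index + MTRR_TO_PHYS_WC_OFFSET", so a cookie of
 * 1003 names variable MTRR 3, while 0 and negative cookies map to -1.
 * The only sensible use is debug output, e.g.:
 *
 *	pr_debug("WC cookie %d -> MTRR index %d\n",
 *		 wc_cookie, arch_phys_wc_index(wc_cookie));
 */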

int __initdata changed_by_mtrr_cleanup;

/**
 * mtrr_bp_init - initialize MTRRs on the boot CPU
 *
 * This needs to be called early; before any of the other CPUs are
 * initialized (i.e. before smp_init()).
 */
void __init mtrr_bp_init(void)
{
	bool generic_mtrrs = cpu_feature_enabled(X86_FEATURE_MTRR);
	const char *why = "(not available)";
	unsigned long config, dummy;

	phys_hi_rsvd = GENMASK(31, boot_cpu_data.x86_phys_bits - 32);

	if (!generic_mtrrs && mtrr_state.enabled) {
		/*
		 * Software overwrite of MTRR state, only for generic case.
		 * Note that X86_FEATURE_MTRR has been reset in this case.
		 */
		init_table();
		mtrr_build_map();
		pr_info("MTRRs set to read-only\n");

		return;
	}

	if (generic_mtrrs)
		mtrr_if = &generic_mtrr_ops;
	else
		mtrr_set_if();

	if (mtrr_enabled()) {
		/* Get the number of variable MTRR ranges. */
		if (mtrr_if == &generic_mtrr_ops)
			rdmsr(MSR_MTRRcap, config, dummy);
		else
			config = mtrr_if->var_regs;
		num_var_ranges = config & MTRR_CAP_VCNT;

		init_table();
		if (mtrr_if == &generic_mtrr_ops) {
			/* BIOS may override */
			if (get_mtrr_state()) {
				memory_caching_control |= CACHE_MTRR;
				changed_by_mtrr_cleanup = mtrr_cleanup();
				mtrr_build_map();
			} else {
				mtrr_if = NULL;
				why = "by BIOS";
			}
		}
	}

	if (!mtrr_enabled())
		pr_info("MTRRs disabled %s\n", why);
}

/**
 * mtrr_save_state - Save current fixed-range MTRR state of the first
 * cpu in cpu_online_mask.
 */
void mtrr_save_state(void)
{
	int first_cpu;

	if (!mtrr_enabled())
		return;

	first_cpu = cpumask_first(cpu_online_mask);
	smp_call_function_single(first_cpu, mtrr_save_fixed_ranges, NULL, 1);
}

static int __init mtrr_init_finalize(void)
{
	/*
	 * Map might exist if mtrr_overwrite_state() has been called or if
	 * mtrr_enabled() returns true.
	 */
	mtrr_copy_map();

	if (!mtrr_enabled())
		return 0;

	if (memory_caching_control & CACHE_MTRR) {
		if (!changed_by_mtrr_cleanup)
			mtrr_state_warn();
		return 0;
	}

	mtrr_register_syscore();

	return 0;
}
subsys_initcall(mtrr_init_finalize);

/* Source: linux/arch/x86/kernel/cpu/mtrr/mtrr.c */