1 | /* |
2 | * Nvidia AGPGART routines. |
3 | * Based upon a 2.4 agpgart diff by the folks from NVIDIA, and hacked up |
4 | * to work in 2.5 by Dave Jones. |
5 | */ |
6 | |
7 | #include <linux/module.h> |
8 | #include <linux/pci.h> |
9 | #include <linux/init.h> |
10 | #include <linux/agp_backend.h> |
11 | #include <linux/page-flags.h> |
12 | #include <linux/mm.h> |
13 | #include <linux/jiffies.h> |
14 | #include "agp.h" |
15 | |
16 | /* NVIDIA registers */ |
/*
 * PCI config-space offsets.  The digit after NVIDIA_ names the device the
 * register is accessed through in this file: 0 is the AGP bridge itself
 * (agp_bridge->dev), 1..3 are nvidia_private.dev_1..dev_3.
 */
#define NVIDIA_0_APSIZE		0x80
#define NVIDIA_1_WBC		0xf0
#define NVIDIA_2_GARTCTRL	0xd0
#define NVIDIA_2_APBASE		0xd8
#define NVIDIA_2_APLIMIT	0xdc
/* i-th ATTBASE register; consecutive registers are 4 bytes apart. */
#define NVIDIA_2_ATTBASE(i)	(0xe0 + 4 * (i))
#define NVIDIA_3_APBASE		0x50
#define NVIDIA_3_APLIMIT	0x54
25 | |
26 | |
27 | static struct _nvidia_private { |
28 | struct pci_dev *dev_1; |
29 | struct pci_dev *dev_2; |
30 | struct pci_dev *dev_3; |
31 | volatile u32 __iomem *aperture; |
32 | int num_active_entries; |
33 | off_t pg_offset; |
34 | u32 wbc_mask; |
35 | } nvidia_private; |
36 | |
37 | |
38 | static int nvidia_fetch_size(void) |
39 | { |
40 | int i; |
41 | u8 size_value; |
42 | struct aper_size_info_8 *values; |
43 | |
44 | pci_read_config_byte(dev: agp_bridge->dev, NVIDIA_0_APSIZE, val: &size_value); |
45 | size_value &= 0x0f; |
46 | values = A_SIZE_8(agp_bridge->driver->aperture_sizes); |
47 | |
48 | for (i = 0; i < agp_bridge->driver->num_aperture_sizes; i++) { |
49 | if (size_value == values[i].size_value) { |
50 | agp_bridge->previous_size = |
51 | agp_bridge->current_size = (void *) (values + i); |
52 | agp_bridge->aperture_size_idx = i; |
53 | return values[i].size; |
54 | } |
55 | } |
56 | |
57 | return 0; |
58 | } |
59 | |
60 | #define SYSCFG 0xC0010010 |
61 | #define IORR_BASE0 0xC0010016 |
62 | #define IORR_MASK0 0xC0010017 |
63 | #define AMD_K7_NUM_IORR 2 |
64 | |
65 | static int nvidia_init_iorr(u32 base, u32 size) |
66 | { |
67 | u32 base_hi, base_lo; |
68 | u32 mask_hi, mask_lo; |
69 | u32 sys_hi, sys_lo; |
70 | u32 iorr_addr, free_iorr_addr; |
71 | |
72 | /* Find the iorr that is already used for the base */ |
73 | /* If not found, determine the uppermost available iorr */ |
74 | free_iorr_addr = AMD_K7_NUM_IORR; |
75 | for (iorr_addr = 0; iorr_addr < AMD_K7_NUM_IORR; iorr_addr++) { |
76 | rdmsr(IORR_BASE0 + 2 * iorr_addr, base_lo, base_hi); |
77 | rdmsr(IORR_MASK0 + 2 * iorr_addr, mask_lo, mask_hi); |
78 | |
79 | if ((base_lo & 0xfffff000) == (base & 0xfffff000)) |
80 | break; |
81 | |
82 | if ((mask_lo & 0x00000800) == 0) |
83 | free_iorr_addr = iorr_addr; |
84 | } |
85 | |
86 | if (iorr_addr >= AMD_K7_NUM_IORR) { |
87 | iorr_addr = free_iorr_addr; |
88 | if (iorr_addr >= AMD_K7_NUM_IORR) |
89 | return -EINVAL; |
90 | } |
91 | base_hi = 0x0; |
92 | base_lo = (base & ~0xfff) | 0x18; |
93 | mask_hi = 0xf; |
94 | mask_lo = ((~(size - 1)) & 0xfffff000) | 0x800; |
95 | wrmsr(IORR_BASE0 + 2 * iorr_addr, base_lo, base_hi); |
96 | wrmsr(IORR_MASK0 + 2 * iorr_addr, mask_lo, mask_hi); |
97 | |
98 | rdmsr(SYSCFG, sys_lo, sys_hi); |
99 | sys_lo |= 0x00100000; |
100 | wrmsr(SYSCFG, sys_lo, sys_hi); |
101 | |
102 | return 0; |
103 | } |
104 | |
105 | static int nvidia_configure(void) |
106 | { |
107 | int i, rc, num_dirs; |
108 | u32 apbase, aplimit; |
109 | phys_addr_t apbase_phys; |
110 | struct aper_size_info_8 *current_size; |
111 | u32 temp; |
112 | |
113 | current_size = A_SIZE_8(agp_bridge->current_size); |
114 | |
115 | /* aperture size */ |
116 | pci_write_config_byte(dev: agp_bridge->dev, NVIDIA_0_APSIZE, |
117 | val: current_size->size_value); |
118 | |
119 | /* address to map to */ |
120 | apbase = pci_bus_address(pdev: agp_bridge->dev, AGP_APERTURE_BAR); |
121 | agp_bridge->gart_bus_addr = apbase; |
122 | aplimit = apbase + (current_size->size * 1024 * 1024) - 1; |
123 | pci_write_config_dword(dev: nvidia_private.dev_2, NVIDIA_2_APBASE, val: apbase); |
124 | pci_write_config_dword(dev: nvidia_private.dev_2, NVIDIA_2_APLIMIT, val: aplimit); |
125 | pci_write_config_dword(dev: nvidia_private.dev_3, NVIDIA_3_APBASE, val: apbase); |
126 | pci_write_config_dword(dev: nvidia_private.dev_3, NVIDIA_3_APLIMIT, val: aplimit); |
127 | if (0 != (rc = nvidia_init_iorr(base: apbase, size: current_size->size * 1024 * 1024))) |
128 | return rc; |
129 | |
130 | /* directory size is 64k */ |
131 | num_dirs = current_size->size / 64; |
132 | nvidia_private.num_active_entries = current_size->num_entries; |
133 | nvidia_private.pg_offset = 0; |
134 | if (num_dirs == 0) { |
135 | num_dirs = 1; |
136 | nvidia_private.num_active_entries /= (64 / current_size->size); |
137 | nvidia_private.pg_offset = (apbase & (64 * 1024 * 1024 - 1) & |
138 | ~(current_size->size * 1024 * 1024 - 1)) / PAGE_SIZE; |
139 | } |
140 | |
141 | /* attbase */ |
142 | for (i = 0; i < 8; i++) { |
143 | pci_write_config_dword(dev: nvidia_private.dev_2, NVIDIA_2_ATTBASE(i), |
144 | val: (agp_bridge->gatt_bus_addr + (i % num_dirs) * 64 * 1024) | 1); |
145 | } |
146 | |
147 | /* gtlb control */ |
148 | pci_read_config_dword(dev: nvidia_private.dev_2, NVIDIA_2_GARTCTRL, val: &temp); |
149 | pci_write_config_dword(dev: nvidia_private.dev_2, NVIDIA_2_GARTCTRL, val: temp | 0x11); |
150 | |
151 | /* gart control */ |
152 | pci_read_config_dword(dev: agp_bridge->dev, NVIDIA_0_APSIZE, val: &temp); |
153 | pci_write_config_dword(dev: agp_bridge->dev, NVIDIA_0_APSIZE, val: temp | 0x100); |
154 | |
155 | /* map aperture */ |
156 | apbase_phys = pci_resource_start(agp_bridge->dev, AGP_APERTURE_BAR); |
157 | nvidia_private.aperture = |
158 | (volatile u32 __iomem *) ioremap(offset: apbase_phys, size: 33 * PAGE_SIZE); |
159 | |
160 | if (!nvidia_private.aperture) |
161 | return -ENOMEM; |
162 | |
163 | return 0; |
164 | } |
165 | |
166 | static void nvidia_cleanup(void) |
167 | { |
168 | struct aper_size_info_8 *previous_size; |
169 | u32 temp; |
170 | |
171 | /* gart control */ |
172 | pci_read_config_dword(dev: agp_bridge->dev, NVIDIA_0_APSIZE, val: &temp); |
173 | pci_write_config_dword(dev: agp_bridge->dev, NVIDIA_0_APSIZE, val: temp & ~(0x100)); |
174 | |
175 | /* gtlb control */ |
176 | pci_read_config_dword(dev: nvidia_private.dev_2, NVIDIA_2_GARTCTRL, val: &temp); |
177 | pci_write_config_dword(dev: nvidia_private.dev_2, NVIDIA_2_GARTCTRL, val: temp & ~(0x11)); |
178 | |
179 | /* unmap aperture */ |
180 | iounmap(addr: (void __iomem *) nvidia_private.aperture); |
181 | |
182 | /* restore previous aperture size */ |
183 | previous_size = A_SIZE_8(agp_bridge->previous_size); |
184 | pci_write_config_byte(dev: agp_bridge->dev, NVIDIA_0_APSIZE, |
185 | val: previous_size->size_value); |
186 | |
187 | /* restore iorr for previous aperture size */ |
188 | nvidia_init_iorr(base: agp_bridge->gart_bus_addr, |
189 | size: previous_size->size * 1024 * 1024); |
190 | } |
191 | |
192 | |
193 | /* |
194 | * Note we can't use the generic routines, even though they are 99% the same. |
195 | * Aperture sizes <64M still requires a full 64k GART directory, but |
196 | * only use the portion of the TLB entries that correspond to the apertures |
197 | * alignment inside the surrounding 64M block. |
198 | */ |
199 | extern int agp_memory_reserved; |
200 | |
201 | static int nvidia_insert_memory(struct agp_memory *mem, off_t pg_start, int type) |
202 | { |
203 | int i, j; |
204 | int mask_type; |
205 | |
206 | mask_type = agp_generic_type_to_mask_type(bridge: mem->bridge, type); |
207 | if (mask_type != 0 || type != mem->type) |
208 | return -EINVAL; |
209 | |
210 | if (mem->page_count == 0) |
211 | return 0; |
212 | |
213 | if ((pg_start + mem->page_count) > |
214 | (nvidia_private.num_active_entries - agp_memory_reserved/PAGE_SIZE)) |
215 | return -EINVAL; |
216 | |
217 | for (j = pg_start; j < (pg_start + mem->page_count); j++) { |
218 | if (!PGE_EMPTY(agp_bridge, readl(agp_bridge->gatt_table+nvidia_private.pg_offset+j))) |
219 | return -EBUSY; |
220 | } |
221 | |
222 | if (!mem->is_flushed) { |
223 | global_cache_flush(); |
224 | mem->is_flushed = true; |
225 | } |
226 | for (i = 0, j = pg_start; i < mem->page_count; i++, j++) { |
227 | writel(val: agp_bridge->driver->mask_memory(agp_bridge, |
228 | page_to_phys(mem->pages[i]), mask_type), |
229 | addr: agp_bridge->gatt_table+nvidia_private.pg_offset+j); |
230 | } |
231 | |
232 | /* PCI Posting. */ |
233 | readl(addr: agp_bridge->gatt_table+nvidia_private.pg_offset+j - 1); |
234 | |
235 | agp_bridge->driver->tlb_flush(mem); |
236 | return 0; |
237 | } |
238 | |
239 | |
240 | static int nvidia_remove_memory(struct agp_memory *mem, off_t pg_start, int type) |
241 | { |
242 | int i; |
243 | |
244 | int mask_type; |
245 | |
246 | mask_type = agp_generic_type_to_mask_type(bridge: mem->bridge, type); |
247 | if (mask_type != 0 || type != mem->type) |
248 | return -EINVAL; |
249 | |
250 | if (mem->page_count == 0) |
251 | return 0; |
252 | |
253 | for (i = pg_start; i < (mem->page_count + pg_start); i++) |
254 | writel(val: agp_bridge->scratch_page, addr: agp_bridge->gatt_table+nvidia_private.pg_offset+i); |
255 | |
256 | agp_bridge->driver->tlb_flush(mem); |
257 | return 0; |
258 | } |
259 | |
260 | |
261 | static void nvidia_tlbflush(struct agp_memory *mem) |
262 | { |
263 | unsigned long end; |
264 | u32 wbc_reg; |
265 | u32 __maybe_unused temp; |
266 | int i; |
267 | |
268 | /* flush chipset */ |
269 | if (nvidia_private.wbc_mask) { |
270 | pci_read_config_dword(dev: nvidia_private.dev_1, NVIDIA_1_WBC, val: &wbc_reg); |
271 | wbc_reg |= nvidia_private.wbc_mask; |
272 | pci_write_config_dword(dev: nvidia_private.dev_1, NVIDIA_1_WBC, val: wbc_reg); |
273 | |
274 | end = jiffies + 3*HZ; |
275 | do { |
276 | pci_read_config_dword(dev: nvidia_private.dev_1, |
277 | NVIDIA_1_WBC, val: &wbc_reg); |
278 | if (time_before_eq(end, jiffies)) { |
279 | printk(KERN_ERR PFX |
280 | "TLB flush took more than 3 seconds.\n" ); |
281 | } |
282 | } while (wbc_reg & nvidia_private.wbc_mask); |
283 | } |
284 | |
285 | /* flush TLB entries */ |
286 | for (i = 0; i < 32 + 1; i++) |
287 | temp = readl(addr: nvidia_private.aperture+(i * PAGE_SIZE / sizeof(u32))); |
288 | for (i = 0; i < 32 + 1; i++) |
289 | temp = readl(addr: nvidia_private.aperture+(i * PAGE_SIZE / sizeof(u32))); |
290 | } |
291 | |
292 | |
293 | static const struct aper_size_info_8 nvidia_generic_sizes[5] = |
294 | { |
295 | {512, 131072, 7, 0}, |
296 | {256, 65536, 6, 8}, |
297 | {128, 32768, 5, 12}, |
298 | {64, 16384, 4, 14}, |
299 | /* The 32M mode still requires a 64k gatt */ |
300 | {32, 16384, 4, 15} |
301 | }; |
302 | |
303 | |
304 | static const struct gatt_mask nvidia_generic_masks[] = |
305 | { |
306 | { .mask = 1, .type = 0} |
307 | }; |
308 | |
309 | |
310 | static const struct agp_bridge_driver nvidia_driver = { |
311 | .owner = THIS_MODULE, |
312 | .aperture_sizes = nvidia_generic_sizes, |
313 | .size_type = U8_APER_SIZE, |
314 | .num_aperture_sizes = 5, |
315 | .needs_scratch_page = true, |
316 | .configure = nvidia_configure, |
317 | .fetch_size = nvidia_fetch_size, |
318 | .cleanup = nvidia_cleanup, |
319 | .tlb_flush = nvidia_tlbflush, |
320 | .mask_memory = agp_generic_mask_memory, |
321 | .masks = nvidia_generic_masks, |
322 | .agp_enable = agp_generic_enable, |
323 | .cache_flush = global_cache_flush, |
324 | .create_gatt_table = agp_generic_create_gatt_table, |
325 | .free_gatt_table = agp_generic_free_gatt_table, |
326 | .insert_memory = nvidia_insert_memory, |
327 | .remove_memory = nvidia_remove_memory, |
328 | .alloc_by_type = agp_generic_alloc_by_type, |
329 | .free_by_type = agp_generic_free_by_type, |
330 | .agp_alloc_page = agp_generic_alloc_page, |
331 | .agp_alloc_pages = agp_generic_alloc_pages, |
332 | .agp_destroy_page = agp_generic_destroy_page, |
333 | .agp_destroy_pages = agp_generic_destroy_pages, |
334 | .agp_type_to_mask_type = agp_generic_type_to_mask_type, |
335 | }; |
336 | |
337 | static int agp_nvidia_probe(struct pci_dev *pdev, |
338 | const struct pci_device_id *ent) |
339 | { |
340 | struct agp_bridge_data *bridge; |
341 | u8 cap_ptr; |
342 | |
343 | nvidia_private.dev_1 = |
344 | pci_get_domain_bus_and_slot(domain: pci_domain_nr(bus: pdev->bus), |
345 | bus: (unsigned int)pdev->bus->number, |
346 | PCI_DEVFN(0, 1)); |
347 | nvidia_private.dev_2 = |
348 | pci_get_domain_bus_and_slot(domain: pci_domain_nr(bus: pdev->bus), |
349 | bus: (unsigned int)pdev->bus->number, |
350 | PCI_DEVFN(0, 2)); |
351 | nvidia_private.dev_3 = |
352 | pci_get_domain_bus_and_slot(domain: pci_domain_nr(bus: pdev->bus), |
353 | bus: (unsigned int)pdev->bus->number, |
354 | PCI_DEVFN(30, 0)); |
355 | |
356 | if (!nvidia_private.dev_1 || !nvidia_private.dev_2 || !nvidia_private.dev_3) { |
357 | printk(KERN_INFO PFX "Detected an NVIDIA nForce/nForce2 " |
358 | "chipset, but could not find the secondary devices.\n" ); |
359 | return -ENODEV; |
360 | } |
361 | |
362 | cap_ptr = pci_find_capability(dev: pdev, PCI_CAP_ID_AGP); |
363 | if (!cap_ptr) |
364 | return -ENODEV; |
365 | |
366 | switch (pdev->device) { |
367 | case PCI_DEVICE_ID_NVIDIA_NFORCE: |
368 | printk(KERN_INFO PFX "Detected NVIDIA nForce chipset\n" ); |
369 | nvidia_private.wbc_mask = 0x00010000; |
370 | break; |
371 | case PCI_DEVICE_ID_NVIDIA_NFORCE2: |
372 | printk(KERN_INFO PFX "Detected NVIDIA nForce2 chipset\n" ); |
373 | nvidia_private.wbc_mask = 0x80000000; |
374 | break; |
375 | default: |
376 | printk(KERN_ERR PFX "Unsupported NVIDIA chipset (device id: %04x)\n" , |
377 | pdev->device); |
378 | return -ENODEV; |
379 | } |
380 | |
381 | bridge = agp_alloc_bridge(); |
382 | if (!bridge) |
383 | return -ENOMEM; |
384 | |
385 | bridge->driver = &nvidia_driver; |
386 | bridge->dev_private_data = &nvidia_private; |
387 | bridge->dev = pdev; |
388 | bridge->capndx = cap_ptr; |
389 | |
390 | /* Fill in the mode register */ |
391 | pci_read_config_dword(dev: pdev, |
392 | where: bridge->capndx+PCI_AGP_STATUS, |
393 | val: &bridge->mode); |
394 | |
395 | pci_set_drvdata(pdev, data: bridge); |
396 | return agp_add_bridge(bridge); |
397 | } |
398 | |
/* Unbind: unregister the bridge stored as driver data, then drop it. */
static void agp_nvidia_remove(struct pci_dev *pdev)
{
	struct agp_bridge_data *bridge;

	bridge = pci_get_drvdata(pdev);
	agp_remove_bridge(bridge);
	agp_put_bridge(bridge);
}
406 | |
407 | static int agp_nvidia_resume(struct device *dev) |
408 | { |
409 | /* reconfigure AGP hardware again */ |
410 | nvidia_configure(); |
411 | |
412 | return 0; |
413 | } |
414 | |
415 | static const struct pci_device_id agp_nvidia_pci_table[] = { |
416 | { |
417 | .class = (PCI_CLASS_BRIDGE_HOST << 8), |
418 | .class_mask = ~0, |
419 | .vendor = PCI_VENDOR_ID_NVIDIA, |
420 | .device = PCI_DEVICE_ID_NVIDIA_NFORCE, |
421 | .subvendor = PCI_ANY_ID, |
422 | .subdevice = PCI_ANY_ID, |
423 | }, |
424 | { |
425 | .class = (PCI_CLASS_BRIDGE_HOST << 8), |
426 | .class_mask = ~0, |
427 | .vendor = PCI_VENDOR_ID_NVIDIA, |
428 | .device = PCI_DEVICE_ID_NVIDIA_NFORCE2, |
429 | .subvendor = PCI_ANY_ID, |
430 | .subdevice = PCI_ANY_ID, |
431 | }, |
432 | { } |
433 | }; |
434 | |
435 | MODULE_DEVICE_TABLE(pci, agp_nvidia_pci_table); |
436 | |
437 | static DEFINE_SIMPLE_DEV_PM_OPS(agp_nvidia_pm_ops, NULL, agp_nvidia_resume); |
438 | |
439 | static struct pci_driver agp_nvidia_pci_driver = { |
440 | .name = "agpgart-nvidia" , |
441 | .id_table = agp_nvidia_pci_table, |
442 | .probe = agp_nvidia_probe, |
443 | .remove = agp_nvidia_remove, |
444 | .driver.pm = &agp_nvidia_pm_ops, |
445 | }; |
446 | |
447 | static int __init agp_nvidia_init(void) |
448 | { |
449 | if (agp_off) |
450 | return -EINVAL; |
451 | return pci_register_driver(&agp_nvidia_pci_driver); |
452 | } |
453 | |
454 | static void __exit agp_nvidia_cleanup(void) |
455 | { |
456 | pci_unregister_driver(dev: &agp_nvidia_pci_driver); |
457 | pci_dev_put(dev: nvidia_private.dev_1); |
458 | pci_dev_put(dev: nvidia_private.dev_2); |
459 | pci_dev_put(dev: nvidia_private.dev_3); |
460 | } |
461 | |
462 | module_init(agp_nvidia_init); |
463 | module_exit(agp_nvidia_cleanup); |
464 | |
465 | MODULE_LICENSE("GPL and additional rights" ); |
466 | MODULE_AUTHOR("NVIDIA Corporation" ); |
467 | |
468 | |