1 | /* |
2 | * AMD 76x Memory Controller kernel module |
3 | * (C) 2003 Linux Networx (http://lnxi.com) |
4 | * This file may be distributed under the terms of the |
5 | * GNU General Public License. |
6 | * |
7 | * Written by Thayne Harbaugh |
8 | * Based on work by Dan Hollis <goemon at anime dot net> and others. |
9 | * http://www.anime.net/~goemon/linux-ecc/ |
10 | * |
11 | * $Id: edac_amd76x.c,v 1.4.2.5 2005/10/05 00:43:44 dsp_llnl Exp $ |
12 | * |
13 | */ |
14 | |
15 | #include <linux/module.h> |
16 | #include <linux/init.h> |
17 | #include <linux/pci.h> |
18 | #include <linux/pci_ids.h> |
19 | #include <linux/edac.h> |
20 | #include "edac_module.h" |
21 | |
/* Module name handed to the EDAC and PCI cores by this driver. */
#define EDAC_MOD_STR	"amd76x_edac"

/* edac_printk() with this driver's "amd76x" tag filled in. */
#define amd76x_printk(level, fmt, ...) \
	edac_printk(level, "amd76x", fmt, ##__VA_ARGS__)

/* edac_mc_chipset_printk() with this driver's "amd76x" tag filled in. */
#define amd76x_mc_printk(mci, level, fmt, ...) \
	edac_mc_chipset_printk(mci, level, "amd76x", fmt, ##__VA_ARGS__)

/* Size of the chip-select layer passed to edac_mc_alloc(). */
#define AMD76X_NR_CSROWS	8
/* NOTE(review): not referenced in the visible part of this file. */
#define AMD76X_NR_DIMMS		4
32 | |
/* AMD 76x register addresses - device 0 function 0 - PCI bridge */

/*
 * ECC mode and status register (32 bits)
 *
 *   31:16  reserved
 *   15:14  SERR enabled: x1=ue 1x=ce
 *   13     reserved
 *   12     diag: disabled, enabled
 *   11:10  mode: dis, EC, ECC, ECC+scrub
 *   9:8    status: x1=ue 1x=ce
 *   7:4    UE cs row
 *   3:0    CE cs row
 */
#define AMD76X_ECC_MODE_STATUS	0x48

/*
 * DRAM mode and status register (32 bits)
 *
 *   31:26  clock disable 5 - 0
 *   25     SDRAM init
 *   24     reserved
 *   23     mode register service
 *   22:21  suspend to RAM
 *   20     burst refresh enable
 *   19     refresh disable
 *   18     reserved
 *   17:16  cycles-per-refresh
 *   15:8   reserved
 *   7:0    x4 mode enable 7 - 0
 */
#define AMD76X_DRAM_MODE_STATUS	0x58

/*
 * Memory base address registers (8 registers of 32 bits each,
 * starting at this offset)
 *
 *   31:23  chip-select base
 *   22:16  reserved
 *   15:7   chip-select mask
 *   6:3    reserved
 *   2:1    address mode
 *   0      chip-select enable
 */
#define AMD76X_MEM_BASE_ADDR	0xC0
71 | |
72 | struct amd76x_error_info { |
73 | u32 ecc_mode_status; |
74 | }; |
75 | |
/* Supported chipset variants; the values are used as indices into amd76x_devs[]. */
enum amd76x_chips {
	AMD761 = 0,
	AMD762,
};
80 | |
/* Static, per-chipset description. */
struct amd76x_dev_info {
	/* Controller name; copied into mci->ctl_name at probe time. */
	const char *ctl_name;
};
84 | |
85 | static const struct amd76x_dev_info amd76x_devs[] = { |
86 | [AMD761] = { |
87 | .ctl_name = "AMD761" }, |
88 | [AMD762] = { |
89 | .ctl_name = "AMD762" }, |
90 | }; |
91 | |
/*
 * Generic EDAC PCI control created in amd76x_probe1() and released in
 * amd76x_remove_one(); NULL when creation failed.
 */
static struct edac_pci_ctl_info *amd76x_pci;
93 | |
94 | /** |
95 | * amd76x_get_error_info - fetch error information |
96 | * @mci: Memory controller |
97 | * @info: Info to fill in |
98 | * |
99 | * Fetch and store the AMD76x ECC status. Clear pending status |
100 | * on the chip so that further errors will be reported |
101 | */ |
102 | static void amd76x_get_error_info(struct mem_ctl_info *mci, |
103 | struct amd76x_error_info *info) |
104 | { |
105 | struct pci_dev *pdev; |
106 | |
107 | pdev = to_pci_dev(mci->pdev); |
108 | pci_read_config_dword(dev: pdev, AMD76X_ECC_MODE_STATUS, |
109 | val: &info->ecc_mode_status); |
110 | |
111 | if (info->ecc_mode_status & BIT(8)) |
112 | pci_write_bits32(pdev, AMD76X_ECC_MODE_STATUS, |
113 | value: (u32) BIT(8), mask: (u32) BIT(8)); |
114 | |
115 | if (info->ecc_mode_status & BIT(9)) |
116 | pci_write_bits32(pdev, AMD76X_ECC_MODE_STATUS, |
117 | value: (u32) BIT(9), mask: (u32) BIT(9)); |
118 | } |
119 | |
120 | /** |
121 | * amd76x_process_error_info - Error check |
122 | * @mci: Memory controller |
123 | * @info: Previously fetched information from chip |
124 | * @handle_errors: 1 if we should do recovery |
125 | * |
126 | * Process the chip state and decide if an error has occurred. |
127 | * A return of 1 indicates an error. Also if handle_errors is true |
128 | * then attempt to handle and clean up after the error |
129 | */ |
130 | static int amd76x_process_error_info(struct mem_ctl_info *mci, |
131 | struct amd76x_error_info *info, |
132 | int handle_errors) |
133 | { |
134 | int error_found; |
135 | u32 row; |
136 | |
137 | error_found = 0; |
138 | |
139 | /* |
140 | * Check for an uncorrectable error |
141 | */ |
142 | if (info->ecc_mode_status & BIT(8)) { |
143 | error_found = 1; |
144 | |
145 | if (handle_errors) { |
146 | row = (info->ecc_mode_status >> 4) & 0xf; |
147 | edac_mc_handle_error(type: HW_EVENT_ERR_UNCORRECTED, mci, error_count: 1, |
148 | page_frame_number: mci->csrows[row]->first_page, offset_in_page: 0, syndrome: 0, |
149 | top_layer: row, mid_layer: 0, low_layer: -1, |
150 | msg: mci->ctl_name, other_detail: "" ); |
151 | } |
152 | } |
153 | |
154 | /* |
155 | * Check for a correctable error |
156 | */ |
157 | if (info->ecc_mode_status & BIT(9)) { |
158 | error_found = 1; |
159 | |
160 | if (handle_errors) { |
161 | row = info->ecc_mode_status & 0xf; |
162 | edac_mc_handle_error(type: HW_EVENT_ERR_CORRECTED, mci, error_count: 1, |
163 | page_frame_number: mci->csrows[row]->first_page, offset_in_page: 0, syndrome: 0, |
164 | top_layer: row, mid_layer: 0, low_layer: -1, |
165 | msg: mci->ctl_name, other_detail: "" ); |
166 | } |
167 | } |
168 | |
169 | return error_found; |
170 | } |
171 | |
172 | /** |
173 | * amd76x_check - Poll the controller |
174 | * @mci: Memory controller |
175 | * |
176 | * Called by the poll handlers this function reads the status |
177 | * from the controller and checks for errors. |
178 | */ |
179 | static void amd76x_check(struct mem_ctl_info *mci) |
180 | { |
181 | struct amd76x_error_info info; |
182 | amd76x_get_error_info(mci, info: &info); |
183 | amd76x_process_error_info(mci, info: &info, handle_errors: 1); |
184 | } |
185 | |
186 | static void amd76x_init_csrows(struct mem_ctl_info *mci, struct pci_dev *pdev, |
187 | enum edac_type edac_mode) |
188 | { |
189 | struct csrow_info *csrow; |
190 | struct dimm_info *dimm; |
191 | u32 mba, mba_base, mba_mask, dms; |
192 | int index; |
193 | |
194 | for (index = 0; index < mci->nr_csrows; index++) { |
195 | csrow = mci->csrows[index]; |
196 | dimm = csrow->channels[0]->dimm; |
197 | |
198 | /* find the DRAM Chip Select Base address and mask */ |
199 | pci_read_config_dword(dev: pdev, |
200 | AMD76X_MEM_BASE_ADDR + (index * 4), val: &mba); |
201 | |
202 | if (!(mba & BIT(0))) |
203 | continue; |
204 | |
205 | mba_base = mba & 0xff800000UL; |
206 | mba_mask = ((mba & 0xff80) << 16) | 0x7fffffUL; |
207 | pci_read_config_dword(dev: pdev, AMD76X_DRAM_MODE_STATUS, val: &dms); |
208 | csrow->first_page = mba_base >> PAGE_SHIFT; |
209 | dimm->nr_pages = (mba_mask + 1) >> PAGE_SHIFT; |
210 | csrow->last_page = csrow->first_page + dimm->nr_pages - 1; |
211 | csrow->page_mask = mba_mask >> PAGE_SHIFT; |
212 | dimm->grain = dimm->nr_pages << PAGE_SHIFT; |
213 | dimm->mtype = MEM_RDDR; |
214 | dimm->dtype = ((dms >> index) & 0x1) ? DEV_X4 : DEV_UNKNOWN; |
215 | dimm->edac_mode = edac_mode; |
216 | } |
217 | } |
218 | |
219 | /** |
220 | * amd76x_probe1 - Perform set up for detected device |
221 | * @pdev; PCI device detected |
222 | * @dev_idx: Device type index |
223 | * |
224 | * We have found an AMD76x and now need to set up the memory |
225 | * controller status reporting. We configure and set up the |
226 | * memory controller reporting and claim the device. |
227 | */ |
228 | static int amd76x_probe1(struct pci_dev *pdev, int dev_idx) |
229 | { |
230 | static const enum edac_type ems_modes[] = { |
231 | EDAC_NONE, |
232 | EDAC_EC, |
233 | EDAC_SECDED, |
234 | EDAC_SECDED |
235 | }; |
236 | struct mem_ctl_info *mci; |
237 | struct edac_mc_layer layers[2]; |
238 | u32 ems; |
239 | u32 ems_mode; |
240 | struct amd76x_error_info discard; |
241 | |
242 | edac_dbg(0, "\n" ); |
243 | pci_read_config_dword(dev: pdev, AMD76X_ECC_MODE_STATUS, val: &ems); |
244 | ems_mode = (ems >> 10) & 0x3; |
245 | |
246 | layers[0].type = EDAC_MC_LAYER_CHIP_SELECT; |
247 | layers[0].size = AMD76X_NR_CSROWS; |
248 | layers[0].is_virt_csrow = true; |
249 | layers[1].type = EDAC_MC_LAYER_CHANNEL; |
250 | layers[1].size = 1; |
251 | layers[1].is_virt_csrow = false; |
252 | mci = edac_mc_alloc(mc_num: 0, ARRAY_SIZE(layers), layers, sz_pvt: 0); |
253 | |
254 | if (mci == NULL) |
255 | return -ENOMEM; |
256 | |
257 | edac_dbg(0, "mci = %p\n" , mci); |
258 | mci->pdev = &pdev->dev; |
259 | mci->mtype_cap = MEM_FLAG_RDDR; |
260 | mci->edac_ctl_cap = EDAC_FLAG_NONE | EDAC_FLAG_EC | EDAC_FLAG_SECDED; |
261 | mci->edac_cap = ems_mode ? |
262 | (EDAC_FLAG_EC | EDAC_FLAG_SECDED) : EDAC_FLAG_NONE; |
263 | mci->mod_name = EDAC_MOD_STR; |
264 | mci->ctl_name = amd76x_devs[dev_idx].ctl_name; |
265 | mci->dev_name = pci_name(pdev); |
266 | mci->edac_check = amd76x_check; |
267 | mci->ctl_page_to_phys = NULL; |
268 | |
269 | amd76x_init_csrows(mci, pdev, edac_mode: ems_modes[ems_mode]); |
270 | amd76x_get_error_info(mci, info: &discard); /* clear counters */ |
271 | |
272 | /* Here we assume that we will never see multiple instances of this |
273 | * type of memory controller. The ID is therefore hardcoded to 0. |
274 | */ |
275 | if (edac_mc_add_mc(mci)) { |
276 | edac_dbg(3, "failed edac_mc_add_mc()\n" ); |
277 | goto fail; |
278 | } |
279 | |
280 | /* allocating generic PCI control info */ |
281 | amd76x_pci = edac_pci_create_generic_ctl(dev: &pdev->dev, EDAC_MOD_STR); |
282 | if (!amd76x_pci) { |
283 | printk(KERN_WARNING |
284 | "%s(): Unable to create PCI control\n" , |
285 | __func__); |
286 | printk(KERN_WARNING |
287 | "%s(): PCI error report via EDAC not setup\n" , |
288 | __func__); |
289 | } |
290 | |
291 | /* get this far and it's successful */ |
292 | edac_dbg(3, "success\n" ); |
293 | return 0; |
294 | |
295 | fail: |
296 | edac_mc_free(mci); |
297 | return -ENODEV; |
298 | } |
299 | |
300 | /* returns count (>= 0), or negative on error */ |
301 | static int amd76x_init_one(struct pci_dev *pdev, |
302 | const struct pci_device_id *ent) |
303 | { |
304 | edac_dbg(0, "\n" ); |
305 | |
306 | /* don't need to call pci_enable_device() */ |
307 | return amd76x_probe1(pdev, dev_idx: ent->driver_data); |
308 | } |
309 | |
310 | /** |
311 | * amd76x_remove_one - driver shutdown |
312 | * @pdev: PCI device being handed back |
313 | * |
314 | * Called when the driver is unloaded. Find the matching mci |
315 | * structure for the device then delete the mci and free the |
316 | * resources. |
317 | */ |
318 | static void amd76x_remove_one(struct pci_dev *pdev) |
319 | { |
320 | struct mem_ctl_info *mci; |
321 | |
322 | edac_dbg(0, "\n" ); |
323 | |
324 | if (amd76x_pci) |
325 | edac_pci_release_generic_ctl(pci: amd76x_pci); |
326 | |
327 | if ((mci = edac_mc_del_mc(dev: &pdev->dev)) == NULL) |
328 | return; |
329 | |
330 | edac_mc_free(mci); |
331 | } |
332 | |
333 | static const struct pci_device_id amd76x_pci_tbl[] = { |
334 | { |
335 | PCI_VEND_DEV(AMD, FE_GATE_700C), PCI_ANY_ID, PCI_ANY_ID, 0, 0, |
336 | AMD762}, |
337 | { |
338 | PCI_VEND_DEV(AMD, FE_GATE_700E), PCI_ANY_ID, PCI_ANY_ID, 0, 0, |
339 | AMD761}, |
340 | { |
341 | 0, |
342 | } /* 0 terminated list. */ |
343 | }; |
344 | |
345 | MODULE_DEVICE_TABLE(pci, amd76x_pci_tbl); |
346 | |
347 | static struct pci_driver amd76x_driver = { |
348 | .name = EDAC_MOD_STR, |
349 | .probe = amd76x_init_one, |
350 | .remove = amd76x_remove_one, |
351 | .id_table = amd76x_pci_tbl, |
352 | }; |
353 | |
354 | static int __init amd76x_init(void) |
355 | { |
356 | /* Ensure that the OPSTATE is set correctly for POLL or NMI */ |
357 | opstate_init(); |
358 | |
359 | return pci_register_driver(&amd76x_driver); |
360 | } |
361 | |
362 | static void __exit amd76x_exit(void) |
363 | { |
364 | pci_unregister_driver(dev: &amd76x_driver); |
365 | } |
366 | |
367 | module_init(amd76x_init); |
368 | module_exit(amd76x_exit); |
369 | |
370 | MODULE_LICENSE("GPL" ); |
371 | MODULE_AUTHOR("Linux Networx (http://lnxi.com) Thayne Harbaugh" ); |
372 | MODULE_DESCRIPTION("MC support for AMD 76x memory controllers" ); |
373 | |
374 | module_param(edac_op_state, int, 0444); |
375 | MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting state: 0=Poll,1=NMI" ); |
376 | |