1// SPDX-License-Identifier: GPL-2.0
2/*
3 * Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved.
4 */
5#include <linux/bitfield.h>
6#include <linux/bitops.h>
7#include <linux/edac.h>
8#include <linux/of_irq.h>
9#include <linux/platform_device.h>
10#include <linux/spinlock.h>
11#include "edac_module.h"
12
13/* Registers Offset */
14#define AL_MC_ECC_CFG 0x70
15#define AL_MC_ECC_CLEAR 0x7c
16#define AL_MC_ECC_ERR_COUNT 0x80
17#define AL_MC_ECC_CE_ADDR0 0x84
18#define AL_MC_ECC_CE_ADDR1 0x88
19#define AL_MC_ECC_UE_ADDR0 0xa4
20#define AL_MC_ECC_UE_ADDR1 0xa8
21#define AL_MC_ECC_CE_SYND0 0x8c
22#define AL_MC_ECC_CE_SYND1 0x90
23#define AL_MC_ECC_CE_SYND2 0x94
24#define AL_MC_ECC_UE_SYND0 0xac
25#define AL_MC_ECC_UE_SYND1 0xb0
26#define AL_MC_ECC_UE_SYND2 0xb4
27
28/* Registers Fields */
29#define AL_MC_ECC_CFG_SCRUB_DISABLED BIT(4)
30
31#define AL_MC_ECC_CLEAR_UE_COUNT BIT(3)
32#define AL_MC_ECC_CLEAR_CE_COUNT BIT(2)
33#define AL_MC_ECC_CLEAR_UE_ERR BIT(1)
34#define AL_MC_ECC_CLEAR_CE_ERR BIT(0)
35
36#define AL_MC_ECC_ERR_COUNT_UE GENMASK(31, 16)
37#define AL_MC_ECC_ERR_COUNT_CE GENMASK(15, 0)
38
39#define AL_MC_ECC_CE_ADDR0_RANK GENMASK(25, 24)
40#define AL_MC_ECC_CE_ADDR0_ROW GENMASK(17, 0)
41
42#define AL_MC_ECC_CE_ADDR1_BG GENMASK(25, 24)
43#define AL_MC_ECC_CE_ADDR1_BANK GENMASK(18, 16)
44#define AL_MC_ECC_CE_ADDR1_COLUMN GENMASK(11, 0)
45
46#define AL_MC_ECC_UE_ADDR0_RANK GENMASK(25, 24)
47#define AL_MC_ECC_UE_ADDR0_ROW GENMASK(17, 0)
48
49#define AL_MC_ECC_UE_ADDR1_BG GENMASK(25, 24)
50#define AL_MC_ECC_UE_ADDR1_BANK GENMASK(18, 16)
51#define AL_MC_ECC_UE_ADDR1_COLUMN GENMASK(11, 0)
52
53#define DRV_NAME "al_mc_edac"
54#define AL_MC_EDAC_MSG_MAX 256
55
/* Per-controller private state, stored in mem_ctl_info::pvt_info. */
struct al_mc_edac {
	void __iomem *mmio_base;	/* mapped MC register window */
	spinlock_t lock;		/* serializes edac_mc_handle_error() reporting */
	int irq_ce;			/* correctable-error IRQ; <= 0 means polled */
	int irq_ue;			/* uncorrectable-error IRQ; <= 0 means polled */
};
62
63static void prepare_msg(char *message, size_t buffer_size,
64 enum hw_event_mc_err_type type,
65 u8 rank, u32 row, u8 bg, u8 bank, u16 column,
66 u32 syn0, u32 syn1, u32 syn2)
67{
68 snprintf(buf: message, size: buffer_size,
69 fmt: "%s rank=0x%x row=0x%x bg=0x%x bank=0x%x col=0x%x syn0: 0x%x syn1: 0x%x syn2: 0x%x",
70 type == HW_EVENT_ERR_UNCORRECTED ? "UE" : "CE",
71 rank, row, bg, bank, column, syn0, syn1, syn2);
72}
73
74static int handle_ce(struct mem_ctl_info *mci)
75{
76 u32 eccerrcnt, ecccaddr0, ecccaddr1, ecccsyn0, ecccsyn1, ecccsyn2, row;
77 struct al_mc_edac *al_mc = mci->pvt_info;
78 char msg[AL_MC_EDAC_MSG_MAX];
79 u16 ce_count, column;
80 unsigned long flags;
81 u8 rank, bg, bank;
82
83 eccerrcnt = readl_relaxed(al_mc->mmio_base + AL_MC_ECC_ERR_COUNT);
84 ce_count = FIELD_GET(AL_MC_ECC_ERR_COUNT_CE, eccerrcnt);
85 if (!ce_count)
86 return 0;
87
88 ecccaddr0 = readl_relaxed(al_mc->mmio_base + AL_MC_ECC_CE_ADDR0);
89 ecccaddr1 = readl_relaxed(al_mc->mmio_base + AL_MC_ECC_CE_ADDR1);
90 ecccsyn0 = readl_relaxed(al_mc->mmio_base + AL_MC_ECC_CE_SYND0);
91 ecccsyn1 = readl_relaxed(al_mc->mmio_base + AL_MC_ECC_CE_SYND1);
92 ecccsyn2 = readl_relaxed(al_mc->mmio_base + AL_MC_ECC_CE_SYND2);
93
94 writel_relaxed(AL_MC_ECC_CLEAR_CE_COUNT | AL_MC_ECC_CLEAR_CE_ERR,
95 al_mc->mmio_base + AL_MC_ECC_CLEAR);
96
97 dev_dbg(mci->pdev, "eccuaddr0=0x%08x eccuaddr1=0x%08x\n",
98 ecccaddr0, ecccaddr1);
99
100 rank = FIELD_GET(AL_MC_ECC_CE_ADDR0_RANK, ecccaddr0);
101 row = FIELD_GET(AL_MC_ECC_CE_ADDR0_ROW, ecccaddr0);
102
103 bg = FIELD_GET(AL_MC_ECC_CE_ADDR1_BG, ecccaddr1);
104 bank = FIELD_GET(AL_MC_ECC_CE_ADDR1_BANK, ecccaddr1);
105 column = FIELD_GET(AL_MC_ECC_CE_ADDR1_COLUMN, ecccaddr1);
106
107 prepare_msg(message: msg, buffer_size: sizeof(msg), type: HW_EVENT_ERR_CORRECTED,
108 rank, row, bg, bank, column,
109 syn0: ecccsyn0, syn1: ecccsyn1, syn2: ecccsyn2);
110
111 spin_lock_irqsave(&al_mc->lock, flags);
112 edac_mc_handle_error(type: HW_EVENT_ERR_CORRECTED, mci,
113 error_count: ce_count, page_frame_number: 0, offset_in_page: 0, syndrome: 0, top_layer: 0, mid_layer: 0, low_layer: -1, msg: mci->ctl_name, other_detail: msg);
114 spin_unlock_irqrestore(lock: &al_mc->lock, flags);
115
116 return ce_count;
117}
118
119static int handle_ue(struct mem_ctl_info *mci)
120{
121 u32 eccerrcnt, eccuaddr0, eccuaddr1, eccusyn0, eccusyn1, eccusyn2, row;
122 struct al_mc_edac *al_mc = mci->pvt_info;
123 char msg[AL_MC_EDAC_MSG_MAX];
124 u16 ue_count, column;
125 unsigned long flags;
126 u8 rank, bg, bank;
127
128 eccerrcnt = readl_relaxed(al_mc->mmio_base + AL_MC_ECC_ERR_COUNT);
129 ue_count = FIELD_GET(AL_MC_ECC_ERR_COUNT_UE, eccerrcnt);
130 if (!ue_count)
131 return 0;
132
133 eccuaddr0 = readl_relaxed(al_mc->mmio_base + AL_MC_ECC_UE_ADDR0);
134 eccuaddr1 = readl_relaxed(al_mc->mmio_base + AL_MC_ECC_UE_ADDR1);
135 eccusyn0 = readl_relaxed(al_mc->mmio_base + AL_MC_ECC_UE_SYND0);
136 eccusyn1 = readl_relaxed(al_mc->mmio_base + AL_MC_ECC_UE_SYND1);
137 eccusyn2 = readl_relaxed(al_mc->mmio_base + AL_MC_ECC_UE_SYND2);
138
139 writel_relaxed(AL_MC_ECC_CLEAR_UE_COUNT | AL_MC_ECC_CLEAR_UE_ERR,
140 al_mc->mmio_base + AL_MC_ECC_CLEAR);
141
142 dev_dbg(mci->pdev, "eccuaddr0=0x%08x eccuaddr1=0x%08x\n",
143 eccuaddr0, eccuaddr1);
144
145 rank = FIELD_GET(AL_MC_ECC_UE_ADDR0_RANK, eccuaddr0);
146 row = FIELD_GET(AL_MC_ECC_UE_ADDR0_ROW, eccuaddr0);
147
148 bg = FIELD_GET(AL_MC_ECC_UE_ADDR1_BG, eccuaddr1);
149 bank = FIELD_GET(AL_MC_ECC_UE_ADDR1_BANK, eccuaddr1);
150 column = FIELD_GET(AL_MC_ECC_UE_ADDR1_COLUMN, eccuaddr1);
151
152 prepare_msg(message: msg, buffer_size: sizeof(msg), type: HW_EVENT_ERR_UNCORRECTED,
153 rank, row, bg, bank, column,
154 syn0: eccusyn0, syn1: eccusyn1, syn2: eccusyn2);
155
156 spin_lock_irqsave(&al_mc->lock, flags);
157 edac_mc_handle_error(type: HW_EVENT_ERR_UNCORRECTED, mci,
158 error_count: ue_count, page_frame_number: 0, offset_in_page: 0, syndrome: 0, top_layer: 0, mid_layer: 0, low_layer: -1, msg: mci->ctl_name, other_detail: msg);
159 spin_unlock_irqrestore(lock: &al_mc->lock, flags);
160
161 return ue_count;
162}
163
164static void al_mc_edac_check(struct mem_ctl_info *mci)
165{
166 struct al_mc_edac *al_mc = mci->pvt_info;
167
168 if (al_mc->irq_ue <= 0)
169 handle_ue(mci);
170
171 if (al_mc->irq_ce <= 0)
172 handle_ce(mci);
173}
174
175static irqreturn_t al_mc_edac_irq_handler_ue(int irq, void *info)
176{
177 struct platform_device *pdev = info;
178 struct mem_ctl_info *mci = platform_get_drvdata(pdev);
179
180 if (handle_ue(mci))
181 return IRQ_HANDLED;
182 return IRQ_NONE;
183}
184
185static irqreturn_t al_mc_edac_irq_handler_ce(int irq, void *info)
186{
187 struct platform_device *pdev = info;
188 struct mem_ctl_info *mci = platform_get_drvdata(pdev);
189
190 if (handle_ce(mci))
191 return IRQ_HANDLED;
192 return IRQ_NONE;
193}
194
195static enum scrub_type get_scrub_mode(void __iomem *mmio_base)
196{
197 u32 ecccfg0;
198
199 ecccfg0 = readl(addr: mmio_base + AL_MC_ECC_CFG);
200
201 if (FIELD_GET(AL_MC_ECC_CFG_SCRUB_DISABLED, ecccfg0))
202 return SCRUB_NONE;
203 else
204 return SCRUB_HW_SRC;
205}
206
/* devm action: free the mem_ctl_info allocated in probe on driver detach. */
static void devm_al_mc_edac_free(void *data)
{
	edac_mc_free(data);
}
211
/* devm action: unregister the memory controller (takes the struct device). */
static void devm_al_mc_edac_del(void *data)
{
	edac_mc_del_mc(data);
}
216
217static int al_mc_edac_probe(struct platform_device *pdev)
218{
219 struct edac_mc_layer layers[1];
220 struct mem_ctl_info *mci;
221 struct al_mc_edac *al_mc;
222 void __iomem *mmio_base;
223 struct dimm_info *dimm;
224 int ret;
225
226 mmio_base = devm_platform_ioremap_resource(pdev, index: 0);
227 if (IS_ERR(ptr: mmio_base)) {
228 dev_err(&pdev->dev, "failed to ioremap memory (%ld)\n",
229 PTR_ERR(mmio_base));
230 return PTR_ERR(ptr: mmio_base);
231 }
232
233 layers[0].type = EDAC_MC_LAYER_CHIP_SELECT;
234 layers[0].size = 1;
235 layers[0].is_virt_csrow = false;
236 mci = edac_mc_alloc(mc_num: 0, ARRAY_SIZE(layers), layers,
237 sz_pvt: sizeof(struct al_mc_edac));
238 if (!mci)
239 return -ENOMEM;
240
241 ret = devm_add_action_or_reset(&pdev->dev, devm_al_mc_edac_free, mci);
242 if (ret)
243 return ret;
244
245 platform_set_drvdata(pdev, data: mci);
246 al_mc = mci->pvt_info;
247
248 al_mc->mmio_base = mmio_base;
249
250 al_mc->irq_ue = of_irq_get_byname(dev: pdev->dev.of_node, name: "ue");
251 if (al_mc->irq_ue <= 0)
252 dev_dbg(&pdev->dev,
253 "no IRQ defined for UE - falling back to polling\n");
254
255 al_mc->irq_ce = of_irq_get_byname(dev: pdev->dev.of_node, name: "ce");
256 if (al_mc->irq_ce <= 0)
257 dev_dbg(&pdev->dev,
258 "no IRQ defined for CE - falling back to polling\n");
259
260 /*
261 * In case both interrupts (ue/ce) are to be found, use interrupt mode.
262 * In case none of the interrupt are foud, use polling mode.
263 * In case only one interrupt is found, use interrupt mode for it but
264 * keep polling mode enable for the other.
265 */
266 if (al_mc->irq_ue <= 0 || al_mc->irq_ce <= 0) {
267 edac_op_state = EDAC_OPSTATE_POLL;
268 mci->edac_check = al_mc_edac_check;
269 } else {
270 edac_op_state = EDAC_OPSTATE_INT;
271 }
272
273 spin_lock_init(&al_mc->lock);
274
275 mci->mtype_cap = MEM_FLAG_DDR3 | MEM_FLAG_DDR4;
276 mci->edac_ctl_cap = EDAC_FLAG_NONE | EDAC_FLAG_SECDED;
277 mci->edac_cap = EDAC_FLAG_SECDED;
278 mci->mod_name = DRV_NAME;
279 mci->ctl_name = "al_mc";
280 mci->pdev = &pdev->dev;
281 mci->scrub_mode = get_scrub_mode(mmio_base);
282
283 dimm = *mci->dimms;
284 dimm->grain = 1;
285
286 ret = edac_mc_add_mc(mci);
287 if (ret < 0) {
288 dev_err(&pdev->dev,
289 "fail to add memory controller device (%d)\n",
290 ret);
291 return ret;
292 }
293
294 ret = devm_add_action_or_reset(&pdev->dev, devm_al_mc_edac_del, &pdev->dev);
295 if (ret)
296 return ret;
297
298 if (al_mc->irq_ue > 0) {
299 ret = devm_request_irq(dev: &pdev->dev,
300 irq: al_mc->irq_ue,
301 handler: al_mc_edac_irq_handler_ue,
302 IRQF_SHARED,
303 devname: pdev->name,
304 dev_id: pdev);
305 if (ret != 0) {
306 dev_err(&pdev->dev,
307 "failed to request UE IRQ %d (%d)\n",
308 al_mc->irq_ue, ret);
309 return ret;
310 }
311 }
312
313 if (al_mc->irq_ce > 0) {
314 ret = devm_request_irq(dev: &pdev->dev,
315 irq: al_mc->irq_ce,
316 handler: al_mc_edac_irq_handler_ce,
317 IRQF_SHARED,
318 devname: pdev->name,
319 dev_id: pdev);
320 if (ret != 0) {
321 dev_err(&pdev->dev,
322 "failed to request CE IRQ %d (%d)\n",
323 al_mc->irq_ce, ret);
324 return ret;
325 }
326 }
327
328 return 0;
329}
330
/* Devices handled by this driver, matched via the DT "compatible" string. */
static const struct of_device_id al_mc_edac_of_match[] = {
	{ .compatible = "amazon,al-mc-edac", },
	{},
};
335
336MODULE_DEVICE_TABLE(of, al_mc_edac_of_match);
337
/*
 * Platform driver descriptor.  No .remove callback: all resources taken in
 * probe are devm-managed (including the edac_mc_del_mc/edac_mc_free actions).
 */
static struct platform_driver al_mc_edac_driver = {
	.probe = al_mc_edac_probe,
	.driver = {
		.name = DRV_NAME,
		.of_match_table = al_mc_edac_of_match,
	},
};
345
346module_platform_driver(al_mc_edac_driver);
347
348MODULE_LICENSE("GPL v2");
349MODULE_AUTHOR("Talel Shenhar");
350MODULE_DESCRIPTION("Amazon's Annapurna Lab's Memory Controller EDAC Driver");
351

/* Source: linux/drivers/edac/al_mc_edac.c */