1 | // SPDX-License-Identifier: GPL-2.0-only |
2 | /* |
3 | * Copyright © 2012 NetCommWireless |
4 | * Iwo Mergler <Iwo.Mergler@netcommwireless.com.au> |
5 | * |
 * Test for multi-bit error recovery on a NAND page. This mostly tests the
 * ECC controller / driver.
8 | * |
9 | * There are two test modes: |
10 | * |
11 | * 0 - artificially inserting bit errors until the ECC fails |
12 | * This is the default method and fairly quick. It should |
13 | * be independent of the quality of the FLASH. |
14 | * |
15 | * 1 - re-writing the same pattern repeatedly until the ECC fails. |
16 | * This method relies on the physics of NAND FLASH to eventually |
17 | * generate '0' bits if '1' has been written sufficient times. |
18 | * Depending on the NAND, the first bit errors will appear after |
19 | * 1000 or more writes and then will usually snowball, reaching the |
20 | * limits of the ECC quickly. |
21 | * |
22 | * The test stops after 10000 cycles, should your FLASH be |
23 | * exceptionally good and not generate bit errors before that. Try |
24 | * a different page in that case. |
25 | * |
26 | * Please note that neither of these tests will significantly 'use up' any |
27 | * FLASH endurance. Only a maximum of two erase operations will be performed. |
28 | */ |
29 | |
30 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt |
31 | |
32 | #include <linux/init.h> |
33 | #include <linux/module.h> |
34 | #include <linux/moduleparam.h> |
35 | #include <linux/mtd/mtd.h> |
36 | #include <linux/err.h> |
37 | #include <linux/mtd/rawnand.h> |
38 | #include <linux/slab.h> |
39 | #include "mtd_test.h" |
40 | |
41 | static int dev; |
42 | module_param(dev, int, S_IRUGO); |
43 | MODULE_PARM_DESC(dev, "MTD device number to use" ); |
44 | |
45 | static unsigned page_offset; |
46 | module_param(page_offset, uint, S_IRUGO); |
47 | MODULE_PARM_DESC(page_offset, "Page number relative to dev start" ); |
48 | |
49 | static unsigned seed; |
50 | module_param(seed, uint, S_IRUGO); |
51 | MODULE_PARM_DESC(seed, "Random seed" ); |
52 | |
53 | static int mode; |
54 | module_param(mode, int, S_IRUGO); |
55 | MODULE_PARM_DESC(mode, "0=incremental errors, 1=overwrite test" ); |
56 | |
57 | static unsigned max_overwrite = 10000; |
58 | |
59 | static loff_t offset; /* Offset of the page we're using. */ |
60 | static unsigned eraseblock; /* Eraseblock number for our page. */ |
61 | |
62 | /* We assume that the ECC can correct up to a certain number |
63 | * of biterrors per subpage. */ |
64 | static unsigned subsize; /* Size of subpages */ |
65 | static unsigned subcount; /* Number of subpages per page */ |
66 | |
67 | static struct mtd_info *mtd; /* MTD device */ |
68 | |
69 | static uint8_t *wbuffer; /* One page write / compare buffer */ |
70 | static uint8_t *rbuffer; /* One page read buffer */ |
71 | |
/* Deterministic pseudo-random byte for a given offset.
 *
 * The offset is XORed with a fixed constant, folded with three
 * xor-shift steps, and the low byte of the result is returned with its
 * bit order mirrored (bit 0 becomes bit 7 and so on). */
static uint8_t hash(unsigned offset)
{
	unsigned mixed = offset ^ 0x7f7edfd3;
	uint8_t low;
	uint8_t mirrored = 0;
	int bit;

	mixed ^= mixed >> 3;
	mixed ^= mixed >> 5;
	mixed ^= mixed >> 13;
	low = mixed & 0xFF;

	/* Mirror the byte: source bit 'bit' lands on bit '7 - bit'. */
	for (bit = 0; bit < 8; bit++) {
		if (low & (1u << bit))
			mirrored |= (uint8_t)(0x80u >> bit);
	}

	return mirrored;
}
88 | |
89 | /* Writes wbuffer to page */ |
90 | static int write_page(int log) |
91 | { |
92 | if (log) |
93 | pr_info("write_page\n" ); |
94 | |
95 | return mtdtest_write(mtd, addr: offset, size: mtd->writesize, buf: wbuffer); |
96 | } |
97 | |
98 | /* Re-writes the data area while leaving the OOB alone. */ |
99 | static int rewrite_page(int log) |
100 | { |
101 | int err = 0; |
102 | struct mtd_oob_ops ops = { }; |
103 | |
104 | if (log) |
105 | pr_info("rewrite page\n" ); |
106 | |
107 | ops.mode = MTD_OPS_RAW; /* No ECC */ |
108 | ops.len = mtd->writesize; |
109 | ops.retlen = 0; |
110 | ops.ooblen = 0; |
111 | ops.oobretlen = 0; |
112 | ops.ooboffs = 0; |
113 | ops.datbuf = wbuffer; |
114 | ops.oobbuf = NULL; |
115 | |
116 | err = mtd_write_oob(mtd, to: offset, ops: &ops); |
117 | if (err || ops.retlen != mtd->writesize) { |
118 | pr_err("error: write_oob failed (%d)\n" , err); |
119 | if (!err) |
120 | err = -EIO; |
121 | } |
122 | |
123 | return err; |
124 | } |
125 | |
126 | /* Reads page into rbuffer. Returns number of corrected bit errors (>=0) |
127 | * or error (<0) */ |
128 | static int read_page(int log) |
129 | { |
130 | int err = 0; |
131 | size_t read; |
132 | struct mtd_ecc_stats oldstats; |
133 | |
134 | if (log) |
135 | pr_info("read_page\n" ); |
136 | |
137 | /* Saving last mtd stats */ |
138 | memcpy(&oldstats, &mtd->ecc_stats, sizeof(oldstats)); |
139 | |
140 | err = mtd_read(mtd, from: offset, len: mtd->writesize, retlen: &read, buf: rbuffer); |
141 | if (!err || err == -EUCLEAN) |
142 | err = mtd->ecc_stats.corrected - oldstats.corrected; |
143 | |
144 | if (err < 0 || read != mtd->writesize) { |
145 | pr_err("error: read failed at %#llx\n" , (long long)offset); |
146 | if (err >= 0) |
147 | err = -EIO; |
148 | } |
149 | |
150 | return err; |
151 | } |
152 | |
153 | /* Verifies rbuffer against random sequence */ |
154 | static int verify_page(int log) |
155 | { |
156 | unsigned i, errs = 0; |
157 | |
158 | if (log) |
159 | pr_info("verify_page\n" ); |
160 | |
161 | for (i = 0; i < mtd->writesize; i++) { |
162 | if (rbuffer[i] != hash(offset: i+seed)) { |
163 | pr_err("Error: page offset %u, expected %02x, got %02x\n" , |
164 | i, hash(i+seed), rbuffer[i]); |
165 | errs++; |
166 | } |
167 | } |
168 | |
169 | if (errs) |
170 | return -EIO; |
171 | else |
172 | return 0; |
173 | } |
174 | |
175 | #define CBIT(v, n) ((v) & (1 << (n))) |
176 | #define BCLR(v, n) ((v) = (v) & ~(1 << (n))) |
177 | |
178 | /* Finds the first '1' bit in wbuffer starting at offset 'byte' |
179 | * and sets it to '0'. */ |
180 | static int insert_biterror(unsigned byte) |
181 | { |
182 | int bit; |
183 | |
184 | while (byte < mtd->writesize) { |
185 | for (bit = 7; bit >= 0; bit--) { |
186 | if (CBIT(wbuffer[byte], bit)) { |
187 | BCLR(wbuffer[byte], bit); |
188 | pr_info("Inserted biterror @ %u/%u\n" , byte, bit); |
189 | return 0; |
190 | } |
191 | } |
192 | byte++; |
193 | } |
194 | pr_err("biterror: Failed to find a '1' bit\n" ); |
195 | return -EIO; |
196 | } |
197 | |
198 | /* Writes 'random' data to page and then introduces deliberate bit |
199 | * errors into the page, while verifying each step. */ |
200 | static int incremental_errors_test(void) |
201 | { |
202 | int err = 0; |
203 | unsigned i; |
204 | unsigned errs_per_subpage = 0; |
205 | |
206 | pr_info("incremental biterrors test\n" ); |
207 | |
208 | for (i = 0; i < mtd->writesize; i++) |
209 | wbuffer[i] = hash(offset: i+seed); |
210 | |
211 | err = write_page(log: 1); |
212 | if (err) |
213 | goto exit; |
214 | |
215 | while (1) { |
216 | |
217 | err = rewrite_page(log: 1); |
218 | if (err) |
219 | goto exit; |
220 | |
221 | err = read_page(log: 1); |
222 | if (err > 0) |
223 | pr_info("Read reported %d corrected bit errors\n" , err); |
224 | if (err < 0) { |
225 | pr_err("After %d biterrors per subpage, read reported error %d\n" , |
226 | errs_per_subpage, err); |
227 | err = 0; |
228 | goto exit; |
229 | } |
230 | |
231 | err = verify_page(log: 1); |
232 | if (err) { |
233 | pr_err("ECC failure, read data is incorrect despite read success\n" ); |
234 | goto exit; |
235 | } |
236 | |
237 | pr_info("Successfully corrected %d bit errors per subpage\n" , |
238 | errs_per_subpage); |
239 | |
240 | for (i = 0; i < subcount; i++) { |
241 | err = insert_biterror(byte: i * subsize); |
242 | if (err < 0) |
243 | goto exit; |
244 | } |
245 | errs_per_subpage++; |
246 | } |
247 | |
248 | exit: |
249 | return err; |
250 | } |
251 | |
252 | |
253 | /* Writes 'random' data to page and then re-writes that same data repeatedly. |
254 | This eventually develops bit errors (bits written as '1' will slowly become |
255 | '0'), which are corrected as far as the ECC is capable of. */ |
256 | static int overwrite_test(void) |
257 | { |
258 | int err = 0; |
259 | unsigned i; |
260 | unsigned max_corrected = 0; |
261 | unsigned opno = 0; |
262 | /* We don't expect more than this many correctable bit errors per |
263 | * page. */ |
264 | #define MAXBITS 512 |
265 | static unsigned bitstats[MAXBITS]; /* bit error histogram. */ |
266 | |
267 | memset(bitstats, 0, sizeof(bitstats)); |
268 | |
269 | pr_info("overwrite biterrors test\n" ); |
270 | |
271 | for (i = 0; i < mtd->writesize; i++) |
272 | wbuffer[i] = hash(offset: i+seed); |
273 | |
274 | err = write_page(log: 1); |
275 | if (err) |
276 | goto exit; |
277 | |
278 | while (opno < max_overwrite) { |
279 | |
280 | err = write_page(log: 0); |
281 | if (err) |
282 | break; |
283 | |
284 | err = read_page(log: 0); |
285 | if (err >= 0) { |
286 | if (err >= MAXBITS) { |
287 | pr_info("Implausible number of bit errors corrected\n" ); |
288 | err = -EIO; |
289 | break; |
290 | } |
291 | bitstats[err]++; |
292 | if (err > max_corrected) { |
293 | max_corrected = err; |
294 | pr_info("Read reported %d corrected bit errors\n" , |
295 | err); |
296 | } |
297 | } else { /* err < 0 */ |
298 | pr_info("Read reported error %d\n" , err); |
299 | err = 0; |
300 | break; |
301 | } |
302 | |
303 | err = verify_page(log: 0); |
304 | if (err) { |
305 | bitstats[max_corrected] = opno; |
306 | pr_info("ECC failure, read data is incorrect despite read success\n" ); |
307 | break; |
308 | } |
309 | |
310 | err = mtdtest_relax(); |
311 | if (err) |
312 | break; |
313 | |
314 | opno++; |
315 | } |
316 | |
317 | /* At this point bitstats[0] contains the number of ops with no bit |
318 | * errors, bitstats[1] the number of ops with 1 bit error, etc. */ |
319 | pr_info("Bit error histogram (%d operations total):\n" , opno); |
320 | for (i = 0; i < max_corrected; i++) |
321 | pr_info("Page reads with %3d corrected bit errors: %d\n" , |
322 | i, bitstats[i]); |
323 | |
324 | exit: |
325 | return err; |
326 | } |
327 | |
328 | static int __init mtd_nandbiterrs_init(void) |
329 | { |
330 | int err = 0; |
331 | |
332 | printk("\n" ); |
333 | printk(KERN_INFO "==================================================\n" ); |
334 | pr_info("MTD device: %d\n" , dev); |
335 | |
336 | mtd = get_mtd_device(NULL, num: dev); |
337 | if (IS_ERR(ptr: mtd)) { |
338 | err = PTR_ERR(ptr: mtd); |
339 | pr_err("error: cannot get MTD device\n" ); |
340 | goto exit_mtddev; |
341 | } |
342 | |
343 | if (!mtd_type_is_nand(mtd)) { |
344 | pr_info("this test requires NAND flash\n" ); |
345 | err = -ENODEV; |
346 | goto exit_nand; |
347 | } |
348 | |
349 | pr_info("MTD device size %llu, eraseblock=%u, page=%u, oob=%u\n" , |
350 | (unsigned long long)mtd->size, mtd->erasesize, |
351 | mtd->writesize, mtd->oobsize); |
352 | |
353 | subsize = mtd->writesize >> mtd->subpage_sft; |
354 | subcount = mtd->writesize / subsize; |
355 | |
356 | pr_info("Device uses %d subpages of %d bytes\n" , subcount, subsize); |
357 | |
358 | offset = (loff_t)page_offset * mtd->writesize; |
359 | eraseblock = mtd_div_by_eb(sz: offset, mtd); |
360 | |
361 | pr_info("Using page=%u, offset=%llu, eraseblock=%u\n" , |
362 | page_offset, offset, eraseblock); |
363 | |
364 | wbuffer = kmalloc(size: mtd->writesize, GFP_KERNEL); |
365 | if (!wbuffer) { |
366 | err = -ENOMEM; |
367 | goto exit_wbuffer; |
368 | } |
369 | |
370 | rbuffer = kmalloc(size: mtd->writesize, GFP_KERNEL); |
371 | if (!rbuffer) { |
372 | err = -ENOMEM; |
373 | goto exit_rbuffer; |
374 | } |
375 | |
376 | err = mtdtest_erase_eraseblock(mtd, ebnum: eraseblock); |
377 | if (err) |
378 | goto exit_error; |
379 | |
380 | if (mode == 0) |
381 | err = incremental_errors_test(); |
382 | else |
383 | err = overwrite_test(); |
384 | |
385 | if (err) |
386 | goto exit_error; |
387 | |
388 | /* We leave the block un-erased in case of test failure. */ |
389 | err = mtdtest_erase_eraseblock(mtd, ebnum: eraseblock); |
390 | if (err) |
391 | goto exit_error; |
392 | |
393 | err = -EIO; |
394 | pr_info("finished successfully.\n" ); |
395 | printk(KERN_INFO "==================================================\n" ); |
396 | |
397 | exit_error: |
398 | kfree(objp: rbuffer); |
399 | exit_rbuffer: |
400 | kfree(objp: wbuffer); |
401 | exit_wbuffer: |
402 | /* Nothing */ |
403 | exit_nand: |
404 | put_mtd_device(mtd); |
405 | exit_mtddev: |
406 | return err; |
407 | } |
408 | |
409 | static void __exit mtd_nandbiterrs_exit(void) |
410 | { |
411 | return; |
412 | } |
413 | |
414 | module_init(mtd_nandbiterrs_init); |
415 | module_exit(mtd_nandbiterrs_exit); |
416 | |
417 | MODULE_DESCRIPTION("NAND bit error recovery test" ); |
418 | MODULE_AUTHOR("Iwo Mergler" ); |
419 | MODULE_LICENSE("GPL" ); |
420 | |