1 | // SPDX-License-Identifier: GPL-2.0-only |
2 | /* |
3 | * blockcheck.c |
4 | * |
5 | * Checksum and ECC codes for the OCFS2 userspace library. |
6 | * |
7 | * Copyright (C) 2006, 2008 Oracle. All rights reserved. |
8 | */ |
9 | |
10 | #include <linux/kernel.h> |
11 | #include <linux/types.h> |
12 | #include <linux/crc32.h> |
13 | #include <linux/buffer_head.h> |
14 | #include <linux/bitops.h> |
15 | #include <linux/debugfs.h> |
16 | #include <linux/module.h> |
17 | #include <linux/fs.h> |
18 | #include <asm/byteorder.h> |
19 | |
20 | #include <cluster/masklog.h> |
21 | |
22 | #include "ocfs2.h" |
23 | |
24 | #include "blockcheck.h" |
25 | |
26 | |
27 | /* |
28 | * We use the following conventions: |
29 | * |
30 | * d = # data bits |
31 | * p = # parity bits |
32 | * c = # total code bits (d + p) |
33 | */ |
34 | |
35 | |
36 | /* |
37 | * Calculate the bit offset in the hamming code buffer based on the bit's |
38 | * offset in the data buffer. Since the hamming code reserves all |
39 | * power-of-two bits for parity, the data bit number and the code bit |
40 | * number are offset by all the parity bits beforehand. |
41 | * |
42 | * Recall that bit numbers in hamming code are 1-based. This function |
43 | * takes the 0-based data bit from the caller. |
44 | * |
45 | * An example. Take bit 1 of the data buffer. 1 is a power of two (2^0), |
46 | * so it's a parity bit. 2 is a power of two (2^1), so it's a parity bit. |
47 | * 3 is not a power of two. So bit 1 of the data buffer ends up as bit 3 |
48 | * in the code buffer. |
49 | * |
50 | * The caller can pass in *p if it wants to keep track of the most recent |
51 | * number of parity bits added. This allows the function to start the |
52 | * calculation at the last place. |
53 | */ |
54 | static unsigned int calc_code_bit(unsigned int i, unsigned int *p_cache) |
55 | { |
56 | unsigned int b, p = 0; |
57 | |
58 | /* |
59 | * Data bits are 0-based, but we're talking code bits, which |
60 | * are 1-based. |
61 | */ |
62 | b = i + 1; |
63 | |
64 | /* Use the cache if it is there */ |
65 | if (p_cache) |
66 | p = *p_cache; |
67 | b += p; |
68 | |
69 | /* |
70 | * For every power of two below our bit number, bump our bit. |
71 | * |
72 | * We compare with (b + 1) because we have to compare with what b |
73 | * would be _if_ it were bumped up by the parity bit. Capice? |
74 | * |
75 | * p is set above. |
76 | */ |
77 | for (; (1 << p) < (b + 1); p++) |
78 | b++; |
79 | |
80 | if (p_cache) |
81 | *p_cache = p; |
82 | |
83 | return b; |
84 | } |
85 | |
86 | /* |
87 | * This is the low level encoder function. It can be called across |
88 | * multiple hunks just like the crc32 code. 'd' is the number of bits |
89 | * _in_this_hunk_. nr is the bit offset of this hunk. So, if you had |
90 | * two 512B buffers, you would do it like so: |
91 | * |
92 | * parity = ocfs2_hamming_encode(0, buf1, 512 * 8, 0); |
93 | * parity = ocfs2_hamming_encode(parity, buf2, 512 * 8, 512 * 8); |
94 | * |
95 | * If you just have one buffer, use ocfs2_hamming_encode_block(). |
96 | */ |
97 | u32 ocfs2_hamming_encode(u32 parity, void *data, unsigned int d, unsigned int nr) |
98 | { |
99 | unsigned int i, b, p = 0; |
100 | |
101 | BUG_ON(!d); |
102 | |
103 | /* |
104 | * b is the hamming code bit number. Hamming code specifies a |
105 | * 1-based array, but C uses 0-based. So 'i' is for C, and 'b' is |
106 | * for the algorithm. |
107 | * |
108 | * The i++ in the for loop is so that the start offset passed |
109 | * to ocfs2_find_next_bit_set() is one greater than the previously |
110 | * found bit. |
111 | */ |
112 | for (i = 0; (i = ocfs2_find_next_bit(addr: data, size: d, offset: i)) < d; i++) |
113 | { |
114 | /* |
115 | * i is the offset in this hunk, nr + i is the total bit |
116 | * offset. |
117 | */ |
118 | b = calc_code_bit(i: nr + i, p_cache: &p); |
119 | |
120 | /* |
121 | * Data bits in the resultant code are checked by |
122 | * parity bits that are part of the bit number |
123 | * representation. Huh? |
124 | * |
125 | * <wikipedia href="https://en.wikipedia.org/wiki/Hamming_code"> |
126 | * In other words, the parity bit at position 2^k |
127 | * checks bits in positions having bit k set in |
128 | * their binary representation. Conversely, for |
129 | * instance, bit 13, i.e. 1101(2), is checked by |
130 | * bits 1000(2) = 8, 0100(2)=4 and 0001(2) = 1. |
131 | * </wikipedia> |
132 | * |
133 | * Note that 'k' is the _code_ bit number. 'b' in |
134 | * our loop. |
135 | */ |
136 | parity ^= b; |
137 | } |
138 | |
139 | /* While the data buffer was treated as little endian, the |
140 | * return value is in host endian. */ |
141 | return parity; |
142 | } |
143 | |
144 | u32 ocfs2_hamming_encode_block(void *data, unsigned int blocksize) |
145 | { |
146 | return ocfs2_hamming_encode(parity: 0, data, d: blocksize * 8, nr: 0); |
147 | } |
148 | |
149 | /* |
150 | * Like ocfs2_hamming_encode(), this can handle hunks. nr is the bit |
151 | * offset of the current hunk. If bit to be fixed is not part of the |
152 | * current hunk, this does nothing. |
153 | * |
154 | * If you only have one hunk, use ocfs2_hamming_fix_block(). |
155 | */ |
156 | void ocfs2_hamming_fix(void *data, unsigned int d, unsigned int nr, |
157 | unsigned int fix) |
158 | { |
159 | unsigned int i, b; |
160 | |
161 | BUG_ON(!d); |
162 | |
163 | /* |
164 | * If the bit to fix has an hweight of 1, it's a parity bit. One |
165 | * busted parity bit is its own error. Nothing to do here. |
166 | */ |
167 | if (hweight32(fix) == 1) |
168 | return; |
169 | |
170 | /* |
171 | * nr + d is the bit right past the data hunk we're looking at. |
172 | * If fix after that, nothing to do |
173 | */ |
174 | if (fix >= calc_code_bit(i: nr + d, NULL)) |
175 | return; |
176 | |
177 | /* |
178 | * nr is the offset in the data hunk we're starting at. Let's |
179 | * start b at the offset in the code buffer. See hamming_encode() |
180 | * for a more detailed description of 'b'. |
181 | */ |
182 | b = calc_code_bit(i: nr, NULL); |
183 | /* If the fix is before this hunk, nothing to do */ |
184 | if (fix < b) |
185 | return; |
186 | |
187 | for (i = 0; i < d; i++, b++) |
188 | { |
189 | /* Skip past parity bits */ |
190 | while (hweight32(b) == 1) |
191 | b++; |
192 | |
193 | /* |
194 | * i is the offset in this data hunk. |
195 | * nr + i is the offset in the total data buffer. |
196 | * b is the offset in the total code buffer. |
197 | * |
198 | * Thus, when b == fix, bit i in the current hunk needs |
199 | * fixing. |
200 | */ |
201 | if (b == fix) |
202 | { |
203 | if (ocfs2_test_bit(nr: i, addr: data)) |
204 | ocfs2_clear_bit(i, data); |
205 | else |
206 | ocfs2_set_bit(i, data); |
207 | break; |
208 | } |
209 | } |
210 | } |
211 | |
212 | void ocfs2_hamming_fix_block(void *data, unsigned int blocksize, |
213 | unsigned int fix) |
214 | { |
215 | ocfs2_hamming_fix(data, d: blocksize * 8, nr: 0, fix); |
216 | } |
217 | |
218 | |
219 | /* |
220 | * Debugfs handling. |
221 | */ |
222 | |
223 | #ifdef CONFIG_DEBUG_FS |
224 | |
225 | static int blockcheck_u64_get(void *data, u64 *val) |
226 | { |
227 | *val = *(u64 *)data; |
228 | return 0; |
229 | } |
230 | DEFINE_DEBUGFS_ATTRIBUTE(blockcheck_fops, blockcheck_u64_get, NULL, "%llu\n" ); |
231 | |
232 | static void ocfs2_blockcheck_debug_remove(struct ocfs2_blockcheck_stats *stats) |
233 | { |
234 | if (stats) { |
235 | debugfs_remove_recursive(dentry: stats->b_debug_dir); |
236 | stats->b_debug_dir = NULL; |
237 | } |
238 | } |
239 | |
240 | static void ocfs2_blockcheck_debug_install(struct ocfs2_blockcheck_stats *stats, |
241 | struct dentry *parent) |
242 | { |
243 | struct dentry *dir; |
244 | |
245 | dir = debugfs_create_dir(name: "blockcheck" , parent); |
246 | stats->b_debug_dir = dir; |
247 | |
248 | debugfs_create_file(name: "blocks_checked" , S_IFREG | S_IRUSR, parent: dir, |
249 | data: &stats->b_check_count, fops: &blockcheck_fops); |
250 | |
251 | debugfs_create_file(name: "checksums_failed" , S_IFREG | S_IRUSR, parent: dir, |
252 | data: &stats->b_failure_count, fops: &blockcheck_fops); |
253 | |
254 | debugfs_create_file(name: "ecc_recoveries" , S_IFREG | S_IRUSR, parent: dir, |
255 | data: &stats->b_recover_count, fops: &blockcheck_fops); |
256 | |
257 | } |
258 | #else |
259 | static inline void ocfs2_blockcheck_debug_install(struct ocfs2_blockcheck_stats *stats, |
260 | struct dentry *parent) |
261 | { |
262 | } |
263 | |
264 | static inline void ocfs2_blockcheck_debug_remove(struct ocfs2_blockcheck_stats *stats) |
265 | { |
266 | } |
267 | #endif /* CONFIG_DEBUG_FS */ |
268 | |
269 | /* Always-called wrappers for starting and stopping the debugfs files */ |
270 | void ocfs2_blockcheck_stats_debugfs_install(struct ocfs2_blockcheck_stats *stats, |
271 | struct dentry *parent) |
272 | { |
273 | ocfs2_blockcheck_debug_install(stats, parent); |
274 | } |
275 | |
276 | void ocfs2_blockcheck_stats_debugfs_remove(struct ocfs2_blockcheck_stats *stats) |
277 | { |
278 | ocfs2_blockcheck_debug_remove(stats); |
279 | } |
280 | |
281 | static void ocfs2_blockcheck_inc_check(struct ocfs2_blockcheck_stats *stats) |
282 | { |
283 | u64 new_count; |
284 | |
285 | if (!stats) |
286 | return; |
287 | |
288 | spin_lock(lock: &stats->b_lock); |
289 | stats->b_check_count++; |
290 | new_count = stats->b_check_count; |
291 | spin_unlock(lock: &stats->b_lock); |
292 | |
293 | if (!new_count) |
294 | mlog(ML_NOTICE, "Block check count has wrapped\n" ); |
295 | } |
296 | |
297 | static void ocfs2_blockcheck_inc_failure(struct ocfs2_blockcheck_stats *stats) |
298 | { |
299 | u64 new_count; |
300 | |
301 | if (!stats) |
302 | return; |
303 | |
304 | spin_lock(lock: &stats->b_lock); |
305 | stats->b_failure_count++; |
306 | new_count = stats->b_failure_count; |
307 | spin_unlock(lock: &stats->b_lock); |
308 | |
309 | if (!new_count) |
310 | mlog(ML_NOTICE, "Checksum failure count has wrapped\n" ); |
311 | } |
312 | |
313 | static void ocfs2_blockcheck_inc_recover(struct ocfs2_blockcheck_stats *stats) |
314 | { |
315 | u64 new_count; |
316 | |
317 | if (!stats) |
318 | return; |
319 | |
320 | spin_lock(lock: &stats->b_lock); |
321 | stats->b_recover_count++; |
322 | new_count = stats->b_recover_count; |
323 | spin_unlock(lock: &stats->b_lock); |
324 | |
325 | if (!new_count) |
326 | mlog(ML_NOTICE, "ECC recovery count has wrapped\n" ); |
327 | } |
328 | |
329 | |
330 | |
331 | /* |
332 | * These are the low-level APIs for using the ocfs2_block_check structure. |
333 | */ |
334 | |
335 | /* |
336 | * This function generates check information for a block. |
337 | * data is the block to be checked. bc is a pointer to the |
338 | * ocfs2_block_check structure describing the crc32 and the ecc. |
339 | * |
340 | * bc should be a pointer inside data, as the function will |
341 | * take care of zeroing it before calculating the check information. If |
342 | * bc does not point inside data, the caller must make sure any inline |
343 | * ocfs2_block_check structures are zeroed. |
344 | * |
345 | * The data buffer must be in on-disk endian (little endian for ocfs2). |
346 | * bc will be filled with little-endian values and will be ready to go to |
347 | * disk. |
348 | */ |
349 | void ocfs2_block_check_compute(void *data, size_t blocksize, |
350 | struct ocfs2_block_check *bc) |
351 | { |
352 | u32 crc; |
353 | u32 ecc; |
354 | |
355 | memset(bc, 0, sizeof(struct ocfs2_block_check)); |
356 | |
357 | crc = crc32_le(crc: ~0, p: data, len: blocksize); |
358 | ecc = ocfs2_hamming_encode_block(data, blocksize); |
359 | |
360 | /* |
361 | * No ecc'd ocfs2 structure is larger than 4K, so ecc will be no |
362 | * larger than 16 bits. |
363 | */ |
364 | BUG_ON(ecc > USHRT_MAX); |
365 | |
366 | bc->bc_crc32e = cpu_to_le32(crc); |
367 | bc->bc_ecc = cpu_to_le16((u16)ecc); |
368 | } |
369 | |
370 | /* |
371 | * This function validates existing check information. Like _compute, |
372 | * the function will take care of zeroing bc before calculating check codes. |
373 | * If bc is not a pointer inside data, the caller must have zeroed any |
374 | * inline ocfs2_block_check structures. |
375 | * |
376 | * Again, the data passed in should be the on-disk endian. |
377 | */ |
378 | int ocfs2_block_check_validate(void *data, size_t blocksize, |
379 | struct ocfs2_block_check *bc, |
380 | struct ocfs2_blockcheck_stats *stats) |
381 | { |
382 | int rc = 0; |
383 | u32 bc_crc32e; |
384 | u16 bc_ecc; |
385 | u32 crc, ecc; |
386 | |
387 | ocfs2_blockcheck_inc_check(stats); |
388 | |
389 | bc_crc32e = le32_to_cpu(bc->bc_crc32e); |
390 | bc_ecc = le16_to_cpu(bc->bc_ecc); |
391 | |
392 | memset(bc, 0, sizeof(struct ocfs2_block_check)); |
393 | |
394 | /* Fast path - if the crc32 validates, we're good to go */ |
395 | crc = crc32_le(crc: ~0, p: data, len: blocksize); |
396 | if (crc == bc_crc32e) |
397 | goto out; |
398 | |
399 | ocfs2_blockcheck_inc_failure(stats); |
400 | mlog(ML_ERROR, |
401 | "CRC32 failed: stored: 0x%x, computed 0x%x. Applying ECC.\n" , |
402 | (unsigned int)bc_crc32e, (unsigned int)crc); |
403 | |
404 | /* Ok, try ECC fixups */ |
405 | ecc = ocfs2_hamming_encode_block(data, blocksize); |
406 | ocfs2_hamming_fix_block(data, blocksize, fix: ecc ^ bc_ecc); |
407 | |
408 | /* And check the crc32 again */ |
409 | crc = crc32_le(crc: ~0, p: data, len: blocksize); |
410 | if (crc == bc_crc32e) { |
411 | ocfs2_blockcheck_inc_recover(stats); |
412 | goto out; |
413 | } |
414 | |
415 | mlog(ML_ERROR, "Fixed CRC32 failed: stored: 0x%x, computed 0x%x\n" , |
416 | (unsigned int)bc_crc32e, (unsigned int)crc); |
417 | |
418 | rc = -EIO; |
419 | |
420 | out: |
421 | bc->bc_crc32e = cpu_to_le32(bc_crc32e); |
422 | bc->bc_ecc = cpu_to_le16(bc_ecc); |
423 | |
424 | return rc; |
425 | } |
426 | |
427 | /* |
428 | * This function generates check information for a list of buffer_heads. |
429 | * bhs is the blocks to be checked. bc is a pointer to the |
430 | * ocfs2_block_check structure describing the crc32 and the ecc. |
431 | * |
432 | * bc should be a pointer inside data, as the function will |
433 | * take care of zeroing it before calculating the check information. If |
434 | * bc does not point inside data, the caller must make sure any inline |
435 | * ocfs2_block_check structures are zeroed. |
436 | * |
437 | * The data buffer must be in on-disk endian (little endian for ocfs2). |
438 | * bc will be filled with little-endian values and will be ready to go to |
439 | * disk. |
440 | */ |
441 | void ocfs2_block_check_compute_bhs(struct buffer_head **bhs, int nr, |
442 | struct ocfs2_block_check *bc) |
443 | { |
444 | int i; |
445 | u32 crc, ecc; |
446 | |
447 | BUG_ON(nr < 0); |
448 | |
449 | if (!nr) |
450 | return; |
451 | |
452 | memset(bc, 0, sizeof(struct ocfs2_block_check)); |
453 | |
454 | for (i = 0, crc = ~0, ecc = 0; i < nr; i++) { |
455 | crc = crc32_le(crc, p: bhs[i]->b_data, len: bhs[i]->b_size); |
456 | /* |
457 | * The number of bits in a buffer is obviously b_size*8. |
458 | * The offset of this buffer is b_size*i, so the bit offset |
459 | * of this buffer is b_size*8*i. |
460 | */ |
461 | ecc = (u16)ocfs2_hamming_encode(parity: ecc, data: bhs[i]->b_data, |
462 | d: bhs[i]->b_size * 8, |
463 | nr: bhs[i]->b_size * 8 * i); |
464 | } |
465 | |
466 | /* |
467 | * No ecc'd ocfs2 structure is larger than 4K, so ecc will be no |
468 | * larger than 16 bits. |
469 | */ |
470 | BUG_ON(ecc > USHRT_MAX); |
471 | |
472 | bc->bc_crc32e = cpu_to_le32(crc); |
473 | bc->bc_ecc = cpu_to_le16((u16)ecc); |
474 | } |
475 | |
476 | /* |
477 | * This function validates existing check information on a list of |
478 | * buffer_heads. Like _compute_bhs, the function will take care of |
479 | * zeroing bc before calculating check codes. If bc is not a pointer |
480 | * inside data, the caller must have zeroed any inline |
481 | * ocfs2_block_check structures. |
482 | * |
483 | * Again, the data passed in should be the on-disk endian. |
484 | */ |
485 | int ocfs2_block_check_validate_bhs(struct buffer_head **bhs, int nr, |
486 | struct ocfs2_block_check *bc, |
487 | struct ocfs2_blockcheck_stats *stats) |
488 | { |
489 | int i, rc = 0; |
490 | u32 bc_crc32e; |
491 | u16 bc_ecc; |
492 | u32 crc, ecc, fix; |
493 | |
494 | BUG_ON(nr < 0); |
495 | |
496 | if (!nr) |
497 | return 0; |
498 | |
499 | ocfs2_blockcheck_inc_check(stats); |
500 | |
501 | bc_crc32e = le32_to_cpu(bc->bc_crc32e); |
502 | bc_ecc = le16_to_cpu(bc->bc_ecc); |
503 | |
504 | memset(bc, 0, sizeof(struct ocfs2_block_check)); |
505 | |
506 | /* Fast path - if the crc32 validates, we're good to go */ |
507 | for (i = 0, crc = ~0; i < nr; i++) |
508 | crc = crc32_le(crc, p: bhs[i]->b_data, len: bhs[i]->b_size); |
509 | if (crc == bc_crc32e) |
510 | goto out; |
511 | |
512 | ocfs2_blockcheck_inc_failure(stats); |
513 | mlog(ML_ERROR, |
514 | "CRC32 failed: stored: %u, computed %u. Applying ECC.\n" , |
515 | (unsigned int)bc_crc32e, (unsigned int)crc); |
516 | |
517 | /* Ok, try ECC fixups */ |
518 | for (i = 0, ecc = 0; i < nr; i++) { |
519 | /* |
520 | * The number of bits in a buffer is obviously b_size*8. |
521 | * The offset of this buffer is b_size*i, so the bit offset |
522 | * of this buffer is b_size*8*i. |
523 | */ |
524 | ecc = (u16)ocfs2_hamming_encode(parity: ecc, data: bhs[i]->b_data, |
525 | d: bhs[i]->b_size * 8, |
526 | nr: bhs[i]->b_size * 8 * i); |
527 | } |
528 | fix = ecc ^ bc_ecc; |
529 | for (i = 0; i < nr; i++) { |
530 | /* |
531 | * Try the fix against each buffer. It will only affect |
532 | * one of them. |
533 | */ |
534 | ocfs2_hamming_fix(data: bhs[i]->b_data, d: bhs[i]->b_size * 8, |
535 | nr: bhs[i]->b_size * 8 * i, fix); |
536 | } |
537 | |
538 | /* And check the crc32 again */ |
539 | for (i = 0, crc = ~0; i < nr; i++) |
540 | crc = crc32_le(crc, p: bhs[i]->b_data, len: bhs[i]->b_size); |
541 | if (crc == bc_crc32e) { |
542 | ocfs2_blockcheck_inc_recover(stats); |
543 | goto out; |
544 | } |
545 | |
546 | mlog(ML_ERROR, "Fixed CRC32 failed: stored: %u, computed %u\n" , |
547 | (unsigned int)bc_crc32e, (unsigned int)crc); |
548 | |
549 | rc = -EIO; |
550 | |
551 | out: |
552 | bc->bc_crc32e = cpu_to_le32(bc_crc32e); |
553 | bc->bc_ecc = cpu_to_le16(bc_ecc); |
554 | |
555 | return rc; |
556 | } |
557 | |
558 | /* |
559 | * These are the main API. They check the superblock flag before |
560 | * calling the underlying operations. |
561 | * |
562 | * They expect the buffer(s) to be in disk format. |
563 | */ |
564 | void ocfs2_compute_meta_ecc(struct super_block *sb, void *data, |
565 | struct ocfs2_block_check *bc) |
566 | { |
567 | if (ocfs2_meta_ecc(OCFS2_SB(sb))) |
568 | ocfs2_block_check_compute(data, blocksize: sb->s_blocksize, bc); |
569 | } |
570 | |
571 | int ocfs2_validate_meta_ecc(struct super_block *sb, void *data, |
572 | struct ocfs2_block_check *bc) |
573 | { |
574 | int rc = 0; |
575 | struct ocfs2_super *osb = OCFS2_SB(sb); |
576 | |
577 | if (ocfs2_meta_ecc(osb)) |
578 | rc = ocfs2_block_check_validate(data, blocksize: sb->s_blocksize, bc, |
579 | stats: &osb->osb_ecc_stats); |
580 | |
581 | return rc; |
582 | } |
583 | |
584 | void ocfs2_compute_meta_ecc_bhs(struct super_block *sb, |
585 | struct buffer_head **bhs, int nr, |
586 | struct ocfs2_block_check *bc) |
587 | { |
588 | if (ocfs2_meta_ecc(OCFS2_SB(sb))) |
589 | ocfs2_block_check_compute_bhs(bhs, nr, bc); |
590 | } |
591 | |
592 | int ocfs2_validate_meta_ecc_bhs(struct super_block *sb, |
593 | struct buffer_head **bhs, int nr, |
594 | struct ocfs2_block_check *bc) |
595 | { |
596 | int rc = 0; |
597 | struct ocfs2_super *osb = OCFS2_SB(sb); |
598 | |
599 | if (ocfs2_meta_ecc(osb)) |
600 | rc = ocfs2_block_check_validate_bhs(bhs, nr, bc, |
601 | stats: &osb->osb_ecc_stats); |
602 | |
603 | return rc; |
604 | } |
605 | |
606 | |