// SPDX-License-Identifier: GPL-2.0
/*
 * Bad block management
 *
 * - Heavily based on MD badblocks code from Neil Brown
 *
 * Copyright (c) 2015, Intel Corporation.
 */

#include <linux/badblocks.h>
#include <linux/seqlock.h>
#include <linux/device.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/stddef.h>
#include <linux/types.h>
#include <linux/slab.h>

/*
 * The purpose of badblocks set/clear is to manage ranges of bad blocks,
 * identified by LBA addresses.
 *
 * When the caller of badblocks_set() wants to set a range of bad blocks, the
 * setting range can be acked or unacked. The setting range may merge with,
 * overwrite, or skip already set ranges, depending on how the ranges overlap
 * or are adjacent, and on the acknowledgment type of each range. It gets more
 * complicated when the setting range covers multiple already set bad block
 * ranges, with the additional restrictions of the maximum length of each bad
 * range and the limited space of the bad blocks table.
 *
 * It is difficult and unnecessary to take care of all the possible situations
 * at once. A large setting range can be handled by dividing it into smaller
 * ones whenever an overlap, the maximum range length, or a full bad table is
 * encountered. Each time only a smaller piece of the bad range is handled,
 * with a limited number of conditions for how it interacts with possibly
 * overlapped or adjacent already set bad block ranges. The hard, complicated
 * problem then becomes much simpler to handle properly.
 *
 * When setting a range of bad blocks in the bad table, the simplified
 * situations to be considered are: (the already set bad block ranges are
 * named with prefix E, and the setting bad block range is named with prefix S)
 *
 * 1) A setting range is not overlapped or adjacent to any other already set
 *    bad block range.
 *                            +--------+
 *                            |    S   |
 *                            +--------+
 *        +-------------+               +-------------+
 *        |      E1     |               |      E2     |
 *        +-------------+               +-------------+
 *    For this situation if the bad blocks table is not full, just allocate a
 *    free slot from the bad blocks table to mark the setting range S. The
 *    result is,
 *        +-------------+  +--------+  +-------------+
 *        |      E1     |  |    S   |  |      E2     |
 *        +-------------+  +--------+  +-------------+
 * 2) A setting range starts exactly at a start LBA of an already set bad
 *    blocks range.
 * 2.1) The setting range size < already set range size
 *        +--------+
 *        |    S   |
 *        +--------+
 *        +-------------+
 *        |      E      |
 *        +-------------+
 * 2.1.1) If S and E are both acked or both unacked, the setting range S can
 *    be merged into the existing bad range E. The result is,
 *        +-------------+
 *        |      S      |
 *        +-------------+
 * 2.1.2) If S is unacked and E is acked, the setting will be denied, and the
 *    result is,
 *        +-------------+
 *        |      E      |
 *        +-------------+
 * 2.1.3) If S is acked and E is unacked, range S can overwrite the head of E.
 *    An extra slot from the bad blocks table will be allocated for S, and the
 *    head of E will move to the end of the inserted range S. The result is,
 *        +--------+----+
 *        |    S   |  E |
 *        +--------+----+
 * 2.2) The setting range size == already set range size
 * 2.2.1) If S and E are both acked or both unacked, the setting range S can
 *    be merged into the existing bad range E. The result is,
 *        +-------------+
 *        |      S      |
 *        +-------------+
 * 2.2.2) If S is unacked and E is acked, the setting will be denied, and the
 *    result is,
 *        +-------------+
 *        |      E      |
 *        +-------------+
 * 2.2.3) If S is acked and E is unacked, range S can overwrite all of the
 *    bad blocks range E. The result is,
 *        +-------------+
 *        |      S      |
 *        +-------------+
 * 2.3) The setting range size > already set range size
 *        +-------------------+
 *        |          S        |
 *        +-------------------+
 *        +-------------+
 *        |      E      |
 *        +-------------+
 *    For this situation, the setting range S can be treated as two parts: the
 *    first part (S1) is the same size as the already set range E, and the
 *    second part (S2) is the rest of the setting range.
 *        +-------------+-----+        +-------------+      +-----+
 *        |      S1     |  S2 |        |      S1     |      |  S2 |
 *        +-------------+-----+  ===>  +-------------+      +-----+
 *        +-------------+              +-------------+
 *        |      E      |              |      E      |
 *        +-------------+              +-------------+
 *    Now we only focus on how to handle the setting range S1 and the already
 *    set range E, which is already explained in 2.2); the rest part S2 will
 *    be handled later, in the next loop.
 * 3) A setting range starts before the start LBA of an already set bad
 *    blocks range.
 *        +-------------+
 *        |      S      |
 *        +-------------+
 *             +-------------+
 *             |      E      |
 *             +-------------+
 *    For this situation, the setting range S can be divided into two parts:
 *    the first part (S1) ends at the start LBA of the already set range E,
 *    and the second part (S2) starts exactly at the start LBA of E.
 *        +----+---------+             +----+   +---------+
 *        | S1 |    S2   |             | S1 |   |    S2   |
 *        +----+---------+      ===>   +----+   +---------+
 *             +-------------+                  +-------------+
 *             |      E      |                  |      E      |
 *             +-------------+                  +-------------+
 *    Now only the first part S1 should be handled in this loop, which is in
 *    a similar condition as 1). The rest part S2 has exactly the same start
 *    LBA as the already set range E, and will be handled in the next loop as
 *    one of the situations in 2).
 * 4) A setting range starts after the start LBA of an already set bad blocks
 *    range.
 * 4.1) If the setting range S exactly matches the tail part of the already
 *    set bad blocks range E, as the following chart shows,
 *            +---------+
 *            |    S    |
 *            +---------+
 *        +-------------+
 *        |      E      |
 *        +-------------+
 * 4.1.1) If ranges S and E have the same acknowledge value (both acked or
 *    both unacked), they will be merged into one. The result is,
 *        +-------------+
 *        |      S      |
 *        +-------------+
 * 4.1.2) If range E is acked and the setting range S is unacked, the setting
 *    request of S will be rejected. The result is,
 *        +-------------+
 *        |      E      |
 *        +-------------+
 * 4.1.3) If range E is unacked and the setting range S is acked, then S may
 *    overwrite the overlapped part of E. The result is,
 *        +---+---------+
 *        | E |    S    |
 *        +---+---------+
 * 4.2) If the setting range S stays in the middle of an already set range E,
 *    as the following chart shows,
 *             +----+
 *             |  S |
 *             +----+
 *        +--------------+
 *        |       E      |
 *        +--------------+
 * 4.2.1) If ranges S and E have the same acknowledge value (both acked or
 *    both unacked), they will be merged into one. The result is,
 *        +--------------+
 *        |       S      |
 *        +--------------+
 * 4.2.2) If range E is acked and the setting range S is unacked, the setting
 *    request of S will be rejected. The result is also,
 *        +--------------+
 *        |       E      |
 *        +--------------+
 * 4.2.3) If range E is unacked and the setting range S is acked, then S will
 *    be inserted into the middle of E and split the previous range E into two
 *    parts (E1 and E2). The result is,
 *        +----+----+----+
 *        | E1 |  S | E2 |
 *        +----+----+----+
 * 4.3) If the setting bad blocks range S is overlapped with an already set
 *    bad blocks range E, such that S starts after the start LBA of E and
 *    ends after the end LBA of E, as the following chart shows,
 *              +-------------------+
 *              |          S        |
 *              +-------------------+
 *        +-------------+
 *        |      E      |
 *        +-------------+
 *    For this situation the range S can be divided into two parts: the first
 *    part (S1) ends at the end LBA of range E, and the second part (S2) is
 *    the rest of the original S.
 *              +---------+---------+        +---------+      +---------+
 *              |    S1   |    S2   |        |    S1   |      |    S2   |
 *              +---------+---------+  ===>  +---------+      +---------+
 *        +-------------+              +-------------+
 *        |      E      |              |      E      |
 *        +-------------+              +-------------+
 *    Now in this loop the setting range S1 and the already set range E can
 *    be handled as in situation 4.1); the rest range S2 will be handled in
 *    the next loop and is ignored in this one.
 * 5) A setting bad blocks range S is adjacent to one or more already set bad
 *    blocks range(s), and they all have the same acknowledge value.
 * 5.1) Front merge: the already set bad blocks range E is before the setting
 *    range S and they are adjacent,
 *                +------+
 *                |   S  |
 *                +------+
 *        +-------+
 *        |   E   |
 *        +-------+
 * 5.1.1) When the total size of ranges S and E is <= BB_MAX_LEN, and their
 *    acknowledge values are the same, the setting range S can front merge
 *    into range E. The result is,
 *        +--------------+
 *        |       S      |
 *        +--------------+
 * 5.1.2) Otherwise these two ranges cannot merge; just insert the setting
 *    range S right after the already set range E into the bad blocks table.
 *    The result is,
 *        +--------+------+
 *        |    E   |   S  |
 *        +--------+------+
 * 6) Special cases which the above conditions cannot handle
 * 6.1) Multiple already set ranges may merge into fewer ones in a full bad
 *    table
 *        +-------------------------------------------------------+
 *        |                           S                           |
 *        +-------------------------------------------------------+
 *        |<----- BB_MAX_LEN ----->|
 *                                 +-----+     +-----+   +-----+
 *                                 |  E1 |     |  E2 |   |  E3 |
 *                                 +-----+     +-----+   +-----+
 *    In the above example, when the bad blocks table is full, inserting the
 *    first part of the setting range S will fail because no more slots can
 *    be allocated from the bad blocks table. In this situation a proper
 *    setting method should go through the whole setting bad blocks range and
 *    look for chances to merge already set ranges into fewer ones. When a
 *    slot becomes available in the bad blocks table, retry to handle as many
 *    of the remaining setting bad block ranges as possible.
 *        +------------------------+
 *        |          S3            |
 *        +------------------------+
 *        |<----- BB_MAX_LEN ----->|
 *                                 +-----+-----+-----+---+-----+--+
 *                                 |    S1     |       S2        |
 *                                 +-----+-----+-----+---+-----+--+
 *    The above chart shows that although the first part (S3) cannot be
 *    inserted due to no space in the bad blocks table, the following E1, E2
 *    and E3 ranges can be merged with the rest of S into the smaller set of
 *    ranges S1 and S2. Now there is 1 free slot in the bad blocks table.
 *        +------------------------+-----+-----+-----+---+-----+--+
 *        |          S3            |    S1     |        S2       |
 *        +------------------------+-----+-----+-----+---+-----+--+
 *    Since the bad blocks table is not full anymore, retry the original
 *    setting range S. Now the setting range S3 can be inserted into the bad
 *    blocks table with the slot freed by the earlier merges.
 * 6.2) Front merge after overwrite
 *    In the following example, in the bad blocks table, E1 is an acked bad
 *    blocks range and E2 is an unacked bad blocks range, therefore they are
 *    not able to merge into a larger range. The setting bad blocks range S
 *    is acked, therefore part of E2 can be overwritten by S.
 *                      +--------+
 *                      |    S   |                    acknowledged
 *                      +--------+                    S:       1
 *              +-------+-------------+               E1:      1
 *              |   E1  |    E2       |               E2:      0
 *              +-------+-------------+
 *    With the previous simplified routines, after overwriting part of E2
 *    with S, the bad blocks table should be (E3 is the remaining part of E2
 *    which is not overwritten by S),
 *                                                    acknowledged
 *              +-------+--------+----+               S:       1
 *              |   E1  |    S   | E3 |               E1:      1
 *              +-------+--------+----+               E3:      0
 *    The above result is correct but not perfect. Ranges E1 and S in the bad
 *    blocks table are both acked; merging them into one larger range may
 *    occupy less bad blocks table space and make badblocks_check() faster.
 *    Therefore in such a situation, after overwriting range S, the previous
 *    range E1 should be checked for a possible front combination. Then the
 *    ideal result can be,
 *              +----------------+----+               acknowledged
 *              |       E1       | E3 |               E1:      1
 *              +----------------+----+               E3:      0
 * 6.3) Behind merge: the already set bad blocks range E is behind the setting
 *    range S and they are adjacent. Normally we don't need to care about
 *    this, because a front merge handles it while going through range S from
 *    head to tail, except for the tail part of range S. When the setting
 *    range S is fully handled, none of the above simplified routines check
 *    whether the tail LBA of range S is adjacent to the next already set
 *    range, so they would not merge them even when it is possible.
 *        +------+
 *        |   S  |
 *        +------+
 *               +-------+
 *               |   E   |
 *               +-------+
 *    For the above special situation, when the setting range S is fully
 *    handled and the loop ends, an extra check is necessary for whether the
 *    next already set range E is right after S and mergeable.
 * 6.3.1) When the total size of ranges E and S is <= BB_MAX_LEN, and their
 *    acknowledge values are the same, the setting range S can behind merge
 *    into range E. The result is,
 *        +--------------+
 *        |       S      |
 *        +--------------+
 * 6.3.2) Otherwise these two ranges cannot merge; just insert the setting
 *    range S in front of the already set range E in the bad blocks table.
 *    The result is,
 *        +------+-------+
 *        |   S  |   E   |
 *        +------+-------+
 *
 * All the above 5 simplified situations and 3 special cases may cover 99%+ of
 * the bad block range setting conditions. There may be some rare corner case
 * that is not considered or optimized, but it won't hurt if badblocks_set()
 * fails due to no space, or if some ranges are not merged to save bad blocks
 * table space.
 *
 * Inside badblocks_set(), each iteration starts by jumping to the re_insert
 * label, and every iteration calls prev_badblocks() to find the already set
 * range which starts before or at the current setting range. Since the
 * setting bad blocks range is handled from head to tail, in most cases it is
 * unnecessary to do the binary search inside prev_badblocks(): a hint can be
 * provided to prev_badblocks() for a fast path, so the expensive binary
 * search can be avoided. In my test with the hint to prev_badblocks(), except
 * for the first iteration, all the remaining calls to prev_badblocks() can go
 * into the fast path and return the correct bad blocks table index
 * immediately.
 *
 *
 * Clearing a bad blocks range from the bad block table follows a similar idea
 * to setting, but is much simpler. The only thing to notice is that when the
 * clearing range hits the middle of a bad block range, the existing bad block
 * range will split into two, and one more item has to be added to the bad
 * block table. The simplified situations to be considered are: (the already
 * set bad block ranges in the bad block table are named with prefix E, and
 * the clearing bad blocks range is named with prefix C)
 *
 * 1) A clearing range is not overlapped with any already set range in the bad
 *    block table.
 *    +-----+         |   +-----+         |   +-----+
 *    |  C  |         |   |  C  |         |   |  C  |
 *    +-----+ or      |   +-----+ or      |   +-----+
 *            +---+   |   +----+  +----+  |   +---+
 *            | E |   |   | E1 |  | E2 |  |   | E |
 *            +---+   |   +----+  +----+  |   +---+
 *    For the above situations, there is no bad block to clear and no failure
 *    happens, so simply return 0.
 * 2) The clearing range hits the middle of an already set bad blocks range in
 *    the bad block table.
 *            +---+
 *            | C |
 *            +---+
 *    +-----------------+
 *    |         E       |
 *    +-----------------+
 *    In this situation if the bad block table is not full, the range E will
 *    be split into two ranges E1 and E2. The result is,
 *    +------+   +------+
 *    |  E1  |   |  E2  |
 *    +------+   +------+
 * 3) The clearing range starts exactly at the same LBA as an already set bad
 *    block range from the bad block table.
 * 3.1) Partially covered at the head part
 *    +------------+
 *    |     C      |
 *    +------------+
 *    +-----------------+
 *    |         E       |
 *    +-----------------+
 *    For this situation, the overlapped already set range will update its
 *    start LBA to the end of C and shrink its length to
 *    BB_LEN(E) - BB_LEN(C). No item is deleted from the bad block table. The
 *    result is,
 *                 +----+
 *                 | E1 |
 *                 +----+
 * 3.2) Exactly fully covered
 *    +-----------------+
 *    |         C       |
 *    +-----------------+
 *    +-----------------+
 *    |         E       |
 *    +-----------------+
 *    For this situation the whole bad blocks range E will be cleared and its
 *    corresponding item is deleted from the bad block table.
 * 4) The clearing range ends exactly at the same LBA as an already set bad
 *    block range.
 *          +-------+
 *          |   C   |
 *          +-------+
 *    +-----------------+
 *    |         E       |
 *    +-----------------+
 *    For the above situation, the already set range E is updated to shrink
 *    its end to the start of C, and reduce its length to
 *    BB_LEN(E) - BB_LEN(C). The result is,
 *    +---------+
 *    |    E    |
 *    +---------+
 * 5) The clearing range is partially overlapped with an already set bad block
 *    range from the bad block table.
 * 5.1) The already set bad block range is front overlapped with the clearing
 *    range.
 *    +----------+
 *    |     C    |
 *    +----------+
 *          +------------+
 *          |      E     |
 *          +------------+
 *    For such a situation, the clearing range C can be treated as two parts.
 *    The first part ends at the start LBA of range E, and the second part
 *    starts at the same LBA as range E.
 *    +----+-----+                    +----+   +-----+
 *    | C1 |  C2 |                    | C1 |   |  C2 |
 *    +----+-----+              ===>  +----+   +-----+
 *          +------------+                  +------------+
 *          |      E     |                  |      E     |
 *          +------------+                  +------------+
 *    Now the first part C1 can be handled as condition 1), and the second
 *    part C2 can be handled as condition 3.1) in the next loop.
 * 5.2) The already set bad block range is behind overlapped with the clearing
 *    range.
 *          +----------+
 *          |     C    |
 *          +----------+
 *    +------------+
 *    |      E     |
 *    +------------+
 *    For such a situation, the clearing range C can be treated as two parts.
 *    The first part C1 ends at the same end LBA as range E, and the second
 *    part C2 starts at the end LBA of range E.
 *    +----+-----+                    +----+   +-----+
 *    | C1 |  C2 |                    | C1 |   |  C2 |
 *    +----+-----+              ===>  +----+   +-----+
 *    +------------+              +------------+
 *    |      E     |              |      E     |
 *    +------------+              +------------+
 *    Now the first part clearing range C1 can be handled as condition 4),
 *    and the second part clearing range C2 can be handled as condition 1)
 *    in the next loop.
 *
 * All bad blocks range clearing can be simplified into the above 5 situations
 * by only handling the head part of the clearing range in each run of the
 * while-loop. The idea is similar to bad blocks range setting but much
 * simpler.
 */
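
/*
 * As a concrete illustration of the cases above, a minimal usage sketch
 * (illustrative only: it assumes a fully initialized 'bb' with
 * bb->shift == 0, and the sector numbers are invented for the example):
 *
 *	badblocks_set(bb, 64, 32, 1);	// acked range [64, 96)
 *	badblocks_set(bb, 96, 16, 1);	// adjacent and same ack value,
 *					// front merges into [64, 112)
 *	badblocks_clear(bb, 72, 8);	// hits the middle of [64, 112),
 *					// splits it into [64, 72) and
 *					// [80, 112), bb->count grows by 1
 */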

/*
 * Find the range that starts at-or-before 's' in the bad table. The search
 * starts from index 'hint' and stops at index 'hint_end' of the bad table.
 */
static int prev_by_hint(struct badblocks *bb, sector_t s, int hint)
{
	int hint_end = hint + 2;
	u64 *p = bb->page;
	int ret = -1;

	while ((hint < hint_end) && ((hint + 1) <= bb->count) &&
	       (BB_OFFSET(p[hint]) <= s)) {
		if ((hint + 1) == bb->count || BB_OFFSET(p[hint + 1]) > s) {
			ret = hint;
			break;
		}
		hint++;
	}

	return ret;
}

/*
 * Find the range that starts at-or-before bad->start. If 'hint' is provided
 * (hint >= 0) then search the bad table from hint first. It is very probable
 * that the wanted bad range can be found from the hint index, so the
 * expensive binary search can be avoided.
 */
static int prev_badblocks(struct badblocks *bb, struct badblocks_context *bad,
			  int hint)
{
	sector_t s = bad->start;
	int ret = -1;
	int lo, hi;
	u64 *p;

	if (!bb->count)
		goto out;

	if (hint >= 0) {
		ret = prev_by_hint(bb, s, hint);
		if (ret >= 0)
			goto out;
	}

	lo = 0;
	hi = bb->count;
	p = bb->page;

	/* The following bisect search might be unnecessary */
	if (BB_OFFSET(p[lo]) > s)
		return -1;
	if (BB_OFFSET(p[hi - 1]) <= s)
		return hi - 1;

	/* Do bisect search in bad table */
	while (hi - lo > 1) {
		int mid = (lo + hi)/2;
		sector_t a = BB_OFFSET(p[mid]);

		if (a == s) {
			ret = mid;
			goto out;
		}

		if (a < s)
			lo = mid;
		else
			hi = mid;
	}

	if (BB_OFFSET(p[lo]) <= s)
		ret = lo;
out:
	return ret;
}
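
/*
 * A sketch of the lookup contract, with invented offsets: if the bad
 * table holds ranges starting at sectors { 8, 32, 64 }, then
 *
 *	bad->start == 4  -> prev_badblocks() returns -1 (before all ranges)
 *	bad->start == 40 -> returns 1 (the range starting at 32)
 *	bad->start == 64 -> returns 2 (exact start match)
 *
 * The result is the same whether it comes from the hint fast path or from
 * the bisect search.
 */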

/*
 * Return 'true' if the range indicated by 'bad' can be backward merged
 * with the bad range (from the bad table) indexed by 'behind'.
 */
static bool can_merge_behind(struct badblocks *bb,
			     struct badblocks_context *bad, int behind)
{
	sector_t sectors = bad->len;
	sector_t s = bad->start;
	u64 *p = bb->page;

	if ((s < BB_OFFSET(p[behind])) &&
	    ((s + sectors) >= BB_OFFSET(p[behind])) &&
	    ((BB_END(p[behind]) - s) <= BB_MAX_LEN) &&
	    BB_ACK(p[behind]) == bad->ack)
		return true;
	return false;
}

/*
 * Do a backward merge for the range indicated by 'bad' and the bad range
 * (from the bad table) indexed by 'behind'. The return value is the number
 * of sectors merged from bad->len.
 */
static int behind_merge(struct badblocks *bb, struct badblocks_context *bad,
			int behind)
{
	sector_t sectors = bad->len;
	sector_t s = bad->start;
	u64 *p = bb->page;
	int merged = 0;

	WARN_ON(s >= BB_OFFSET(p[behind]));
	WARN_ON((s + sectors) < BB_OFFSET(p[behind]));

	if (s < BB_OFFSET(p[behind])) {
		merged = BB_OFFSET(p[behind]) - s;
		p[behind] = BB_MAKE(s, BB_LEN(p[behind]) + merged, bad->ack);

		WARN_ON((BB_LEN(p[behind]) + merged) >= BB_MAX_LEN);
	}

	return merged;
}
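
/*
 * A numeric sketch with invented sectors: if p[behind] covers [32, 40)
 * and 'bad' covers [28, 36), the merge extends p[behind] to [28, 40) and
 * returns 4, so the caller advances its setting range past the 4 sectors
 * that were absorbed.
 */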

/*
 * Return 'true' if the range indicated by 'bad' can be forward
 * merged with the bad range (from the bad table) indexed by 'prev'.
 */
static bool can_merge_front(struct badblocks *bb, int prev,
			    struct badblocks_context *bad)
{
	sector_t s = bad->start;
	u64 *p = bb->page;

	if (BB_ACK(p[prev]) == bad->ack &&
	    (s < BB_END(p[prev]) ||
	     (s == BB_END(p[prev]) && (BB_LEN(p[prev]) < BB_MAX_LEN))))
		return true;
	return false;
}

/*
 * Do a forward merge for the range indicated by 'bad' and the bad range
 * (from the bad table) indexed by 'prev'. The return value is the number
 * of sectors merged from bad->len.
 */
static int front_merge(struct badblocks *bb, int prev, struct badblocks_context *bad)
{
	sector_t sectors = bad->len;
	sector_t s = bad->start;
	u64 *p = bb->page;
	int merged = 0;

	WARN_ON(s > BB_END(p[prev]));

	if (s < BB_END(p[prev])) {
		merged = min_t(sector_t, sectors, BB_END(p[prev]) - s);
	} else {
		merged = min_t(sector_t, sectors, BB_MAX_LEN - BB_LEN(p[prev]));
		if ((prev + 1) < bb->count &&
		    merged > (BB_OFFSET(p[prev + 1]) - BB_END(p[prev]))) {
			merged = BB_OFFSET(p[prev + 1]) - BB_END(p[prev]);
		}

		p[prev] = BB_MAKE(BB_OFFSET(p[prev]),
				  BB_LEN(p[prev]) + merged, bad->ack);
	}

	return merged;
}
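
/*
 * Numeric sketches of the two front_merge() paths (sectors invented):
 * - overlap path: p[prev] covers [0, 16) and 'bad' starts at 8 with
 *   length 16; merged = min(16, 16 - 8) = 8, p[prev] is left unchanged
 *   and the caller skips forward to sector 16.
 * - adjacency path: 'bad' starts exactly at BB_END(p[prev]); p[prev] is
 *   extended by at most BB_MAX_LEN - BB_LEN(p[prev]) sectors, clamped
 *   further so it does not run into BB_OFFSET(p[prev + 1]).
 */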

/*
 * 'Combine' is a special case which can_merge_front() is not able to
 * handle: If a bad range (indexed by 'prev' from the bad table) starts
 * exactly at bad->start, and the bad range ahead of 'prev' (indexed by
 * 'prev - 1' from the bad table) ends exactly where 'prev' starts, and
 * the sum of their lengths does not exceed the BB_MAX_LEN limitation,
 * then these two bad ranges (from the bad table) can be combined.
 *
 * Return 'true' if the bad ranges indexed by 'prev' and 'prev - 1' from
 * the bad table can be combined.
 */
static bool can_combine_front(struct badblocks *bb, int prev,
			      struct badblocks_context *bad)
{
	u64 *p = bb->page;

	if ((prev > 0) &&
	    (BB_OFFSET(p[prev]) == bad->start) &&
	    (BB_END(p[prev - 1]) == BB_OFFSET(p[prev])) &&
	    (BB_LEN(p[prev - 1]) + BB_LEN(p[prev]) <= BB_MAX_LEN) &&
	    (BB_ACK(p[prev - 1]) == BB_ACK(p[prev])))
		return true;
	return false;
}

/*
 * Combine the bad ranges indexed by 'prev' and 'prev - 1' (from the bad
 * table) into one larger bad range, indexed by 'prev - 1'.
 * The caller of front_combine() will decrease bb->count, therefore
 * it is unnecessary to clear p[prev] after the front merge.
 */
static void front_combine(struct badblocks *bb, int prev)
{
	u64 *p = bb->page;

	p[prev - 1] = BB_MAKE(BB_OFFSET(p[prev - 1]),
			      BB_LEN(p[prev - 1]) + BB_LEN(p[prev]),
			      BB_ACK(p[prev]));
	if ((prev + 1) < bb->count)
		memmove(p + prev, p + prev + 1, (bb->count - prev - 1) * 8);
}
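
/*
 * E.g. (sectors invented): with p[prev - 1] covering [0, 8) and p[prev]
 * covering [8, 24), both acked, front_combine() rewrites p[prev - 1] to
 * [0, 24) and shifts the tail of the table left by one slot; the caller
 * then decrements bb->count.
 */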

/*
 * Return 'true' if the range indicated by 'bad' is exactly forward
 * overlapped with the bad range (from the bad table) indexed by 'front'.
 * Exactly forward overlap means the bad range (from the bad table)
 * indexed by 'front' does not cover the whole range indicated by 'bad'.
 */
static bool overlap_front(struct badblocks *bb, int front,
			  struct badblocks_context *bad)
{
	u64 *p = bb->page;

	if (bad->start >= BB_OFFSET(p[front]) &&
	    bad->start < BB_END(p[front]))
		return true;
	return false;
}

/*
 * Return 'true' if the range indicated by 'bad' is exactly backward
 * overlapped with the bad range (from the bad table) indexed by 'behind'.
 */
static bool overlap_behind(struct badblocks *bb, struct badblocks_context *bad,
			   int behind)
{
	u64 *p = bb->page;

	if (bad->start < BB_OFFSET(p[behind]) &&
	    (bad->start + bad->len) > BB_OFFSET(p[behind]))
		return true;
	return false;
}

/*
 * Return 'true' if the range indicated by 'bad' can overwrite the bad
 * range (from the bad table) indexed by 'prev'.
 *
 * The range indicated by 'bad' can overwrite the bad range indexed by
 * 'prev' when,
 * 1) The whole range indicated by 'bad' can cover part or all of the bad
 *    range (from the bad table) indexed by 'prev'.
 * 2) The ack value of 'bad' is larger than the ack value of the bad
 *    range 'prev'.
 *
 * If the overwrite doesn't cover the whole bad range (from the bad table)
 * indexed by 'prev', new ranges might be split from the existing bad range,
 * 1) If the overwrite covers the head or tail part of the existing bad
 *    range, 1 extra bad range will be split off and added into the bad
 *    table.
 * 2) If the overwrite covers the middle of the existing bad range, 2 extra
 *    bad ranges will be split off (ahead of and after the overwritten
 *    range) and added into the bad table.
 * The number of extra split ranges of the overwrite is stored in '*extra'
 * and returned to the caller.
 */
static bool can_front_overwrite(struct badblocks *bb, int prev,
				struct badblocks_context *bad, int *extra)
{
	u64 *p = bb->page;
	int len;

	WARN_ON(!overlap_front(bb, prev, bad));

	if (BB_ACK(p[prev]) >= bad->ack)
		return false;

	if (BB_END(p[prev]) <= (bad->start + bad->len)) {
		len = BB_END(p[prev]) - bad->start;
		if (BB_OFFSET(p[prev]) == bad->start)
			*extra = 0;
		else
			*extra = 1;

		bad->len = len;
	} else {
		if (BB_OFFSET(p[prev]) == bad->start)
			*extra = 1;
		else
			/*
			 * The prev range will be split into two; besides
			 * the overwritten one, an extra slot is needed
			 * from the bad table.
			 */
			*extra = 2;
	}

	if ((bb->count + (*extra)) >= MAX_BADBLOCKS)
		return false;

	return true;
}
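
/*
 * The '*extra' outcomes, sketched with invented sectors against an
 * unacked p[prev] covering [16, 48) and an acked 'bad':
 *
 *	bad = [16, 48) -> *extra == 0 (head aligned, reaches the end)
 *	bad = [24, 64) -> *extra == 1 (tail overwrite, the head of
 *	                  p[prev] survives as its own entry)
 *	bad = [16, 32) -> *extra == 1 (head overwrite, the tail survives)
 *	bad = [24, 32) -> *extra == 2 (middle overwrite, head and tail
 *	                  both survive)
 */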

/*
 * Do the overwrite from the range indicated by 'bad' to the bad range
 * (from the bad table) indexed by 'prev'.
 * The previously called can_front_overwrite() will provide how many
 * extra bad range(s) might be split off and added into the bad table.
 * All the splitting cases in the bad table will be handled here.
 */
static int front_overwrite(struct badblocks *bb, int prev,
			   struct badblocks_context *bad, int extra)
{
	u64 *p = bb->page;
	sector_t orig_end = BB_END(p[prev]);
	int orig_ack = BB_ACK(p[prev]);

	switch (extra) {
	case 0:
		p[prev] = BB_MAKE(BB_OFFSET(p[prev]), BB_LEN(p[prev]),
				  bad->ack);
		break;
	case 1:
		if (BB_OFFSET(p[prev]) == bad->start) {
			p[prev] = BB_MAKE(BB_OFFSET(p[prev]),
					  bad->len, bad->ack);
			memmove(p + prev + 2, p + prev + 1,
				(bb->count - prev - 1) * 8);
			p[prev + 1] = BB_MAKE(bad->start + bad->len,
					      orig_end - BB_END(p[prev]),
					      orig_ack);
		} else {
			p[prev] = BB_MAKE(BB_OFFSET(p[prev]),
					  bad->start - BB_OFFSET(p[prev]),
					  orig_ack);
			/*
			 * prev + 2 -> prev + 1 + 1, which is for,
			 * 1) prev + 1: the slot index of the previous one
			 * 2) + 1: one more slot for extra being 1.
			 */
			memmove(p + prev + 2, p + prev + 1,
				(bb->count - prev - 1) * 8);
			p[prev + 1] = BB_MAKE(bad->start, bad->len, bad->ack);
		}
		break;
	case 2:
		p[prev] = BB_MAKE(BB_OFFSET(p[prev]),
				  bad->start - BB_OFFSET(p[prev]),
				  orig_ack);
		/*
		 * prev + 3 -> prev + 1 + 2, which is for,
		 * 1) prev + 1: the slot index of the previous one
		 * 2) + 2: two more slots for extra being 2.
		 */
		memmove(p + prev + 3, p + prev + 1,
			(bb->count - prev - 1) * 8);
		p[prev + 1] = BB_MAKE(bad->start, bad->len, bad->ack);
		p[prev + 2] = BB_MAKE(BB_END(p[prev + 1]),
				      orig_end - BB_END(p[prev + 1]),
				      orig_ack);
		break;
	default:
		break;
	}

	return bad->len;
}

/*
 * Explicitly insert a range indicated by 'bad' into the bad table, where
 * the location is indexed by 'at'.
 */
static int insert_at(struct badblocks *bb, int at, struct badblocks_context *bad)
{
	u64 *p = bb->page;
	int len;

	WARN_ON(badblocks_full(bb));

	len = min_t(sector_t, bad->len, BB_MAX_LEN);
	if (at < bb->count)
		memmove(p + at + 1, p + at, (bb->count - at) * 8);
	p[at] = BB_MAKE(bad->start, len, bad->ack);

	return len;
}
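
/*
 * Note the BB_MAX_LEN clamp above: inserting, say, a 1000-sector 'bad'
 * range (sectors invented) only records the first 512 sectors and
 * returns 512; the caller's re_insert loop then comes back for the
 * remaining 488 sectors in the next iteration.
 */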

static void badblocks_update_acked(struct badblocks *bb)
{
	bool unacked = false;
	u64 *p = bb->page;
	int i;

	if (!bb->unacked_exist)
		return;

	for (i = 0; i < bb->count; i++) {
		if (!BB_ACK(p[i])) {
			unacked = true;
			break;
		}
	}

	if (!unacked)
		bb->unacked_exist = 0;
}

/* Do the exact work to set a bad block range into the bad block table */
static int _badblocks_set(struct badblocks *bb, sector_t s, int sectors,
			  int acknowledged)
{
	int retried = 0, space_desired = 0;
	int orig_len, len = 0, added = 0;
	struct badblocks_context bad;
	int prev = -1, hint = -1;
	sector_t orig_start;
	unsigned long flags;
	int rv = 0;
	u64 *p;

	if (bb->shift < 0)
		/* badblocks are disabled */
		return 1;

	if (sectors == 0)
		/* Invalid sectors number */
		return 1;

	if (bb->shift) {
		/* round the start down, and the end up */
		sector_t next = s + sectors;

		rounddown(s, bb->shift);
		roundup(next, bb->shift);
		sectors = next - s;
	}

	write_seqlock_irqsave(&bb->lock, flags);

	orig_start = s;
	orig_len = sectors;
	bad.ack = acknowledged;
	p = bb->page;

re_insert:
	bad.start = s;
	bad.len = sectors;
	len = 0;

	if (badblocks_empty(bb)) {
		len = insert_at(bb, 0, &bad);
		bb->count++;
		added++;
		goto update_sectors;
	}

	prev = prev_badblocks(bb, &bad, hint);

	/* start before all badblocks */
	if (prev < 0) {
		if (!badblocks_full(bb)) {
			/* insert on the first */
			if (bad.len > (BB_OFFSET(p[0]) - bad.start))
				bad.len = BB_OFFSET(p[0]) - bad.start;
			len = insert_at(bb, 0, &bad);
			bb->count++;
			added++;
			hint = 0;
			goto update_sectors;
		}

		/* No space, try to merge */
		if (overlap_behind(bb, &bad, 0)) {
			if (can_merge_behind(bb, &bad, 0)) {
				len = behind_merge(bb, &bad, 0);
				added++;
			} else {
				len = BB_OFFSET(p[0]) - s;
				space_desired = 1;
			}
			hint = 0;
			goto update_sectors;
		}

		/* no table space and give up */
		goto out;
	}

	/* in case p[prev-1] can be merged with p[prev] */
	if (can_combine_front(bb, prev, &bad)) {
		front_combine(bb, prev);
		bb->count--;
		added++;
		hint = prev;
		goto update_sectors;
	}

	if (overlap_front(bb, prev, &bad)) {
		if (can_merge_front(bb, prev, &bad)) {
			len = front_merge(bb, prev, &bad);
			added++;
		} else {
			int extra = 0;

			if (!can_front_overwrite(bb, prev, &bad, &extra)) {
				len = min_t(sector_t,
					    BB_END(p[prev]) - s, sectors);
				hint = prev;
				goto update_sectors;
			}

			len = front_overwrite(bb, prev, &bad, extra);
			added++;
			bb->count += extra;

			if (can_combine_front(bb, prev, &bad)) {
				front_combine(bb, prev);
				bb->count--;
			}
		}
		hint = prev;
		goto update_sectors;
	}

	if (can_merge_front(bb, prev, &bad)) {
		len = front_merge(bb, prev, &bad);
		added++;
		hint = prev;
		goto update_sectors;
	}

	/* if no space in table, still try to merge in the covered range */
	if (badblocks_full(bb)) {
		/* skip the cannot-merge range */
		if (((prev + 1) < bb->count) &&
		    overlap_behind(bb, &bad, prev + 1) &&
		    ((s + sectors) >= BB_END(p[prev + 1]))) {
			len = BB_END(p[prev + 1]) - s;
			hint = prev + 1;
			goto update_sectors;
		}

		/* no retry any more */
		len = sectors;
		space_desired = 1;
		hint = -1;
		goto update_sectors;
	}

	/* cannot merge and there is space in bad table */
	if ((prev + 1) < bb->count &&
	    overlap_behind(bb, &bad, prev + 1))
		bad.len = min_t(sector_t,
				bad.len, BB_OFFSET(p[prev + 1]) - bad.start);

	len = insert_at(bb, prev + 1, &bad);
	bb->count++;
	added++;
	hint = prev + 1;

update_sectors:
	s += len;
	sectors -= len;

	if (sectors > 0)
		goto re_insert;

	WARN_ON(sectors < 0);

	/*
	 * Check whether the following already set range can be
	 * merged. (prev < 0) condition is not handled here,
	 * because it's already complicated enough.
	 */
	if (prev >= 0 &&
	    (prev + 1) < bb->count &&
	    BB_END(p[prev]) == BB_OFFSET(p[prev + 1]) &&
	    (BB_LEN(p[prev]) + BB_LEN(p[prev + 1])) <= BB_MAX_LEN &&
	    BB_ACK(p[prev]) == BB_ACK(p[prev + 1])) {
		p[prev] = BB_MAKE(BB_OFFSET(p[prev]),
				  BB_LEN(p[prev]) + BB_LEN(p[prev + 1]),
				  BB_ACK(p[prev]));

		if ((prev + 2) < bb->count)
			memmove(p + prev + 1, p + prev + 2,
				(bb->count - (prev + 2)) * 8);
		bb->count--;
	}

	if (space_desired && !badblocks_full(bb)) {
		s = orig_start;
		sectors = orig_len;
		space_desired = 0;
		if (retried++ < 3)
			goto re_insert;
	}

out:
	if (added) {
		set_changed(bb);

		if (!acknowledged)
			bb->unacked_exist = 1;
		else
			badblocks_update_acked(bb);
	}

	write_sequnlock_irqrestore(&bb->lock, flags);

	if (!added)
		rv = 1;

	return rv;
}

/*
 * Clear the bad block range from the bad block table which is front
 * overlapped with the clearing range. The return value is how many sectors
 * from an already set bad block range are cleared. If the whole bad block
 * range is covered by the clearing range and fully cleared, '*deleted' is
 * set to 1 for the caller to reduce bb->count.
 */
static int front_clear(struct badblocks *bb, int prev,
		       struct badblocks_context *bad, int *deleted)
{
	sector_t sectors = bad->len;
	sector_t s = bad->start;
	u64 *p = bb->page;
	int cleared = 0;

	*deleted = 0;
	if (s == BB_OFFSET(p[prev])) {
		if (BB_LEN(p[prev]) > sectors) {
			p[prev] = BB_MAKE(BB_OFFSET(p[prev]) + sectors,
					  BB_LEN(p[prev]) - sectors,
					  BB_ACK(p[prev]));
			cleared = sectors;
		} else {
			/* BB_LEN(p[prev]) <= sectors */
			cleared = BB_LEN(p[prev]);
			if ((prev + 1) < bb->count)
				memmove(p + prev, p + prev + 1,
					(bb->count - prev - 1) * 8);
			*deleted = 1;
		}
	} else if (s > BB_OFFSET(p[prev])) {
		if (BB_END(p[prev]) <= (s + sectors)) {
			cleared = BB_END(p[prev]) - s;
			p[prev] = BB_MAKE(BB_OFFSET(p[prev]),
					  s - BB_OFFSET(p[prev]),
					  BB_ACK(p[prev]));
		} else {
			/* Splitting is handled in front_splitting_clear() */
			BUG();
		}
	}

	return cleared;
}

/*
 * Handle the condition that the clearing range hits the middle of an already
 * set bad block range from the bad block table. In this condition the
 * existing bad block range is split into two after the middle part is
 * cleared.
 */
static int front_splitting_clear(struct badblocks *bb, int prev,
				 struct badblocks_context *bad)
{
	u64 *p = bb->page;
	u64 end = BB_END(p[prev]);
	int ack = BB_ACK(p[prev]);
	sector_t sectors = bad->len;
	sector_t s = bad->start;

	p[prev] = BB_MAKE(BB_OFFSET(p[prev]),
			  s - BB_OFFSET(p[prev]),
			  ack);
	memmove(p + prev + 2, p + prev + 1, (bb->count - prev - 1) * 8);
	p[prev + 1] = BB_MAKE(s + sectors, end - s - sectors, ack);
	return sectors;
}
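
/*
 * E.g. (sectors invented): clearing [16, 24) out of an entry covering
 * [0, 64) rewrites the entry to [0, 16), shifts the table right by one
 * slot, and writes [24, 64) right behind it; the caller then adds 1 to
 * bb->count.
 */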

/* Do the exact work to clear a bad block range from the bad block table */
static int _badblocks_clear(struct badblocks *bb, sector_t s, int sectors)
{
	struct badblocks_context bad;
	int prev = -1, hint = -1;
	int len = 0, cleared = 0;
	int rv = 0;
	u64 *p;

	if (bb->shift < 0)
		/* badblocks are disabled */
		return 1;

	if (sectors == 0)
		/* Invalid sectors number */
		return 1;

	if (bb->shift) {
		sector_t target;

		/* When clearing we round the start up and the end down.
		 * This should not matter as the shift should align with
		 * the block size and no rounding should ever be needed.
		 * However it is better to think a block is bad when it
		 * isn't than to think a block is not bad when it is.
		 */
		target = s + sectors;
		roundup(s, bb->shift);
		rounddown(target, bb->shift);
		sectors = target - s;
	}

	write_seqlock_irq(&bb->lock);

	bad.ack = true;
	p = bb->page;

re_clear:
	bad.start = s;
	bad.len = sectors;

	if (badblocks_empty(bb)) {
		len = sectors;
		cleared++;
		goto update_sectors;
	}

	prev = prev_badblocks(bb, &bad, hint);

	/* Start before all badblocks */
	if (prev < 0) {
		if (overlap_behind(bb, &bad, 0)) {
			len = BB_OFFSET(p[0]) - s;
			hint = 0;
		} else {
			len = sectors;
		}
		/*
		 * Both situations are to clear a non-bad range,
		 * which should be treated as successful
		 */
		cleared++;
		goto update_sectors;
	}

	/* Start after all badblocks */
	if ((prev + 1) >= bb->count && !overlap_front(bb, prev, &bad)) {
		len = sectors;
		cleared++;
		goto update_sectors;
	}

	/* Clear will split a bad record but the table is full */
	if (badblocks_full(bb) && (BB_OFFSET(p[prev]) < bad.start) &&
	    (BB_END(p[prev]) > (bad.start + sectors))) {
		len = sectors;
		goto update_sectors;
	}

	if (overlap_front(bb, prev, &bad)) {
		if ((BB_OFFSET(p[prev]) < bad.start) &&
		    (BB_END(p[prev]) > (bad.start + bad.len))) {
			/* Splitting */
			if ((bb->count + 1) < MAX_BADBLOCKS) {
				len = front_splitting_clear(bb, prev, &bad);
				bb->count += 1;
				cleared++;
			} else {
				/* No space to split, give up */
				len = sectors;
			}
		} else {
			int deleted = 0;

			len = front_clear(bb, prev, &bad, &deleted);
			bb->count -= deleted;
			cleared++;
			hint = prev;
		}

		goto update_sectors;
	}

	/* Not front overlap, but behind overlap */
	if ((prev + 1) < bb->count && overlap_behind(bb, &bad, prev + 1)) {
		len = BB_OFFSET(p[prev + 1]) - bad.start;
		hint = prev + 1;
		/* Clearing a non-bad range should be treated as successful */
		cleared++;
		goto update_sectors;
	}

	/* Does not cover any badblocks range in the table */
	len = sectors;
	/* Clearing a non-bad range should be treated as successful */
	cleared++;

update_sectors:
	s += len;
	sectors -= len;

	if (sectors > 0)
		goto re_clear;

	WARN_ON(sectors < 0);

	if (cleared) {
		badblocks_update_acked(bb);
		set_changed(bb);
	}

	write_sequnlock_irq(&bb->lock);

	if (!cleared)
		rv = 1;

	return rv;
}

/* Do the exact work to check a bad block range in the bad block table */
static int _badblocks_check(struct badblocks *bb, sector_t s, int sectors,
			    sector_t *first_bad, int *bad_sectors)
{
	int unacked_badblocks, acked_badblocks;
	int prev = -1, hint = -1, set = 0;
	struct badblocks_context bad;
	unsigned int seq;
	int len, rv;
	u64 *p;

	WARN_ON(bb->shift < 0 || sectors == 0);

	if (bb->shift > 0) {
		sector_t target;

		/* round the start down, and the end up */
		target = s + sectors;
		rounddown(s, bb->shift);
		roundup(target, bb->shift);
		sectors = target - s;
	}

retry:
	seq = read_seqbegin(&bb->lock);

	p = bb->page;
	unacked_badblocks = 0;
	acked_badblocks = 0;

re_check:
	bad.start = s;
	bad.len = sectors;

	if (badblocks_empty(bb)) {
		len = sectors;
		goto update_sectors;
	}

	prev = prev_badblocks(bb, &bad, hint);

	/* start after all badblocks */
	if ((prev + 1) >= bb->count && !overlap_front(bb, prev, &bad)) {
		len = sectors;
		goto update_sectors;
	}

	if (overlap_front(bb, prev, &bad)) {
		if (BB_ACK(p[prev]))
			acked_badblocks++;
		else
			unacked_badblocks++;

		if (BB_END(p[prev]) >= (s + sectors))
			len = sectors;
		else
			len = BB_END(p[prev]) - s;

		if (set == 0) {
			*first_bad = BB_OFFSET(p[prev]);
			*bad_sectors = BB_LEN(p[prev]);
			set = 1;
		}
		goto update_sectors;
	}

	/* Not front overlap, but behind overlap */
	if ((prev + 1) < bb->count && overlap_behind(bb, &bad, prev + 1)) {
		len = BB_OFFSET(p[prev + 1]) - bad.start;
		hint = prev + 1;
		goto update_sectors;
	}

	/* does not cover any badblocks range in the table */
	len = sectors;

update_sectors:
	s += len;
	sectors -= len;

	if (sectors > 0)
		goto re_check;

	WARN_ON(sectors < 0);

	if (unacked_badblocks > 0)
		rv = -1;
	else if (acked_badblocks > 0)
		rv = 1;
	else
		rv = 0;

	if (read_seqretry(&bb->lock, seq))
		goto retry;

	return rv;
}

/**
 * badblocks_check() - check a given range for bad sectors
 * @bb:		the badblocks structure that holds all badblock information
 * @s:		sector (start) at which to check for badblocks
 * @sectors:	number of sectors to check for badblocks
 * @first_bad:	pointer to store location of the first badblock
 * @bad_sectors: pointer to store number of badblocks after @first_bad
 *
 * We can record which blocks on each device are 'bad' and so just
 * fail those blocks, or that stripe, rather than the whole device.
 * Entries in the bad-block table are 64bits wide. This comprises:
 * Length of bad-range, in sectors: 0-511 for lengths 1-512
 * Start of bad-range, sector offset, 54 bits (allows 8 exbibytes)
 * A 'shift' can be set so that larger blocks are tracked and
 * consequently larger devices can be covered.
 * 'Acknowledged' flag - 1 bit. - the most significant bit.
 *
 * Locking of the bad-block table uses a seqlock so badblocks_check
 * might need to retry if it is very unlucky.
 * We will sometimes want to check for bad blocks in a bi_end_io function,
 * so we use the write_seqlock_irq variant.
 *
 * When looking for a bad block we specify a range and want to
 * know if any block in the range is bad. So we binary-search
 * to the last range that starts at-or-before the given endpoint
 * (or "before the sector after the target range"),
 * then see if it ends after the given start.
 *
 * Return:
 *  0: there are no known bad blocks in the range
 *  1: there are known bad blocks which are all acknowledged
 * -1: there are bad blocks which have not yet been acknowledged in metadata.
 * plus the start/length of the first bad section we overlap.
 */
int badblocks_check(struct badblocks *bb, sector_t s, int sectors,
		    sector_t *first_bad, int *bad_sectors)
{
	return _badblocks_check(bb, s, sectors, first_bad, bad_sectors);
}
EXPORT_SYMBOL_GPL(badblocks_check);
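
/*
 * A minimal caller sketch (sector numbers invented, error handling
 * elided) of how an I/O path might consult the table before issuing a
 * request:
 *
 *	sector_t first_bad;
 *	int bad_sectors;
 *
 *	if (badblocks_check(bb, bio_sector, bio_len, &first_bad,
 *			    &bad_sectors) > 0) {
 *		// all overlapping bad ranges are acknowledged; the first
 *		// overlapping one is [first_bad, first_bad + bad_sectors),
 *		// so the caller can fail or remap just that range
 *		// instead of the whole device
 *	}
 */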

/**
 * badblocks_set() - Add a range of bad blocks to the table.
 * @bb:		the badblocks structure that holds all badblock information
 * @s:		first sector to mark as bad
 * @sectors:	number of sectors to mark as bad
 * @acknowledged: whether to mark the bad sectors as acknowledged
 *
 * This might extend the table, or might contract it if two adjacent ranges
 * can be merged. We binary-search to find the 'insertion' point, then
 * decide how best to handle it.
 *
 * Return:
 *  0: success
 *  1: failed to set badblocks (out of space)
 */
int badblocks_set(struct badblocks *bb, sector_t s, int sectors,
		  int acknowledged)
{
	return _badblocks_set(bb, s, sectors, acknowledged);
}
EXPORT_SYMBOL_GPL(badblocks_set);

/**
 * badblocks_clear() - Remove a range of bad blocks from the table.
 * @bb:		the badblocks structure that holds all badblock information
 * @s:		first sector of the range to clear
 * @sectors:	number of sectors to clear
 *
 * This may involve extending the table if we split a region,
 * but it must not fail. So if the table becomes full, we just
 * drop the remove request.
 *
 * Return:
 *  0: success
 *  1: failed to clear badblocks
 */
int badblocks_clear(struct badblocks *bb, sector_t s, int sectors)
{
	return _badblocks_clear(bb, s, sectors);
}
EXPORT_SYMBOL_GPL(badblocks_clear);
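
/*
 * A sketch of the unacked -> acked life cycle as a caller such as md
 * might drive it (simplified; write_metadata() is an invented stand-in
 * for persisting the bad block list, after which the caller clears
 * bb->changed so that ack_all_badblocks() does not bail out):
 *
 *	badblocks_set(bb, s, n, 0);	// recorded, not yet in metadata
 *	write_metadata();		// persist the bad block list
 *	bb->changed = 0;		// metadata is now in sync
 *	ack_all_badblocks(bb);		// now safe to acknowledge
 */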

/**
 * ack_all_badblocks() - Acknowledge all bad blocks in a list.
 * @bb:		the badblocks structure that holds all badblock information
 *
 * This only succeeds if ->changed is clear. It is used by
 * in-kernel metadata updates.
 */
void ack_all_badblocks(struct badblocks *bb)
{
	if (bb->page == NULL || bb->changed)
		/* no point even trying */
		return;
	write_seqlock_irq(&bb->lock);

	if (bb->changed == 0 && bb->unacked_exist) {
		u64 *p = bb->page;
		int i;

		for (i = 0; i < bb->count; i++) {
			if (!BB_ACK(p[i])) {
				sector_t start = BB_OFFSET(p[i]);
				int len = BB_LEN(p[i]);

				p[i] = BB_MAKE(start, len, 1);
			}
		}
		bb->unacked_exist = 0;
	}
	write_sequnlock_irq(&bb->lock);
}
EXPORT_SYMBOL_GPL(ack_all_badblocks);

/**
 * badblocks_show() - sysfs access to bad-blocks list
 * @bb:		the badblocks structure that holds all badblock information
 * @page:	buffer received from sysfs
 * @unack:	whether to show unacknowledged badblocks
 *
 * Return:
 *  Length of returned data
 */
ssize_t badblocks_show(struct badblocks *bb, char *page, int unack)
{
	size_t len;
	int i;
	u64 *p = bb->page;
	unsigned seq;

	if (bb->shift < 0)
		return 0;

retry:
	seq = read_seqbegin(&bb->lock);

	len = 0;
	i = 0;

	while (len < PAGE_SIZE && i < bb->count) {
		sector_t s = BB_OFFSET(p[i]);
		unsigned int length = BB_LEN(p[i]);
		int ack = BB_ACK(p[i]);

		i++;

		if (unack && ack)
			continue;

		len += snprintf(page + len, PAGE_SIZE - len, "%llu %u\n",
				(unsigned long long)s << bb->shift,
				length << bb->shift);
	}
	if (unack && len == 0)
		bb->unacked_exist = 0;

	if (read_seqretry(&bb->lock, seq))
		goto retry;

	return len;
}
EXPORT_SYMBOL_GPL(badblocks_show);

/**
 * badblocks_store() - sysfs access to bad-blocks list
 * @bb:		the badblocks structure that holds all badblock information
 * @page:	buffer received from sysfs
 * @len:	length of data received from sysfs
 * @unack:	whether to store the bad blocks as unacknowledged
 *
 * Return:
 *  Length of the buffer processed or -ve error.
 */
ssize_t badblocks_store(struct badblocks *bb, const char *page, size_t len,
			int unack)
{
	unsigned long long sector;
	int length;
	char newline;

	switch (sscanf(page, "%llu %d%c", &sector, &length, &newline)) {
	case 3:
		if (newline != '\n')
			return -EINVAL;
		fallthrough;
	case 2:
		if (length <= 0)
			return -EINVAL;
		break;
	default:
		return -EINVAL;
	}

	if (badblocks_set(bb, sector, length, !unack))
		return -ENOSPC;
	else
		return len;
}
EXPORT_SYMBOL_GPL(badblocks_store);
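
/*
 * The accepted input is "<sector> <length>", optionally followed by a
 * newline (values invented; md's per-rdev 'bad_blocks' attribute is one
 * writable user of this helper). Marking sectors 1060..1067 bad looks
 * like:
 *
 *	badblocks_store(bb, "1060 8\n", 7, 0);	// returns 7 on success
 */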

static int __badblocks_init(struct device *dev, struct badblocks *bb,
			    int enable)
{
	bb->dev = dev;
	bb->count = 0;
	if (enable)
		bb->shift = 0;
	else
		bb->shift = -1;
	if (dev)
		bb->page = devm_kzalloc(dev, PAGE_SIZE, GFP_KERNEL);
	else
		bb->page = kzalloc(PAGE_SIZE, GFP_KERNEL);
	if (!bb->page) {
		bb->shift = -1;
		return -ENOMEM;
	}
	seqlock_init(&bb->lock);

	return 0;
}

/**
 * badblocks_init() - initialize the badblocks structure
 * @bb:		the badblocks structure that holds all badblock information
 * @enable:	whether to enable badblocks accounting
 *
 * Return:
 *  0: success
 *  -ve errno: on error
 */
int badblocks_init(struct badblocks *bb, int enable)
{
	return __badblocks_init(NULL, bb, enable);
}
EXPORT_SYMBOL_GPL(badblocks_init);

int devm_init_badblocks(struct device *dev, struct badblocks *bb)
{
	if (!bb)
		return -EINVAL;
	return __badblocks_init(dev, bb, 1);
}
EXPORT_SYMBOL_GPL(devm_init_badblocks);

/**
 * badblocks_exit() - free the badblocks structure
 * @bb:		the badblocks structure that holds all badblock information
 */
void badblocks_exit(struct badblocks *bb)
{
	if (!bb)
		return;
	if (bb->dev)
		devm_kfree(bb->dev, bb->page);
	else
		kfree(bb->page);
	bb->page = NULL;
}
EXPORT_SYMBOL_GPL(badblocks_exit);
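
/*
 * End-to-end lifecycle sketch for a non-devm user (illustrative only; a
 * devm user would call devm_init_badblocks() instead and could skip the
 * explicit badblocks_exit()):
 *
 *	struct badblocks bb;
 *	int err;
 *
 *	err = badblocks_init(&bb, 1);	// enabled, bb.shift == 0
 *	if (err)
 *		return err;
 *	badblocks_set(&bb, 2048, 16, 1);	// sectors invented
 *	...
 *	badblocks_exit(&bb);
 */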