// SPDX-License-Identifier: GPL-2.0
/*
 * Some low level IO code, and hacks for various block layer limitations
 *
 * Copyright 2010, 2011 Kent Overstreet <kent.overstreet@gmail.com>
 * Copyright 2012 Google, Inc.
 */

#include "bcache.h"
#include "bset.h"
#include "debug.h"

#include <linux/blkdev.h>

/* Bios with headers */
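/*
 * A bbio is a bio with a bkey header in front of it, so the completion
 * path can find the key the I/O was issued for again.
 *
 * Rough usage sketch; the handler, closure and buffer names below are
 * illustrative, not a real call site:
 *
 *      bio = bch_bbio_alloc(c);
 *      bio->bi_iter.bi_size = KEY_SIZE(k) << 9;
 *      bio->bi_opf          = REQ_OP_READ | REQ_META;
 *      bio->bi_end_io       = my_endio;        // hypothetical completion handler
 *      bio->bi_private      = cl;              // closure owning this I/O
 *      bch_bio_map(bio, buf);
 *
 *      bch_submit_bbio(bio, c, k, 0);
 *
 *      // ...and my_endio() finishes with:
 *      bch_bbio_endio(c, bio, bio->bi_status, "example read");
 */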

void bch_bbio_free(struct bio *bio, struct cache_set *c)
{
        struct bbio *b = container_of(bio, struct bbio, bio);

        mempool_free(b, &c->bio_meta);
}

struct bio *bch_bbio_alloc(struct cache_set *c)
{
        struct bbio *b = mempool_alloc(&c->bio_meta, GFP_NOIO);
        struct bio *bio = &b->bio;

        bio_init(bio, NULL, bio->bi_inline_vecs,
                 meta_bucket_pages(&c->cache->sb), 0);

        return bio;
}

void __bch_submit_bbio(struct bio *bio, struct cache_set *c)
{
        struct bbio *b = container_of(bio, struct bbio, bio);

        bio->bi_iter.bi_sector = PTR_OFFSET(&b->key, 0);
        bio_set_dev(bio, c->cache->bdev);

        b->submit_time_us = local_clock_us();
        closure_bio_submit(c, bio, bio->bi_private);
}

void bch_submit_bbio(struct bio *bio, struct cache_set *c,
                     struct bkey *k, unsigned int ptr)
{
        struct bbio *b = container_of(bio, struct bbio, bio);

        bch_bkey_copy_single_ptr(&b->key, k, ptr);
        __bch_submit_bbio(bio, c);
}

/* IO errors */
void bch_count_backing_io_errors(struct cached_dev *dc, struct bio *bio)
{
        unsigned int errors;

        WARN_ONCE(!dc, "NULL pointer of struct cached_dev");

        /*
         * Read-ahead requests on a degraded and recovering md raid
         * (e.g. raid6) device might be failed immediately by the md
         * raid code, which is not a real hardware media failure. So
         * we shouldn't count failed REQ_RAHEAD bios in dc->io_errors.
         */
        if (bio->bi_opf & REQ_RAHEAD) {
                pr_warn_ratelimited("%pg: Read-ahead I/O failed on backing device, ignore\n",
                                    dc->bdev);
                return;
        }

        errors = atomic_add_return(1, &dc->io_errors);
        if (errors < dc->error_limit)
                pr_err("%pg: IO error on backing device, unrecoverable\n",
                        dc->bdev);
        else
                bch_cached_dev_error(dc);
}

void bch_count_io_errors(struct cache *ca,
                         blk_status_t error,
                         int is_read,
                         const char *m)
{
        /*
         * The halflife of an error is:
         * log2(1/2)/log2(127/128) * refresh ~= 88 * refresh
         */
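
        /*
         * That is: each decay step multiplies io_errors by 127/128, and
         * one step happens per error_decay ("refresh") I/Os, so the
         * accumulated count halves after log(1/2)/log(127/128) ~= 88.4
         * decay steps.
         */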

        if (ca->set->error_decay) {
                unsigned int count = atomic_inc_return(&ca->io_count);

                while (count > ca->set->error_decay) {
                        unsigned int errors;
                        unsigned int old = count;
                        unsigned int new = count - ca->set->error_decay;

                        /*
                         * First we subtract error_decay ("refresh") from
                         * count; each time we successfully do so, we
                         * rescale the errors once:
                         */

                        count = atomic_cmpxchg(&ca->io_count, old, new);

                        if (count == old) {
                                count = new;

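                                /*
                                 * We own this decay step: scale the
                                 * accumulated error count by 127/128,
                                 * retrying the cmpxchg if another CPU
                                 * updated io_errors concurrently.
                                 */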
                                errors = atomic_read(&ca->io_errors);
                                do {
                                        old = errors;
                                        new = ((uint64_t) errors * 127) / 128;
                                        errors = atomic_cmpxchg(&ca->io_errors,
                                                                old, new);
                                } while (old != errors);
                        }
                }
        }

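        /*
         * Errors are kept left-shifted by IO_ERROR_SHIFT, which gives the
         * 127/128 decay above some fractional precision; shift back down
         * before comparing against error_limit.
         */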
        if (error) {
                unsigned int errors = atomic_add_return(1 << IO_ERROR_SHIFT,
                                                        &ca->io_errors);
                errors >>= IO_ERROR_SHIFT;

                if (errors < ca->set->error_limit)
                        pr_err("%pg: IO error on %s%s\n",
                               ca->bdev, m,
                               is_read ? ", recovering." : ".");
                else
                        bch_cache_set_error(ca->set,
                                            "%pg: too many IO errors %s\n",
                                            ca->bdev, m);
        }
}

void bch_bbio_count_io_errors(struct cache_set *c, struct bio *bio,
                              blk_status_t error, const char *m)
{
        struct bbio *b = container_of(bio, struct bbio, bio);
        struct cache *ca = c->cache;
        int is_read = (bio_data_dir(bio) == READ ? 1 : 0);

        unsigned int threshold = op_is_write(bio_op(bio))
                ? c->congested_write_threshold_us
                : c->congested_read_threshold_us;

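        /*
         * Congestion tracking: an I/O slower than the configured
         * threshold drives c->congested negative by its latency in ms
         * (us / 1024 is a cheap approximation of us / 1000), clamped so
         * the counter never drops below -CONGESTED_MAX; I/Os completing
         * under the threshold let it drift back toward zero.
         */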
        if (threshold) {
                unsigned int t = local_clock_us();
                int us = t - b->submit_time_us;
                int congested = atomic_read(&c->congested);

                if (us > (int) threshold) {
                        int ms = us / 1024;

                        c->congested_last_us = t;

                        ms = min(ms, CONGESTED_MAX + congested);
                        atomic_sub(ms, &c->congested);
                } else if (congested < 0)
                        atomic_inc(&c->congested);
        }

        bch_count_io_errors(ca, error, is_read, m);
}

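/*
 * Common completion path for bbios: account latency and errors against
 * the cache device, drop the bio, and put the closure the caller stashed
 * in bio->bi_private when it submitted the I/O.
 */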
void bch_bbio_endio(struct cache_set *c, struct bio *bio,
                    blk_status_t error, const char *m)
{
        struct closure *cl = bio->bi_private;

        bch_bbio_count_io_errors(c, bio, error, m);
        bio_put(bio);
        closure_put(cl);
}
