// SPDX-License-Identifier: GPL-2.0
/*
 * Moving/copying garbage collector
 *
 * Copyright 2012 Google, Inc.
 */

#include "bcache.h"
#include "btree.h"
#include "debug.h"
#include "request.h"

#include <trace/events/bcache.h>

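/*
 * Per-extent state for one copy: the closure sequencing its stages, the
 * keybuf entry being moved, the insert op that rewrites the data, and the
 * bio used for both the read and the write.
 */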
struct moving_io {
	struct closure		cl;
	struct keybuf_key	*w;
	struct data_insert_op	op;
	struct bbio		bio;
};

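/*
 * A key is worth moving if any of its pointers lands in a bucket that
 * garbage collection flagged with GC_MOVE.
 */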
static bool moving_pred(struct keybuf *buf, struct bkey *k)
{
	struct cache_set *c = container_of(buf, struct cache_set,
					   moving_gc_keys);
	unsigned int i;

	for (i = 0; i < KEY_PTRS(k); i++)
		if (ptr_available(c, k, i) &&
		    GC_MOVE(PTR_BUCKET(c, k, i)))
			return true;

	return false;
}

/* Moving GC - IO loop */

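/* Last closure stage: free the moving_io once all its I/O has finished. */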
static void moving_io_destructor(struct closure *cl)
{
	struct moving_io *io = container_of(cl, struct moving_io, cl);

	kfree(io);
}

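/*
 * Runs after the rewrite completes: free the bio's pages, note any replace
 * collision, drop the key from the keybuf and release our in-flight slot.
 */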
static void write_moving_finish(struct closure *cl)
{
	struct moving_io *io = container_of(cl, struct moving_io, cl);
	struct bio *bio = &io->bio.bio;

	bio_free_pages(bio);

	if (io->op.replace_collision)
		trace_bcache_gc_copy_collision(&io->w->key);

	bch_keybuf_del(&io->op.c->moving_gc_keys, io->w);

	up(&io->op.c->moving_in_flight);

	closure_return_with_destructor(cl, moving_io_destructor);
}

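/*
 * Read completion: record I/O errors; a clean key whose pointer went stale
 * mid-read also fails the op, so write_moving() won't reinsert data from a
 * bucket that may have been reused.
 */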
static void read_moving_endio(struct bio *bio)
{
	struct bbio *b = container_of(bio, struct bbio, bio);
	struct moving_io *io = container_of(bio->bi_private,
					    struct moving_io, cl);

	if (bio->bi_status)
		io->op.status = bio->bi_status;
	else if (!KEY_DIRTY(&b->key) &&
		 ptr_stale(io->op.c, &b->key, 0)) {
		io->op.status = BLK_STS_IOERR;
	}

	bch_bbio_endio(io->op.c, bio, bio->bi_status, "reading data to move");
}

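/*
 * Set up the embedded bio to span the key being moved, at idle I/O
 * priority; bch_bio_map() lays out the vecs, attaching pages is the
 * caller's job.
 */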
static void moving_init(struct moving_io *io)
{
	struct bio *bio = &io->bio.bio;

	bio_init(bio, NULL, bio->bi_inline_vecs,
		 DIV_ROUND_UP(KEY_SIZE(&io->w->key), PAGE_SECTORS), 0);
	bio_get(bio);
	bio_set_prio(bio, IOPRIO_PRIO_VALUE(IOPRIO_CLASS_IDLE, 0));

	bio->bi_iter.bi_size = KEY_SIZE(&io->w->key) << 9;
	bio->bi_private = &io->cl;
	bch_bio_map(bio, NULL);
}

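/*
 * Second stage: unless the read failed, reinsert the data using the old
 * key as a replace key, so the insert becomes a no-op (a collision) if
 * the extent was modified while the copy was in flight.
 */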
static void write_moving(struct closure *cl)
{
	struct moving_io *io = container_of(cl, struct moving_io, cl);
	struct data_insert_op *op = &io->op;

	if (!op->status) {
		moving_init(io);

		io->bio.bio.bi_iter.bi_sector = KEY_START(&io->w->key);
		op->write_prio = 1;
		op->bio = &io->bio.bio;

		op->writeback = KEY_DIRTY(&io->w->key);
		op->csum = KEY_CSUM(&io->w->key);

		bkey_copy(&op->replace_key, &io->w->key);
		op->replace = true;

		closure_call(&op->cl, bch_data_insert, NULL, cl);
	}

	continue_at(cl, write_moving_finish, op->wq);
}

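/*
 * First stage: read the extent's current contents, then continue to
 * write_moving() on the op's workqueue.
 */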
static void read_moving_submit(struct closure *cl)
{
	struct moving_io *io = container_of(cl, struct moving_io, cl);
	struct bio *bio = &io->bio.bio;

	bch_submit_bbio(bio, io->op.c, &io->w->key, 0);

	continue_at(cl, write_moving, io->op.wq);
}

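/*
 * Main copy loop: scan for keys that moving_pred() accepts, allocate a
 * moving_io for each and kick off the read/write pipeline, throttled by
 * the moving_in_flight semaphore.
 */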
static void read_moving(struct cache_set *c)
{
	struct keybuf_key *w;
	struct moving_io *io;
	struct bio *bio;
	struct closure cl;

	closure_init_stack(&cl);

	/* XXX: if we error, background writeback could stall indefinitely */

	while (!test_bit(CACHE_SET_STOPPING, &c->flags)) {
		w = bch_keybuf_next_rescan(c, &c->moving_gc_keys,
					   &MAX_KEY, moving_pred);
		if (!w)
			break;

		if (ptr_stale(c, &w->key, 0)) {
			bch_keybuf_del(&c->moving_gc_keys, w);
			continue;
		}

		io = kzalloc(struct_size(io, bio.bio.bi_inline_vecs,
					 DIV_ROUND_UP(KEY_SIZE(&w->key),
						      PAGE_SECTORS)),
			     GFP_KERNEL);
		if (!io)
			goto err;

		w->private = io;
		io->w = w;
		io->op.inode = KEY_INODE(&w->key);
		io->op.c = c;
		io->op.wq = c->moving_gc_wq;

		moving_init(io);
		bio = &io->bio.bio;

		bio->bi_opf = REQ_OP_READ;
		bio->bi_end_io = read_moving_endio;

		if (bch_bio_alloc_pages(bio, GFP_KERNEL))
			goto err;

		trace_bcache_gc_copy(&w->key);

		down(&c->moving_in_flight);
		closure_call(&io->cl, read_moving_submit, NULL, &cl);
	}

	if (0) {
err:		if (!IS_ERR_OR_NULL(w->private))
			kfree(w->private);

		bch_keybuf_del(&c->moving_gc_keys, w);
	}

	closure_sync(&cl);
}

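/*
 * Heap comparator: orders buckets so the one with the most live sectors
 * sits on top, letting the heap keep the emptiest buckets and evict the
 * fullest candidate first.
 */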
static bool bucket_cmp(struct bucket *l, struct bucket *r)
{
	return GC_SECTORS_USED(l) < GC_SECTORS_USED(r);
}

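/* Live sectors in the bucket currently on top of the heap (0 if empty). */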
static unsigned int bucket_heap_top(struct cache *ca)
{
	struct bucket *b;

	return (b = heap_peek(&ca->heap)) ? GC_SECTORS_USED(b) : 0;
}

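/*
 * Pick which buckets to evacuate: collect partially-full buckets in a
 * heap, pop candidates until the data left to move fits in the movinggc
 * reserve, mark the survivors GC_MOVE and copy their live data out via
 * read_moving().
 */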
void bch_moving_gc(struct cache_set *c)
{
	struct cache *ca = c->cache;
	struct bucket *b;
	unsigned long sectors_to_move, reserve_sectors;

	if (!c->copy_gc_enabled)
		return;

	mutex_lock(&c->bucket_lock);

	sectors_to_move = 0;
	reserve_sectors = ca->sb.bucket_size *
			  fifo_used(&ca->free[RESERVE_MOVINGGC]);

	ca->heap.used = 0;

	for_each_bucket(b, ca) {
		if (GC_MARK(b) == GC_MARK_METADATA ||
		    !GC_SECTORS_USED(b) ||
		    GC_SECTORS_USED(b) == ca->sb.bucket_size ||
		    atomic_read(&b->pin))
			continue;

		if (!heap_full(&ca->heap)) {
			sectors_to_move += GC_SECTORS_USED(b);
			heap_add(&ca->heap, b, bucket_cmp);
		} else if (bucket_cmp(b, heap_peek(&ca->heap))) {
			sectors_to_move -= bucket_heap_top(ca);
			sectors_to_move += GC_SECTORS_USED(b);

			ca->heap.data[0] = b;
			heap_sift(&ca->heap, 0, bucket_cmp);
		}
	}

	while (sectors_to_move > reserve_sectors) {
		heap_pop(&ca->heap, b, bucket_cmp);
		sectors_to_move -= GC_SECTORS_USED(b);
	}

	while (heap_pop(&ca->heap, b, bucket_cmp))
		SET_GC_MOVE(b, 1);

	mutex_unlock(&c->bucket_lock);

	c->moving_gc_keys.last_scanned = ZERO_KEY;

	read_moving(c);
}

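/* Initialize the keybuf and cap moving GC at 64 in-flight copies. */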
void bch_moving_init_cache_set(struct cache_set *c)
{
	bch_keybuf_init(&c->moving_gc_keys);
	sema_init(&c->moving_in_flight, 64);
}