// SPDX-License-Identifier: GPL-2.0-only
/*
   drbd_worker.c

   This file is part of DRBD by Philipp Reisner and Lars Ellenberg.

   Copyright (C) 2001-2008, LINBIT Information Technologies GmbH.
   Copyright (C) 1999-2008, Philipp Reisner <philipp.reisner@linbit.com>.
   Copyright (C) 2002-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.

*/

#include <linux/module.h>
#include <linux/drbd.h>
#include <linux/sched/signal.h>
#include <linux/wait.h>
#include <linux/mm.h>
#include <linux/memcontrol.h>
#include <linux/mm_inline.h>
#include <linux/slab.h>
#include <linux/random.h>
#include <linux/string.h>
#include <linux/scatterlist.h>
#include <linux/part_stat.h>

#include "drbd_int.h"
#include "drbd_protocol.h"
#include "drbd_req.h"

static int make_ov_request(struct drbd_peer_device *, int);
static int make_resync_request(struct drbd_peer_device *, int);

/* endio handlers:
 *   drbd_md_endio (defined here)
 *   drbd_request_endio (defined here)
 *   drbd_peer_request_endio (defined here)
 *   drbd_bm_endio (defined in drbd_bitmap.c)
 *
 * For all these callbacks, note the following:
 * The callbacks will be called in irq context by the IDE drivers,
 * and in Softirqs/Tasklets/BH context by the SCSI drivers.
 * Try to get the locking right :)
 *
 */

/* used for synchronous meta data and bitmap IO
 * submitted by drbd_md_sync_page_io()
 */
void drbd_md_endio(struct bio *bio)
{
	struct drbd_device *device;

	device = bio->bi_private;
	device->md_io.error = blk_status_to_errno(bio->bi_status);

	/* special case: drbd_md_read() during drbd_adm_attach() */
	if (device->ldev)
		put_ldev(device);
	bio_put(bio);

	/* We grabbed an extra reference in _drbd_md_sync_page_io() to be able
	 * to timeout on the lower level device, and eventually detach from it.
	 * If this io completion runs after that timeout expired, this
	 * drbd_md_put_buffer() may allow us to finally try and re-attach.
	 * During normal operation, this only puts that extra reference
	 * down to 1 again.
	 * Make sure we first drop the reference, and only then signal
	 * completion, or we may (in drbd_al_read_log()) cycle so fast into the
	 * next drbd_md_sync_page_io(), that we trigger the
	 * ASSERT(atomic_read(&device->md_io_in_use) == 1) there.
	 */
	drbd_md_put_buffer(device);
	device->md_io.done = 1;
	wake_up(&device->misc_wait);
}

/* reads on behalf of the partner,
 * "submitted" by the receiver
 */
static void drbd_endio_read_sec_final(struct drbd_peer_request *peer_req) __releases(local)
{
	unsigned long flags = 0;
	struct drbd_peer_device *peer_device = peer_req->peer_device;
	struct drbd_device *device = peer_device->device;

	spin_lock_irqsave(&device->resource->req_lock, flags);
	device->read_cnt += peer_req->i.size >> 9;
	list_del(&peer_req->w.list);
	if (list_empty(&device->read_ee))
		wake_up(&device->ee_wait);
	if (test_bit(__EE_WAS_ERROR, &peer_req->flags))
		__drbd_chk_io_error(device, DRBD_READ_ERROR);
	spin_unlock_irqrestore(&device->resource->req_lock, flags);

	drbd_queue_work(&peer_device->connection->sender_work, &peer_req->w);
	put_ldev(device);
}

/* writes on behalf of the partner, or resync writes,
 * "submitted" by the receiver, final stage. */
void drbd_endio_write_sec_final(struct drbd_peer_request *peer_req) __releases(local)
{
	unsigned long flags = 0;
	struct drbd_peer_device *peer_device = peer_req->peer_device;
	struct drbd_device *device = peer_device->device;
	struct drbd_connection *connection = peer_device->connection;
	struct drbd_interval i;
	int do_wake;
	u64 block_id;
	int do_al_complete_io;

	/* after we moved peer_req to done_ee,
	 * we may no longer access it,
	 * it may be freed/reused already!
	 * (as soon as we release the req_lock) */
	i = peer_req->i;
	do_al_complete_io = peer_req->flags & EE_CALL_AL_COMPLETE_IO;
	block_id = peer_req->block_id;
	peer_req->flags &= ~EE_CALL_AL_COMPLETE_IO;

	if (peer_req->flags & EE_WAS_ERROR) {
		/* In protocol != C, we usually do not send write acks.
		 * In case of a write error, send the neg ack anyways. */
		if (!__test_and_set_bit(__EE_SEND_WRITE_ACK, &peer_req->flags))
			inc_unacked(device);
		drbd_set_out_of_sync(peer_device, peer_req->i.sector, peer_req->i.size);
	}

	spin_lock_irqsave(&device->resource->req_lock, flags);
	device->writ_cnt += peer_req->i.size >> 9;
	list_move_tail(&peer_req->w.list, &device->done_ee);

	/*
	 * Do not remove from the write_requests tree here: we did not send the
	 * Ack yet and did not wake possibly waiting conflicting requests.
	 * Removal from the tree happens in "drbd_process_done_ee" within the
	 * appropriate dw.cb (e_end_block/e_end_resync_block) or from
	 * _drbd_clear_done_ee.
	 */

	do_wake = list_empty(block_id == ID_SYNCER ? &device->sync_ee : &device->active_ee);

	/* FIXME do we want to detach for failed REQ_OP_DISCARD?
	 * ((peer_req->flags & (EE_WAS_ERROR|EE_TRIM)) == EE_WAS_ERROR) */
	if (peer_req->flags & EE_WAS_ERROR)
		__drbd_chk_io_error(device, DRBD_WRITE_ERROR);

	if (connection->cstate >= C_WF_REPORT_PARAMS) {
		kref_get(&device->kref); /* put is in drbd_send_acks_wf() */
		if (!queue_work(connection->ack_sender, &peer_device->send_acks_work))
			kref_put(&device->kref, drbd_destroy_device);
	}
	spin_unlock_irqrestore(&device->resource->req_lock, flags);

	if (block_id == ID_SYNCER)
		drbd_rs_complete_io(device, i.sector);

	if (do_wake)
		wake_up(&device->ee_wait);

	if (do_al_complete_io)
		drbd_al_complete_io(device, &i);

	put_ldev(device);
}

/* writes on behalf of the partner, or resync writes,
 * "submitted" by the receiver.
 */
void drbd_peer_request_endio(struct bio *bio)
{
	struct drbd_peer_request *peer_req = bio->bi_private;
	struct drbd_device *device = peer_req->peer_device->device;
	bool is_write = bio_data_dir(bio) == WRITE;
	bool is_discard = bio_op(bio) == REQ_OP_WRITE_ZEROES ||
			  bio_op(bio) == REQ_OP_DISCARD;

	if (bio->bi_status && drbd_ratelimit())
		drbd_warn(device, "%s: error=%d s=%llus\n",
			  is_write ? (is_discard ? "discard" : "write")
				   : "read", bio->bi_status,
			  (unsigned long long)peer_req->i.sector);

	if (bio->bi_status)
		set_bit(__EE_WAS_ERROR, &peer_req->flags);

	bio_put(bio); /* no need for the bio anymore */
	if (atomic_dec_and_test(&peer_req->pending_bios)) {
		if (is_write)
			drbd_endio_write_sec_final(peer_req);
		else
			drbd_endio_read_sec_final(peer_req);
	}
}

static void
drbd_panic_after_delayed_completion_of_aborted_request(struct drbd_device *device)
{
	panic("drbd%u %s/%u potential random memory corruption caused by delayed completion of aborted local request\n",
	      device->minor, device->resource->name, device->vnr);
}

/* read, read-ahead, or write requests on R_PRIMARY coming from drbd_make_request
 */
void drbd_request_endio(struct bio *bio)
{
	unsigned long flags;
	struct drbd_request *req = bio->bi_private;
	struct drbd_device *device = req->device;
	struct bio_and_error m;
	enum drbd_req_event what;

	/* If this request was aborted locally before,
	 * but now was completed "successfully",
	 * chances are that this caused arbitrary data corruption.
	 *
	 * "Aborting" requests, or force-detaching the disk, is intended for
	 * completely blocked/hung local backing devices which no longer
	 * complete requests at all, not even error completions. In this
	 * situation, usually a hard-reset and failover is the only way out.
	 *
	 * By "aborting", basically faking a local error-completion,
	 * we allow for a more graceful switchover by cleanly migrating services.
	 * Still the affected node has to be rebooted "soon".
	 *
	 * By completing these requests, we allow the upper layers to re-use
	 * the associated data pages.
	 *
	 * If later the local backing device "recovers", and now DMAs some data
	 * from disk into the original request pages, in the best case it will
	 * just put random data into unused pages; but typically it will corrupt
	 * meanwhile completely unrelated data, causing all sorts of damage.
	 *
	 * Which means delayed successful completion,
	 * especially for READ requests,
	 * is a reason to panic().
	 *
	 * We assume that a delayed *error* completion is OK,
	 * though we still will complain noisily about it.
	 */
	if (unlikely(req->rq_state & RQ_LOCAL_ABORTED)) {
		if (drbd_ratelimit())
			drbd_emerg(device, "delayed completion of aborted local request; disk-timeout may be too aggressive\n");

		if (!bio->bi_status)
			drbd_panic_after_delayed_completion_of_aborted_request(device);
	}

	/* to avoid recursion in __req_mod */
	if (unlikely(bio->bi_status)) {
		switch (bio_op(bio)) {
		case REQ_OP_WRITE_ZEROES:
		case REQ_OP_DISCARD:
			if (bio->bi_status == BLK_STS_NOTSUPP)
				what = DISCARD_COMPLETED_NOTSUPP;
			else
				what = DISCARD_COMPLETED_WITH_ERROR;
			break;
		case REQ_OP_READ:
			if (bio->bi_opf & REQ_RAHEAD)
				what = READ_AHEAD_COMPLETED_WITH_ERROR;
			else
				what = READ_COMPLETED_WITH_ERROR;
			break;
		default:
			what = WRITE_COMPLETED_WITH_ERROR;
			break;
		}
	} else {
		what = COMPLETED_OK;
	}

	req->private_bio = ERR_PTR(blk_status_to_errno(bio->bi_status));
	bio_put(bio);

	/* not req_mod(), we need irqsave here! */
	spin_lock_irqsave(&device->resource->req_lock, flags);
	__req_mod(req, what, NULL, &m);
	spin_unlock_irqrestore(&device->resource->req_lock, flags);
	put_ldev(device);

	if (m.bio)
		complete_master_bio(device, &m);
}

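/* Compute a digest over a peer request's page chain with the given
 * transform. All pages but the last are fully covered; the last page
 * may be only partially used, depending on peer_req->i.size. */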
void drbd_csum_ee(struct crypto_shash *tfm, struct drbd_peer_request *peer_req, void *digest)
{
	SHASH_DESC_ON_STACK(desc, tfm);
	struct page *page = peer_req->pages;
	struct page *tmp;
	unsigned len;
	void *src;

	desc->tfm = tfm;

	crypto_shash_init(desc);

	src = kmap_atomic(page);
	while ((tmp = page_chain_next(page))) {
		/* all but the last page will be fully used */
		crypto_shash_update(desc, src, PAGE_SIZE);
		kunmap_atomic(src);
		page = tmp;
		src = kmap_atomic(page);
	}
	/* and now the last, possibly only partially used page */
	len = peer_req->i.size & (PAGE_SIZE - 1);
	crypto_shash_update(desc, src, len ?: PAGE_SIZE);
	kunmap_atomic(src);

	crypto_shash_final(desc, digest);
	shash_desc_zero(desc);
}

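/* Like drbd_csum_ee(), but digest all data segments of a bio. */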
void drbd_csum_bio(struct crypto_shash *tfm, struct bio *bio, void *digest)
{
	SHASH_DESC_ON_STACK(desc, tfm);
	struct bio_vec bvec;
	struct bvec_iter iter;

	desc->tfm = tfm;

	crypto_shash_init(desc);

	bio_for_each_segment(bvec, bio, iter) {
		u8 *src;

		src = bvec_kmap_local(&bvec);
		crypto_shash_update(desc, src, bvec.bv_len);
		kunmap_local(src);
	}
	crypto_shash_final(desc, digest);
	shash_desc_zero(desc);
}

/* MAYBE merge common code with w_e_end_ov_req */
static int w_e_send_csum(struct drbd_work *w, int cancel)
{
	struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w);
	struct drbd_peer_device *peer_device = peer_req->peer_device;
	struct drbd_device *device = peer_device->device;
	int digest_size;
	void *digest;
	int err = 0;

	if (unlikely(cancel))
		goto out;

	if (unlikely((peer_req->flags & EE_WAS_ERROR) != 0))
		goto out;

	digest_size = crypto_shash_digestsize(peer_device->connection->csums_tfm);
	digest = kmalloc(digest_size, GFP_NOIO);
	if (digest) {
		sector_t sector = peer_req->i.sector;
		unsigned int size = peer_req->i.size;
		drbd_csum_ee(peer_device->connection->csums_tfm, peer_req, digest);
		/* Free peer_req and pages before send.
		 * In case we block on congestion, we could otherwise run into
		 * some distributed deadlock, if the other side blocks on
		 * congestion as well, because our receiver blocks in
		 * drbd_alloc_pages due to pp_in_use > max_buffers. */
		drbd_free_peer_req(device, peer_req);
		peer_req = NULL;
		inc_rs_pending(peer_device);
		err = drbd_send_drequest_csum(peer_device, sector, size,
					      digest, digest_size,
					      P_CSUM_RS_REQUEST);
		kfree(digest);
	} else {
		drbd_err(device, "kmalloc() of digest failed.\n");
		err = -ENOMEM;
	}

out:
	if (peer_req)
		drbd_free_peer_req(device, peer_req);

	if (unlikely(err))
		drbd_err(device, "drbd_send_drequest(..., csum) failed\n");
	return err;
}

#define GFP_TRY	(__GFP_HIGHMEM | __GFP_NOWARN)

static int read_for_csum(struct drbd_peer_device *peer_device, sector_t sector, int size)
{
	struct drbd_device *device = peer_device->device;
	struct drbd_peer_request *peer_req;

	if (!get_ldev(device))
		return -EIO;

	/* GFP_TRY, because if there is no memory available right now, this may
	 * be rescheduled for later. It is "only" background resync, after all. */
	peer_req = drbd_alloc_peer_req(peer_device, ID_SYNCER /* unused */, sector,
				       size, size, GFP_TRY);
	if (!peer_req)
		goto defer;

	peer_req->w.cb = w_e_send_csum;
	peer_req->opf = REQ_OP_READ;
	spin_lock_irq(&device->resource->req_lock);
	list_add_tail(&peer_req->w.list, &device->read_ee);
	spin_unlock_irq(&device->resource->req_lock);

	atomic_add(size >> 9, &device->rs_sect_ev);
	if (drbd_submit_peer_request(peer_req) == 0)
		return 0;

	/* If it failed because of ENOMEM, retry should help. If it failed
	 * because bio_add_page failed (probably broken lower level driver),
	 * retry may or may not help.
	 * If it does not, you may need to force disconnect. */
	spin_lock_irq(&device->resource->req_lock);
	list_del(&peer_req->w.list);
	spin_unlock_irq(&device->resource->req_lock);

	drbd_free_peer_req(device, peer_req);
defer:
	put_ldev(device);
	return -EAGAIN;
}

int w_resync_timer(struct drbd_work *w, int cancel)
{
	struct drbd_device *device =
		container_of(w, struct drbd_device, resync_work);

	switch (device->state.conn) {
	case C_VERIFY_S:
		make_ov_request(first_peer_device(device), cancel);
		break;
	case C_SYNC_TARGET:
		make_resync_request(first_peer_device(device), cancel);
		break;
	}

	return 0;
}

void resync_timer_fn(struct timer_list *t)
{
	struct drbd_device *device = from_timer(device, t, resync_timer);

	drbd_queue_work_if_unqueued(
		&first_peer_device(device)->connection->sender_work,
		&device->resync_work);
}

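/* The fifo_buffer below is the "plan ahead" history of the resync rate
 * controller: a fixed-size ring of per-step corrections. fifo_push()
 * returns the oldest entry while storing a new one; fifo_add_val()
 * spreads a correction evenly over all planned steps. */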
static void fifo_set(struct fifo_buffer *fb, int value)
{
	int i;

	for (i = 0; i < fb->size; i++)
		fb->values[i] = value;
}

static int fifo_push(struct fifo_buffer *fb, int value)
{
	int ov;

	ov = fb->values[fb->head_index];
	fb->values[fb->head_index++] = value;

	if (fb->head_index >= fb->size)
		fb->head_index = 0;

	return ov;
}

static void fifo_add_val(struct fifo_buffer *fb, int value)
{
	int i;

	for (i = 0; i < fb->size; i++)
		fb->values[i] += value;
}

struct fifo_buffer *fifo_alloc(unsigned int fifo_size)
{
	struct fifo_buffer *fb;

	fb = kzalloc(struct_size(fb, values, fifo_size), GFP_NOIO);
	if (!fb)
		return NULL;

	fb->head_index = 0;
	fb->size = fifo_size;
	fb->total = 0;

	return fb;
}

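/* Feedback controller for the dynamic resync rate (c-plan-ahead > 0):
 * aim to keep "want" sectors in flight, where want is either the
 * configured c-fill-target, or is derived from the measured incoming
 * rate so that data stays in flight for roughly c-delay-target:
 *	want = sect_in * c_delay_target * HZ / (SLEEP_TIME * 10)
 * The difference between want and what is in flight (plus the already
 * planned corrections) is spread over "steps" future invocations via
 * the plan fifo. Returns the number of sectors to request in this
 * turn, clamped to c-max-rate. */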
static int drbd_rs_controller(struct drbd_peer_device *peer_device, unsigned int sect_in)
{
	struct drbd_device *device = peer_device->device;
	struct disk_conf *dc;
	unsigned int want;	/* The number of sectors we want in-flight */
	int req_sect;		/* Number of sectors to request in this turn */
	int correction;		/* Number of sectors more we need in-flight */
	int cps;		/* correction per invocation of drbd_rs_controller() */
	int steps;		/* Number of time steps to plan ahead */
	int curr_corr;
	int max_sect;
	struct fifo_buffer *plan;

	dc = rcu_dereference(device->ldev->disk_conf);
	plan = rcu_dereference(device->rs_plan_s);

	steps = plan->size; /* (dc->c_plan_ahead * 10 * SLEEP_TIME) / HZ; */

	if (device->rs_in_flight + sect_in == 0) { /* At start of resync */
		want = ((dc->resync_rate * 2 * SLEEP_TIME) / HZ) * steps;
	} else { /* normal path */
		want = dc->c_fill_target ? dc->c_fill_target :
			sect_in * dc->c_delay_target * HZ / (SLEEP_TIME * 10);
	}

	correction = want - device->rs_in_flight - plan->total;

	/* Plan ahead */
	cps = correction / steps;
	fifo_add_val(plan, cps);
	plan->total += cps * steps;

	/* What we do in this step */
	curr_corr = fifo_push(plan, 0);
	plan->total -= curr_corr;

	req_sect = sect_in + curr_corr;
	if (req_sect < 0)
		req_sect = 0;

	max_sect = (dc->c_max_rate * 2 * SLEEP_TIME) / HZ;
	if (req_sect > max_sect)
		req_sect = max_sect;

	/*
	drbd_warn(device, "si=%u if=%d wa=%u co=%d st=%d cps=%d pl=%d cc=%d rs=%d\n",
		 sect_in, device->rs_in_flight, want, correction,
		 steps, cps, device->rs_planed, curr_corr, req_sect);
	*/

	return req_sect;
}

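/* Decide how many resync requests (in units of BM_BLOCK_SIZE) to issue
 * in this SLEEP_TIME tick: either via the dynamic controller above, or
 * from the static resync-rate setting; in both cases capped below so
 * that at most max-buffers/2 are in flight. */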
static int drbd_rs_number_requests(struct drbd_peer_device *peer_device)
{
	struct drbd_device *device = peer_device->device;
	unsigned int sect_in;  /* Number of sectors that came in since the last turn */
	int number, mxb;

	sect_in = atomic_xchg(&device->rs_sect_in, 0);
	device->rs_in_flight -= sect_in;

	rcu_read_lock();
	mxb = drbd_get_max_buffers(device) / 2;
	if (rcu_dereference(device->rs_plan_s)->size) {
		number = drbd_rs_controller(peer_device, sect_in) >> (BM_BLOCK_SHIFT - 9);
		device->c_sync_rate = number * HZ * (BM_BLOCK_SIZE / 1024) / SLEEP_TIME;
	} else {
		device->c_sync_rate = rcu_dereference(device->ldev->disk_conf)->resync_rate;
		number = SLEEP_TIME * device->c_sync_rate / ((BM_BLOCK_SIZE / 1024) * HZ);
	}
	rcu_read_unlock();

	/* Don't have more than "max-buffers"/2 in-flight.
	 * Otherwise we may cause the remote site to stall on drbd_alloc_pages(),
	 * potentially causing a distributed deadlock on congestion during
	 * online-verify or (checksum-based) resync, if max-buffers,
	 * socket buffer sizes and resync rate settings are mis-configured. */

	/* note that "number" is in units of "BM_BLOCK_SIZE" (which is 4k),
	 * mxb (as used here, and in drbd_alloc_pages on the peer) is
	 * "number of pages" (typically also 4k),
	 * but "rs_in_flight" is in "sectors" (512 Byte). */
	if (mxb - device->rs_in_flight/8 < number)
		number = mxb - device->rs_in_flight/8;

	return number;
}

static int make_resync_request(struct drbd_peer_device *const peer_device, int cancel)
{
	struct drbd_device *const device = peer_device->device;
	struct drbd_connection *const connection = peer_device ? peer_device->connection : NULL;
	unsigned long bit;
	sector_t sector;
	const sector_t capacity = get_capacity(device->vdisk);
	int max_bio_size;
	int number, rollback_i, size;
	int align, requeue = 0;
	int i = 0;
	int discard_granularity = 0;

	if (unlikely(cancel))
		return 0;

	if (device->rs_total == 0) {
		/* empty resync? */
		drbd_resync_finished(peer_device);
		return 0;
	}

	if (!get_ldev(device)) {
		/* Since we only need to access device->resync a
		   get_ldev_if_state(device,D_FAILED) would be sufficient, but
		   to continue resync with a broken disk makes no sense at
		   all */
		drbd_err(device, "Disk broke down during resync!\n");
		return 0;
	}

	if (connection->agreed_features & DRBD_FF_THIN_RESYNC) {
		rcu_read_lock();
		discard_granularity = rcu_dereference(device->ldev->disk_conf)->rs_discard_granularity;
		rcu_read_unlock();
	}

	max_bio_size = queue_max_hw_sectors(device->rq_queue) << 9;
	number = drbd_rs_number_requests(peer_device);
	if (number <= 0)
		goto requeue;

	for (i = 0; i < number; i++) {
		/* Stop generating RS requests when half of the send buffer is filled,
		 * but notify TCP that we'd like to have more space. */
		mutex_lock(&connection->data.mutex);
		if (connection->data.socket) {
			struct sock *sk = connection->data.socket->sk;
			int queued = sk->sk_wmem_queued;
			int sndbuf = sk->sk_sndbuf;
			if (queued > sndbuf / 2) {
				requeue = 1;
				if (sk->sk_socket)
					set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
			}
		} else
			requeue = 1;
		mutex_unlock(&connection->data.mutex);
		if (requeue)
			goto requeue;

next_sector:
		size = BM_BLOCK_SIZE;
		bit  = drbd_bm_find_next(device, device->bm_resync_fo);

		if (bit == DRBD_END_OF_BITMAP) {
			device->bm_resync_fo = drbd_bm_bits(device);
			put_ldev(device);
			return 0;
		}

		sector = BM_BIT_TO_SECT(bit);

		if (drbd_try_rs_begin_io(peer_device, sector)) {
			device->bm_resync_fo = bit;
			goto requeue;
		}
		device->bm_resync_fo = bit + 1;

		if (unlikely(drbd_bm_test_bit(device, bit) == 0)) {
			drbd_rs_complete_io(device, sector);
			goto next_sector;
		}

#if DRBD_MAX_BIO_SIZE > BM_BLOCK_SIZE
		/* try to find some adjacent bits.
		 * we stop if we have already the maximum req size.
		 *
		 * Additionally always align bigger requests, in order to
		 * be prepared for all stripe sizes of software RAIDs.
		 */
		align = 1;
		rollback_i = i;
		while (i < number) {
			if (size + BM_BLOCK_SIZE > max_bio_size)
				break;

			/* stay aligned: a merged request must start at a
			 * multiple of its own size (one BM_BLOCK is 4k,
			 * i.e. 8 sectors, hence align+3) */
			if (sector & ((1<<(align+3))-1))
				break;

			if (discard_granularity && size == discard_granularity)
				break;

			/* do not cross extent boundaries */
			if (((bit+1) & BM_BLOCKS_PER_BM_EXT_MASK) == 0)
				break;
			/* now, is it actually dirty, after all?
			 * caution, drbd_bm_test_bit is tri-state for some
			 * obscure reason; ( b == 0 ) would get the out-of-band
			 * only accidentally right because of the "oddly sized"
			 * adjustment below */
			if (drbd_bm_test_bit(device, bit+1) != 1)
				break;
			bit++;
			size += BM_BLOCK_SIZE;
			if ((BM_BLOCK_SIZE << align) <= size)
				align++;
			i++;
		}
		/* if we merged some,
		 * reset the offset to start the next drbd_bm_find_next from */
		if (size > BM_BLOCK_SIZE)
			device->bm_resync_fo = bit + 1;
#endif

		/* adjust very last sectors, in case we are oddly sized */
		if (sector + (size>>9) > capacity)
			size = (capacity-sector)<<9;

		if (device->use_csums) {
			switch (read_for_csum(peer_device, sector, size)) {
			case -EIO: /* Disk failure */
				put_ldev(device);
				return -EIO;
			case -EAGAIN: /* allocation failed, or ldev busy */
				drbd_rs_complete_io(device, sector);
				device->bm_resync_fo = BM_SECT_TO_BIT(sector);
				i = rollback_i;
				goto requeue;
			case 0:
				/* everything ok */
				break;
			default:
				BUG();
			}
		} else {
			int err;

			inc_rs_pending(peer_device);
			err = drbd_send_drequest(peer_device,
						 size == discard_granularity ? P_RS_THIN_REQ : P_RS_DATA_REQUEST,
						 sector, size, ID_SYNCER);
			if (err) {
				drbd_err(device, "drbd_send_drequest() failed, aborting...\n");
				dec_rs_pending(peer_device);
				put_ldev(device);
				return err;
			}
		}
	}

	if (device->bm_resync_fo >= drbd_bm_bits(device)) {
		/* last syncer _request_ was sent,
		 * but the P_RS_DATA_REPLY not yet received.  sync will end (and
		 * next sync group will resume), as soon as we receive the last
		 * resync data block, and the last bit is cleared.
		 * until then resync "work" is "inactive" ...
		 */
		put_ldev(device);
		return 0;
	}

requeue:
	device->rs_in_flight += (i << (BM_BLOCK_SHIFT - 9));
	mod_timer(&device->resync_timer, jiffies + SLEEP_TIME);
	put_ldev(device);
	return 0;
}

static int make_ov_request(struct drbd_peer_device *peer_device, int cancel)
{
	struct drbd_device *device = peer_device->device;
	int number, i, size;
	sector_t sector;
	const sector_t capacity = get_capacity(device->vdisk);
	bool stop_sector_reached = false;

	if (unlikely(cancel))
		return 1;

	number = drbd_rs_number_requests(peer_device);

	sector = device->ov_position;
	for (i = 0; i < number; i++) {
		if (sector >= capacity)
			return 1;

		/* We check for "finished" only in the reply path:
		 * w_e_end_ov_reply().
		 * We need to send at least one request out. */
		stop_sector_reached = i > 0
			&& verify_can_do_stop_sector(device)
			&& sector >= device->ov_stop_sector;
		if (stop_sector_reached)
			break;

		size = BM_BLOCK_SIZE;

		if (drbd_try_rs_begin_io(peer_device, sector)) {
			device->ov_position = sector;
			goto requeue;
		}

		if (sector + (size>>9) > capacity)
			size = (capacity-sector)<<9;

		inc_rs_pending(peer_device);
		if (drbd_send_ov_request(first_peer_device(device), sector, size)) {
			dec_rs_pending(peer_device);
			return 0;
		}
		sector += BM_SECT_PER_BIT;
	}
	device->ov_position = sector;

requeue:
	device->rs_in_flight += (i << (BM_BLOCK_SHIFT - 9));
	if (i == 0 || !stop_sector_reached)
		mod_timer(&device->resync_timer, jiffies + SLEEP_TIME);
	return 1;
}

int w_ov_finished(struct drbd_work *w, int cancel)
{
	struct drbd_device_work *dw =
		container_of(w, struct drbd_device_work, w);
	struct drbd_device *device = dw->device;
	kfree(dw);
	ov_out_of_sync_print(first_peer_device(device));
	drbd_resync_finished(first_peer_device(device));

	return 0;
}

static int w_resync_finished(struct drbd_work *w, int cancel)
{
	struct drbd_device_work *dw =
		container_of(w, struct drbd_device_work, w);
	struct drbd_device *device = dw->device;
	kfree(dw);

	drbd_resync_finished(first_peer_device(device));

	return 0;
}

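/* Send a ping to the peer and wait until the ping ack arrives, or until
 * the connection is lost. */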
static void ping_peer(struct drbd_device *device)
{
	struct drbd_connection *connection = first_peer_device(device)->connection;

	clear_bit(GOT_PING_ACK, &connection->flags);
	request_ping(connection);
	wait_event(connection->ping_wait,
		   test_bit(GOT_PING_ACK, &connection->flags) || device->state.conn < C_CONNECTED);
}

int drbd_resync_finished(struct drbd_peer_device *peer_device)
{
	struct drbd_device *device = peer_device->device;
	struct drbd_connection *connection = peer_device->connection;
	unsigned long db, dt, dbdt;
	unsigned long n_oos;
	union drbd_state os, ns;
	struct drbd_device_work *dw;
	char *khelper_cmd = NULL;
	int verify_done = 0;

	/* Remove all elements from the resync LRU. Since future actions
	 * might set bits in the (main) bitmap, then the entries in the
	 * resync LRU would be wrong. */
	if (drbd_rs_del_all(device)) {
		/* In case this is not possible now, most probably because
		 * there are P_RS_DATA_REPLY packets lingering on the worker's
		 * queue (or even the read operations for those packets
		 * are not finished by now). Retry in 100ms. */

		schedule_timeout_interruptible(HZ / 10);
		dw = kmalloc(sizeof(struct drbd_device_work), GFP_ATOMIC);
		if (dw) {
			dw->w.cb = w_resync_finished;
			dw->device = device;
			drbd_queue_work(&connection->sender_work, &dw->w);
			return 1;
		}
		drbd_err(device, "Warn failed to drbd_rs_del_all() and to kmalloc(dw).\n");
	}

	dt = (jiffies - device->rs_start - device->rs_paused) / HZ;
	if (dt <= 0)
		dt = 1;

	db = device->rs_total;
	/* adjust for verify start and stop sectors, respectively the reached position */
	if (device->state.conn == C_VERIFY_S || device->state.conn == C_VERIFY_T)
		db -= device->ov_left;

	dbdt = Bit2KB(db/dt);
	device->rs_paused /= HZ;

	if (!get_ldev(device))
		goto out;

	ping_peer(device);

	spin_lock_irq(&device->resource->req_lock);
	os = drbd_read_state(device);

	verify_done = (os.conn == C_VERIFY_S || os.conn == C_VERIFY_T);

	/* This protects us against multiple calls (that can happen in the presence
	   of application IO), and against connectivity loss just before we arrive here. */
	if (os.conn <= C_CONNECTED)
		goto out_unlock;

	ns = os;
	ns.conn = C_CONNECTED;

	drbd_info(device, "%s done (total %lu sec; paused %lu sec; %lu K/sec)\n",
		  verify_done ? "Online verify" : "Resync",
		  dt + device->rs_paused, device->rs_paused, dbdt);

	n_oos = drbd_bm_total_weight(device);

	if (os.conn == C_VERIFY_S || os.conn == C_VERIFY_T) {
		if (n_oos) {
			drbd_alert(device, "Online verify found %lu %dk block out of sync!\n",
				   n_oos, Bit2KB(1));
			khelper_cmd = "out-of-sync";
		}
	} else {
		D_ASSERT(device, (n_oos - device->rs_failed) == 0);

		if (os.conn == C_SYNC_TARGET || os.conn == C_PAUSED_SYNC_T)
			khelper_cmd = "after-resync-target";

		if (device->use_csums && device->rs_total) {
			const unsigned long s = device->rs_same_csum;
			const unsigned long t = device->rs_total;
			const int ratio =
				(t == 0)     ? 0 :
				(t < 100000) ? ((s*100)/t) : (s/(t/100));
			drbd_info(device, "%u %% had equal checksums, eliminated: %luK; "
				  "transferred %luK total %luK\n",
				  ratio,
				  Bit2KB(device->rs_same_csum),
				  Bit2KB(device->rs_total - device->rs_same_csum),
				  Bit2KB(device->rs_total));
		}
	}

	if (device->rs_failed) {
		drbd_info(device, " %lu failed blocks\n", device->rs_failed);

		if (os.conn == C_SYNC_TARGET || os.conn == C_PAUSED_SYNC_T) {
			ns.disk = D_INCONSISTENT;
			ns.pdsk = D_UP_TO_DATE;
		} else {
			ns.disk = D_UP_TO_DATE;
			ns.pdsk = D_INCONSISTENT;
		}
	} else {
		ns.disk = D_UP_TO_DATE;
		ns.pdsk = D_UP_TO_DATE;

		if (os.conn == C_SYNC_TARGET || os.conn == C_PAUSED_SYNC_T) {
			if (device->p_uuid) {
				int i;
				for (i = UI_BITMAP ; i <= UI_HISTORY_END ; i++)
					_drbd_uuid_set(device, i, device->p_uuid[i]);
				drbd_uuid_set(device, UI_BITMAP, device->ldev->md.uuid[UI_CURRENT]);
				_drbd_uuid_set(device, UI_CURRENT, device->p_uuid[UI_CURRENT]);
			} else {
				drbd_err(device, "device->p_uuid is NULL! BUG\n");
			}
		}

		if (!(os.conn == C_VERIFY_S || os.conn == C_VERIFY_T)) {
			/* for verify runs, we don't update uuids here,
			 * so there would be nothing to report. */
			drbd_uuid_set_bm(device, 0UL);
			drbd_print_uuids(device, "updated UUIDs");
			if (device->p_uuid) {
				/* Now the two UUID sets are equal, update what we
				 * know of the peer. */
				int i;
				for (i = UI_CURRENT ; i <= UI_HISTORY_END ; i++)
					device->p_uuid[i] = device->ldev->md.uuid[i];
			}
		}
	}

	_drbd_set_state(device, ns, CS_VERBOSE, NULL);
out_unlock:
	spin_unlock_irq(&device->resource->req_lock);

	/* If we have been sync source, and have an effective fencing-policy,
	 * once *all* volumes are back in sync, call "unfence". */
	if (os.conn == C_SYNC_SOURCE) {
		enum drbd_disk_state disk_state = D_MASK;
		enum drbd_disk_state pdsk_state = D_MASK;
		enum drbd_fencing_p fp = FP_DONT_CARE;

		rcu_read_lock();
		fp = rcu_dereference(device->ldev->disk_conf)->fencing;
		if (fp != FP_DONT_CARE) {
			struct drbd_peer_device *peer_device;
			int vnr;
			idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
				struct drbd_device *device = peer_device->device;
				disk_state = min_t(enum drbd_disk_state, disk_state, device->state.disk);
				pdsk_state = min_t(enum drbd_disk_state, pdsk_state, device->state.pdsk);
			}
		}
		rcu_read_unlock();
		if (disk_state == D_UP_TO_DATE && pdsk_state == D_UP_TO_DATE)
			conn_khelper(connection, "unfence-peer");
	}

	put_ldev(device);
out:
	device->rs_total  = 0;
	device->rs_failed = 0;
	device->rs_paused = 0;

	/* reset start sector, if we reached end of device */
	if (verify_done && device->ov_left == 0)
		device->ov_start_sector = 0;

	drbd_md_sync(device);

	if (khelper_cmd)
		drbd_khelper(device, khelper_cmd);

	return 1;
}

/* helper: move a peer request to net_ee while its pages are still in use
 * by the network stack, otherwise free it right away */
static void move_to_net_ee_or_free(struct drbd_device *device, struct drbd_peer_request *peer_req)
{
	if (drbd_peer_req_has_active_page(peer_req)) {
		/* This might happen if sendpage() has not finished */
		int i = PFN_UP(peer_req->i.size);
		atomic_add(i, &device->pp_in_use_by_net);
		atomic_sub(i, &device->pp_in_use);
		spin_lock_irq(&device->resource->req_lock);
		list_add_tail(&peer_req->w.list, &device->net_ee);
		spin_unlock_irq(&device->resource->req_lock);
		wake_up(&drbd_pp_wait);
	} else
		drbd_free_peer_req(device, peer_req);
}

/**
 * w_e_end_data_req() - Worker callback, to send a P_DATA_REPLY packet in response to a P_DATA_REQUEST
 * @w:		work object.
 * @cancel:	The connection will be closed anyways
 */
int w_e_end_data_req(struct drbd_work *w, int cancel)
{
	struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w);
	struct drbd_peer_device *peer_device = peer_req->peer_device;
	struct drbd_device *device = peer_device->device;
	int err;

	if (unlikely(cancel)) {
		drbd_free_peer_req(device, peer_req);
		dec_unacked(device);
		return 0;
	}

	if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
		err = drbd_send_block(peer_device, P_DATA_REPLY, peer_req);
	} else {
		if (drbd_ratelimit())
			drbd_err(device, "Sending NegDReply. sector=%llus.\n",
				 (unsigned long long)peer_req->i.sector);

		err = drbd_send_ack(peer_device, P_NEG_DREPLY, peer_req);
	}

	dec_unacked(device);

	move_to_net_ee_or_free(device, peer_req);

	if (unlikely(err))
		drbd_err(device, "drbd_send_block() failed\n");
	return err;
}

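/* Check whether a peer request's payload is all zeroes. Used for thin
 * resync (EE_RS_THIN_REQ): an all-zero block can be answered with
 * P_RS_DEALLOCATED instead of shipping the zeroed data. */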
static bool all_zero(struct drbd_peer_request *peer_req)
{
	struct page *page = peer_req->pages;
	unsigned int len = peer_req->i.size;

	page_chain_for_each(page) {
		unsigned int l = min_t(unsigned int, len, PAGE_SIZE);
		unsigned int i, words = l / sizeof(long);
		unsigned long *d;

		d = kmap_atomic(page);
		for (i = 0; i < words; i++) {
			if (d[i]) {
				kunmap_atomic(d);
				return false;
			}
		}
		kunmap_atomic(d);
		len -= l;
	}

	return true;
}

/**
 * w_e_end_rsdata_req() - Worker callback to send a P_RS_DATA_REPLY packet in response to a P_RS_DATA_REQUEST
 * @w:		work object.
 * @cancel:	The connection will be closed anyways
 */
int w_e_end_rsdata_req(struct drbd_work *w, int cancel)
{
	struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w);
	struct drbd_peer_device *peer_device = peer_req->peer_device;
	struct drbd_device *device = peer_device->device;
	int err;

	if (unlikely(cancel)) {
		drbd_free_peer_req(device, peer_req);
		dec_unacked(device);
		return 0;
	}

	if (get_ldev_if_state(device, D_FAILED)) {
		drbd_rs_complete_io(device, peer_req->i.sector);
		put_ldev(device);
	}

	if (device->state.conn == C_AHEAD) {
		err = drbd_send_ack(peer_device, P_RS_CANCEL, peer_req);
	} else if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
		if (likely(device->state.pdsk >= D_INCONSISTENT)) {
			inc_rs_pending(peer_device);
			if (peer_req->flags & EE_RS_THIN_REQ && all_zero(peer_req))
				err = drbd_send_rs_deallocated(peer_device, peer_req);
			else
				err = drbd_send_block(peer_device, P_RS_DATA_REPLY, peer_req);
		} else {
			if (drbd_ratelimit())
				drbd_err(device, "Not sending RSDataReply, "
					 "partner DISKLESS!\n");
			err = 0;
		}
	} else {
		if (drbd_ratelimit())
			drbd_err(device, "Sending NegRSDReply. sector %llus.\n",
				 (unsigned long long)peer_req->i.sector);

		err = drbd_send_ack(peer_device, P_NEG_RS_DREPLY, peer_req);

		/* update resync data with failure */
		drbd_rs_failed_io(peer_device, peer_req->i.sector, peer_req->i.size);
	}

	dec_unacked(device);

	move_to_net_ee_or_free(device, peer_req);

	if (unlikely(err))
		drbd_err(device, "drbd_send_block() failed\n");
	return err;
}

int w_e_end_csum_rs_req(struct drbd_work *w, int cancel)
{
	struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w);
	struct drbd_peer_device *peer_device = peer_req->peer_device;
	struct drbd_device *device = peer_device->device;
	struct digest_info *di;
	int digest_size;
	void *digest = NULL;
	int err, eq = 0;

	if (unlikely(cancel)) {
		drbd_free_peer_req(device, peer_req);
		dec_unacked(device);
		return 0;
	}

	if (get_ldev(device)) {
		drbd_rs_complete_io(device, peer_req->i.sector);
		put_ldev(device);
	}

	di = peer_req->digest;

	if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
		/* quick hack to try to avoid a race against reconfiguration.
		 * a real fix would be much more involved,
		 * introducing more locking mechanisms */
		if (peer_device->connection->csums_tfm) {
			digest_size = crypto_shash_digestsize(peer_device->connection->csums_tfm);
			D_ASSERT(device, digest_size == di->digest_size);
			digest = kmalloc(digest_size, GFP_NOIO);
		}
		if (digest) {
			drbd_csum_ee(peer_device->connection->csums_tfm, peer_req, digest);
			eq = !memcmp(digest, di->digest, digest_size);
			kfree(digest);
		}

		if (eq) {
			drbd_set_in_sync(peer_device, peer_req->i.sector, peer_req->i.size);
			/* rs_same_csums unit is BM_BLOCK_SIZE */
			device->rs_same_csum += peer_req->i.size >> BM_BLOCK_SHIFT;
			err = drbd_send_ack(peer_device, P_RS_IS_IN_SYNC, peer_req);
		} else {
			inc_rs_pending(peer_device);
			peer_req->block_id = ID_SYNCER; /* By setting block_id, digest pointer becomes invalid! */
			peer_req->flags &= ~EE_HAS_DIGEST; /* This peer request no longer has a digest pointer */
			kfree(di);
			err = drbd_send_block(peer_device, P_RS_DATA_REPLY, peer_req);
		}
	} else {
		err = drbd_send_ack(peer_device, P_NEG_RS_DREPLY, peer_req);
		if (drbd_ratelimit())
			drbd_err(device, "Sending NegDReply. I guess it gets messy.\n");
	}

	dec_unacked(device);
	move_to_net_ee_or_free(device, peer_req);

	if (unlikely(err))
		drbd_err(device, "drbd_send_block/ack() failed\n");
	return err;
}

int w_e_end_ov_req(struct drbd_work *w, int cancel)
{
	struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w);
	struct drbd_peer_device *peer_device = peer_req->peer_device;
	struct drbd_device *device = peer_device->device;
	sector_t sector = peer_req->i.sector;
	unsigned int size = peer_req->i.size;
	int digest_size;
	void *digest;
	int err = 0;

	if (unlikely(cancel))
		goto out;

	digest_size = crypto_shash_digestsize(peer_device->connection->verify_tfm);
	digest = kmalloc(digest_size, GFP_NOIO);
	if (!digest) {
		err = 1;	/* terminate the connection in case the allocation failed */
		goto out;
	}

	if (likely(!(peer_req->flags & EE_WAS_ERROR)))
		drbd_csum_ee(peer_device->connection->verify_tfm, peer_req, digest);
	else
		memset(digest, 0, digest_size);

	/* Free e and pages before send.
	 * In case we block on congestion, we could otherwise run into
	 * some distributed deadlock, if the other side blocks on
	 * congestion as well, because our receiver blocks in
	 * drbd_alloc_pages due to pp_in_use > max_buffers. */
	drbd_free_peer_req(device, peer_req);
	peer_req = NULL;
	inc_rs_pending(peer_device);
	err = drbd_send_drequest_csum(peer_device, sector, size, digest, digest_size, P_OV_REPLY);
	if (err)
		dec_rs_pending(peer_device);
	kfree(digest);

out:
	if (peer_req)
		drbd_free_peer_req(device, peer_req);
	dec_unacked(device);
	return err;
}

void drbd_ov_out_of_sync_found(struct drbd_peer_device *peer_device, sector_t sector, int size)
{
	struct drbd_device *device = peer_device->device;
	if (device->ov_last_oos_start + device->ov_last_oos_size == sector) {
		device->ov_last_oos_size += size>>9;
	} else {
		device->ov_last_oos_start = sector;
		device->ov_last_oos_size = size>>9;
	}
	drbd_set_out_of_sync(peer_device, sector, size);
}

int w_e_end_ov_reply(struct drbd_work *w, int cancel)
{
	struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w);
	struct drbd_peer_device *peer_device = peer_req->peer_device;
	struct drbd_device *device = peer_device->device;
	struct digest_info *di;
	void *digest;
	sector_t sector = peer_req->i.sector;
	unsigned int size = peer_req->i.size;
	int digest_size;
	int err, eq = 0;
	bool stop_sector_reached = false;

	if (unlikely(cancel)) {
		drbd_free_peer_req(device, peer_req);
		dec_unacked(device);
		return 0;
	}

	/* after "cancel", because after drbd_disconnect/drbd_rs_cancel_all
	 * the resync lru has been cleaned up already */
	if (get_ldev(device)) {
		drbd_rs_complete_io(device, peer_req->i.sector);
		put_ldev(device);
	}

	di = peer_req->digest;

	if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
		digest_size = crypto_shash_digestsize(peer_device->connection->verify_tfm);
		digest = kmalloc(digest_size, GFP_NOIO);
		if (digest) {
			drbd_csum_ee(peer_device->connection->verify_tfm, peer_req, digest);

			D_ASSERT(device, digest_size == di->digest_size);
			eq = !memcmp(digest, di->digest, digest_size);
			kfree(digest);
		}
	}

	/* Free peer_req and pages before send.
	 * In case we block on congestion, we could otherwise run into
	 * some distributed deadlock, if the other side blocks on
	 * congestion as well, because our receiver blocks in
	 * drbd_alloc_pages due to pp_in_use > max_buffers. */
	drbd_free_peer_req(device, peer_req);
	if (!eq)
		drbd_ov_out_of_sync_found(peer_device, sector, size);
	else
		ov_out_of_sync_print(peer_device);

	err = drbd_send_ack_ex(peer_device, P_OV_RESULT, sector, size,
			       eq ? ID_IN_SYNC : ID_OUT_OF_SYNC);

	dec_unacked(device);

	--device->ov_left;

	/* let's advance progress step marks only for every other megabyte */
	if ((device->ov_left & 0x200) == 0x200)
		drbd_advance_rs_marks(peer_device, device->ov_left);

	stop_sector_reached = verify_can_do_stop_sector(device) &&
		(sector + (size>>9)) >= device->ov_stop_sector;

	if (device->ov_left == 0 || stop_sector_reached) {
		ov_out_of_sync_print(peer_device);
		drbd_resync_finished(peer_device);
	}

	return err;
}

/* FIXME
 * We need to track the number of pending barrier acks,
 * and to be able to wait for them.
 * See also comment in drbd_adm_attach before drbd_suspend_io.
 */
static int drbd_send_barrier(struct drbd_connection *connection)
{
	struct p_barrier *p;
	struct drbd_socket *sock;

	sock = &connection->data;
	p = conn_prepare_command(connection, sock);
	if (!p)
		return -EIO;
	p->barrier = connection->send.current_epoch_nr;
	p->pad = 0;
	connection->send.current_epoch_writes = 0;
	connection->send.last_sent_barrier_jif = jiffies;

	return conn_send_command(connection, sock, P_BARRIER, sizeof(*p), NULL, 0);
}

static int pd_send_unplug_remote(struct drbd_peer_device *pd)
{
	struct drbd_socket *sock = &pd->connection->data;
	if (!drbd_prepare_command(pd, sock))
		return -EIO;
	return drbd_send_command(pd, sock, P_UNPLUG_REMOTE, 0, NULL, 0);
}

int w_send_write_hint(struct drbd_work *w, int cancel)
{
	struct drbd_device *device =
		container_of(w, struct drbd_device, unplug_work);

	if (cancel)
		return 0;
	return pd_send_unplug_remote(first_peer_device(device));
}

static void re_init_if_first_write(struct drbd_connection *connection, unsigned int epoch)
{
	if (!connection->send.seen_any_write_yet) {
		connection->send.seen_any_write_yet = true;
		connection->send.current_epoch_nr = epoch;
		connection->send.current_epoch_writes = 0;
		connection->send.last_sent_barrier_jif = jiffies;
	}
}

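/* Close the previous epoch with a P_BARRIER if this request belongs to a
 * new epoch and the previous epoch actually contained writes. */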
static void maybe_send_barrier(struct drbd_connection *connection, unsigned int epoch)
{
	/* no write has been sent on this connection yet,
	 * so there is no epoch to close */
	if (!connection->send.seen_any_write_yet)
		return;
	if (connection->send.current_epoch_nr != epoch) {
		if (connection->send.current_epoch_writes)
			drbd_send_barrier(connection);
		connection->send.current_epoch_nr = epoch;
	}
}

int w_send_out_of_sync(struct drbd_work *w, int cancel)
{
	struct drbd_request *req = container_of(w, struct drbd_request, w);
	struct drbd_device *device = req->device;
	struct drbd_peer_device *const peer_device = first_peer_device(device);
	struct drbd_connection *const connection = peer_device->connection;
	int err;

	if (unlikely(cancel)) {
		req_mod(req, SEND_CANCELED, peer_device);
		return 0;
	}
	req->pre_send_jif = jiffies;

	/* this time, no connection->send.current_epoch_writes++;
	 * If it was sent, it was the closing barrier for the last
	 * replicated epoch, before we went into AHEAD mode.
	 * No more barriers will be sent, until we leave AHEAD mode again. */
	maybe_send_barrier(connection, req->epoch);

	err = drbd_send_out_of_sync(peer_device, req);
	req_mod(req, OOS_HANDED_TO_NETWORK, peer_device);

	return err;
}

/**
 * w_send_dblock() - Worker callback to send a P_DATA packet in order to mirror a write request
 * @w:		work object.
 * @cancel:	The connection will be closed anyways
 */
int w_send_dblock(struct drbd_work *w, int cancel)
{
	struct drbd_request *req = container_of(w, struct drbd_request, w);
	struct drbd_device *device = req->device;
	struct drbd_peer_device *const peer_device = first_peer_device(device);
	struct drbd_connection *connection = peer_device->connection;
	bool do_send_unplug = req->rq_state & RQ_UNPLUG;
	int err;

	if (unlikely(cancel)) {
		req_mod(req, SEND_CANCELED, peer_device);
		return 0;
	}
	req->pre_send_jif = jiffies;

	re_init_if_first_write(connection, req->epoch);
	maybe_send_barrier(connection, req->epoch);
	connection->send.current_epoch_writes++;

	err = drbd_send_dblock(peer_device, req);
	req_mod(req, err ? SEND_FAILED : HANDED_OVER_TO_NETWORK, peer_device);

	if (do_send_unplug && !err)
		pd_send_unplug_remote(peer_device);

	return err;
}

/**
 * w_send_read_req() - Worker callback to send a read request (P_DATA_REQUEST) packet
 * @w:		work object.
 * @cancel:	The connection will be closed anyways
 */
int w_send_read_req(struct drbd_work *w, int cancel)
{
	struct drbd_request *req = container_of(w, struct drbd_request, w);
	struct drbd_device *device = req->device;
	struct drbd_peer_device *const peer_device = first_peer_device(device);
	struct drbd_connection *connection = peer_device->connection;
	bool do_send_unplug = req->rq_state & RQ_UNPLUG;
	int err;

	if (unlikely(cancel)) {
		req_mod(req, SEND_CANCELED, peer_device);
		return 0;
	}
	req->pre_send_jif = jiffies;

	/* Even read requests may close a write epoch,
	 * if there was any yet. */
	maybe_send_barrier(connection, req->epoch);

	err = drbd_send_drequest(peer_device, P_DATA_REQUEST, req->i.sector, req->i.size,
				 (unsigned long)req);

	req_mod(req, err ? SEND_FAILED : HANDED_OVER_TO_NETWORK, peer_device);

	if (do_send_unplug && !err)
		pd_send_unplug_remote(peer_device);

	return err;
}

int w_restart_disk_io(struct drbd_work *w, int cancel)
{
	struct drbd_request *req = container_of(w, struct drbd_request, w);
	struct drbd_device *device = req->device;

	if (bio_data_dir(req->master_bio) == WRITE && req->rq_state & RQ_IN_ACT_LOG)
		drbd_al_begin_io(device, &req->i);

	req->private_bio = bio_alloc_clone(device->ldev->backing_bdev,
					   req->master_bio, GFP_NOIO,
					   &drbd_io_bio_set);
	req->private_bio->bi_private = req;
	req->private_bio->bi_end_io = drbd_request_endio;
	submit_bio_noacct(req->private_bio);

	return 0;
}

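/* Walk the resync-after dependency chain of @device: return 1 if this
 * device may resync now, 0 if a device it (transitively) depends on is
 * itself resyncing or has its sync suspended. */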
static int _drbd_may_sync_now(struct drbd_device *device)
{
	struct drbd_device *odev = device;
	int resync_after;

	while (1) {
		if (!odev->ldev || odev->state.disk == D_DISKLESS)
			return 1;
		rcu_read_lock();
		resync_after = rcu_dereference(odev->ldev->disk_conf)->resync_after;
		rcu_read_unlock();
		if (resync_after == -1)
			return 1;
		odev = minor_to_device(resync_after);
		if (!odev)
			return 1;
		if ((odev->state.conn >= C_SYNC_SOURCE &&
		     odev->state.conn <= C_PAUSED_SYNC_T) ||
		    odev->state.aftr_isp || odev->state.peer_isp ||
		    odev->state.user_isp)
			return 0;
	}
}

/**
 * drbd_pause_after() - Pause resync on all devices that may not resync now
 * @device:	DRBD device.
 *
 * Called from process context only (admin command and after_state_ch).
 */
static bool drbd_pause_after(struct drbd_device *device)
{
	bool changed = false;
	struct drbd_device *odev;
	int i;

	rcu_read_lock();
	idr_for_each_entry(&drbd_devices, odev, i) {
		if (odev->state.conn == C_STANDALONE && odev->state.disk == D_DISKLESS)
			continue;
		if (!_drbd_may_sync_now(odev) &&
		    _drbd_set_state(_NS(odev, aftr_isp, 1),
				    CS_HARD, NULL) != SS_NOTHING_TO_DO)
			changed = true;
	}
	rcu_read_unlock();

	return changed;
}

/**
 * drbd_resume_next() - Resume resync on all devices that may resync now
 * @device:	DRBD device.
 *
 * Called from process context only (admin command and worker).
 */
static bool drbd_resume_next(struct drbd_device *device)
{
	bool changed = false;
	struct drbd_device *odev;
	int i;

	rcu_read_lock();
	idr_for_each_entry(&drbd_devices, odev, i) {
		if (odev->state.conn == C_STANDALONE && odev->state.disk == D_DISKLESS)
			continue;
		if (odev->state.aftr_isp) {
			if (_drbd_may_sync_now(odev) &&
			    _drbd_set_state(_NS(odev, aftr_isp, 0),
					    CS_HARD, NULL) != SS_NOTHING_TO_DO)
				changed = true;
		}
	}
	rcu_read_unlock();
	return changed;
}
1612 | |
1613 | void resume_next_sg(struct drbd_device *device) |
1614 | { |
1615 | lock_all_resources(); |
1616 | drbd_resume_next(device); |
1617 | unlock_all_resources(); |
1618 | } |
1619 | |
1620 | void suspend_other_sg(struct drbd_device *device) |
1621 | { |
1622 | lock_all_resources(); |
1623 | drbd_pause_after(device); |
1624 | unlock_all_resources(); |
1625 | } |
1626 | |
/* caller must lock_all_resources() */
enum drbd_ret_code drbd_resync_after_valid(struct drbd_device *device, int o_minor)
{
	struct drbd_device *odev;
	int resync_after;

	if (o_minor == -1)
		return NO_ERROR;
	if (o_minor < -1 || o_minor > MINORMASK)
		return ERR_RESYNC_AFTER;

	/* check for loops */
	odev = minor_to_device(o_minor);
	while (1) {
		if (odev == device)
			return ERR_RESYNC_AFTER_CYCLE;

		/* You are free to depend on diskless, non-existing,
		 * or not yet/no longer existing minors.
		 * We only reject dependency loops.
		 * We cannot follow the dependency chain beyond a detached or
		 * missing minor.
		 */
		if (!odev || !odev->ldev || odev->state.disk == D_DISKLESS)
			return NO_ERROR;

		rcu_read_lock();
		resync_after = rcu_dereference(odev->ldev->disk_conf)->resync_after;
		rcu_read_unlock();
		/* dependency chain ends here, no cycles. */
		if (resync_after == -1)
			return NO_ERROR;

		/* follow the dependency chain */
		odev = minor_to_device(resync_after);
	}
}

/* caller must lock_all_resources() */
void drbd_resync_after_changed(struct drbd_device *device)
{
	int changed;

	do {
		changed = drbd_pause_after(device);
		changed |= drbd_resume_next(device);
	} while (changed);
}

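/* drbd_rs_controller_reset() - Reset the dynamic resync rate controller.
 * Zeroes the sector-in/sector-event counters, samples the current backing
 * disk IO statistics as the new baseline, and empties the fifo of planned
 * resync requests, so a (re)started resync begins with a clean slate.
 */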
void drbd_rs_controller_reset(struct drbd_peer_device *peer_device)
{
	struct drbd_device *device = peer_device->device;
	struct gendisk *disk = device->ldev->backing_bdev->bd_disk;
	struct fifo_buffer *plan;

	atomic_set(&device->rs_sect_in, 0);
	atomic_set(&device->rs_sect_ev, 0);
	device->rs_in_flight = 0;
	device->rs_last_events =
		(int)part_stat_read_accum(disk->part0, sectors);

	/* Updating the RCU protected object in place is necessary since
	   this function gets called from atomic context.
	   It is valid since all other updates also lead to a completely
	   empty fifo. */
	rcu_read_lock();
	plan = rcu_dereference(device->rs_plan_s);
	plan->total = 0;
	fifo_set(plan, 0);
	rcu_read_unlock();
}

void start_resync_timer_fn(struct timer_list *t)
{
	struct drbd_device *device = from_timer(device, t, start_resync_timer);

	drbd_device_post_work(device, RS_START);
}

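/* do_start_resync() - Worker side of the RS_START device work bit.
 * If acks or resync answers from the peer are still outstanding, retry a
 * little later via the start_resync timer instead of starting right away.
 */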
static void do_start_resync(struct drbd_device *device)
{
	if (atomic_read(&device->unacked_cnt) || atomic_read(&device->rs_pending_cnt)) {
		drbd_warn(device, "postponing start_resync ...\n");
		device->start_resync_timer.expires = jiffies + HZ/10;
		add_timer(&device->start_resync_timer);
		return;
	}

	drbd_start_resync(device, C_SYNC_SOURCE);
	clear_bit(AHEAD_TO_SYNC_SOURCE, &device->flags);
}

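/* Checksum based resync exchanges block digests and only transfers blocks
 * that actually differ. It requires protocol 89 and a configured csums
 * algorithm, and may be limited (csums-after-crash-only) to resyncs that
 * follow a Primary crash.
 */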
static bool use_checksum_based_resync(struct drbd_connection *connection, struct drbd_device *device)
{
	bool csums_after_crash_only;

	rcu_read_lock();
	csums_after_crash_only = rcu_dereference(connection->net_conf)->csums_after_crash_only;
	rcu_read_unlock();
	return connection->agreed_pro_version >= 89 &&		/* supported? */
		connection->csums_tfm &&			/* configured? */
		(csums_after_crash_only == false		/* use for each resync? */
		 || test_bit(CRASHED_PRIMARY, &device->flags));	/* or only after Primary crash? */
}

/**
 * drbd_start_resync() - Start the resync process
 * @device:	DRBD device.
 * @side:	Either C_SYNC_SOURCE or C_SYNC_TARGET
 *
 * This function might bring you directly into one of the
 * C_PAUSED_SYNC_* states.
 */
void drbd_start_resync(struct drbd_device *device, enum drbd_conns side)
{
	struct drbd_peer_device *peer_device = first_peer_device(device);
	struct drbd_connection *connection = peer_device ? peer_device->connection : NULL;
	union drbd_state ns;
	int r;

	if (device->state.conn >= C_SYNC_SOURCE && device->state.conn < C_AHEAD) {
		drbd_err(device, "Resync already running!\n");
		return;
	}

	if (!connection) {
		drbd_err(device, "No connection to peer, aborting!\n");
		return;
	}

	if (!test_bit(B_RS_H_DONE, &device->flags)) {
		if (side == C_SYNC_TARGET) {
			/* Since application IO was locked out during C_WF_BITMAP_T and
			   C_WF_SYNC_UUID we are still unmodified. Before going to
			   C_SYNC_TARGET, where we will make the data inconsistent, let
			   the before-resync-target handler decide whether to proceed. */
			r = drbd_khelper(device, "before-resync-target");
			r = (r >> 8) & 0xff;
			if (r > 0) {
				drbd_info(device, "before-resync-target handler returned %d, "
					 "dropping connection.\n", r);
				conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
				return;
			}
		} else /* C_SYNC_SOURCE */ {
			r = drbd_khelper(device, "before-resync-source");
			r = (r >> 8) & 0xff;
			if (r > 0) {
				if (r == 3) {
					drbd_info(device, "before-resync-source handler returned %d, "
						 "ignoring. Old userland tools?", r);
				} else {
					drbd_info(device, "before-resync-source handler returned %d, "
						 "dropping connection.\n", r);
					conn_request_state(connection,
							   NS(conn, C_DISCONNECTING), CS_HARD);
					return;
				}
			}
		}
	}

	if (current == connection->worker.task) {
		/* The worker should not sleep waiting for state_mutex,
		   that can take long */
		if (!mutex_trylock(device->state_mutex)) {
			set_bit(B_RS_H_DONE, &device->flags);
			device->start_resync_timer.expires = jiffies + HZ/5;
			add_timer(&device->start_resync_timer);
			return;
		}
	} else {
		mutex_lock(device->state_mutex);
	}

	lock_all_resources();
	clear_bit(B_RS_H_DONE, &device->flags);
	/* Did some connection breakage or IO error race with us? */
	if (device->state.conn < C_CONNECTED
	|| !get_ldev_if_state(device, D_NEGOTIATING)) {
		unlock_all_resources();
		goto out;
	}

	ns = drbd_read_state(device);

	ns.aftr_isp = !_drbd_may_sync_now(device);

	ns.conn = side;

	if (side == C_SYNC_TARGET)
		ns.disk = D_INCONSISTENT;
	else /* side == C_SYNC_SOURCE */
		ns.pdsk = D_INCONSISTENT;

	r = _drbd_set_state(device, ns, CS_VERBOSE, NULL);
	ns = drbd_read_state(device);

	if (ns.conn < C_CONNECTED)
		r = SS_UNKNOWN_ERROR;

	if (r == SS_SUCCESS) {
		unsigned long tw = drbd_bm_total_weight(device);
		unsigned long now = jiffies;
		int i;

		device->rs_failed = 0;
		device->rs_paused = 0;
		device->rs_same_csum = 0;
		device->rs_last_sect_ev = 0;
		device->rs_total = tw;
		device->rs_start = now;
		for (i = 0; i < DRBD_SYNC_MARKS; i++) {
			device->rs_mark_left[i] = tw;
			device->rs_mark_time[i] = now;
		}
		drbd_pause_after(device);
		/* Forget potentially stale cached per resync extent bit-counts.
		 * Open coded drbd_rs_cancel_all(device), we already have IRQs
		 * disabled, and know the disk state is ok. */
		spin_lock(&device->al_lock);
		lc_reset(device->resync);
		device->resync_locked = 0;
		device->resync_wenr = LC_FREE;
		spin_unlock(&device->al_lock);
	}
	unlock_all_resources();

	if (r == SS_SUCCESS) {
		wake_up(&device->al_wait); /* for lc_reset() above */
		/* reset rs_last_bcast when a resync or verify is started,
		 * to deal with potential jiffies wrap. */
		device->rs_last_bcast = jiffies - HZ;

		drbd_info(device, "Began resync as %s (will sync %lu KB [%lu bits set]).\n",
			  drbd_conn_str(ns.conn),
			  (unsigned long) device->rs_total << (BM_BLOCK_SHIFT-10),
			  (unsigned long) device->rs_total);
		if (side == C_SYNC_TARGET) {
			device->bm_resync_fo = 0;
			device->use_csums = use_checksum_based_resync(connection, device);
		} else {
			device->use_csums = false;
		}

		/* Since protocol 96, we must serialize drbd_gen_and_send_sync_uuid
		 * with w_send_oos, or the sync target will get confused as to
		 * how many bits to resync. We cannot do that always, because for an
		 * empty resync and protocol < 95, we need to do it here, as we call
		 * drbd_resync_finished from here in that case.
		 * We drbd_gen_and_send_sync_uuid here for protocol < 96,
		 * and from after_state_ch otherwise. */
		if (side == C_SYNC_SOURCE && connection->agreed_pro_version < 96)
			drbd_gen_and_send_sync_uuid(peer_device);

		if (connection->agreed_pro_version < 95 && device->rs_total == 0) {
			/* This still has a race (about when exactly the peers
			 * detect connection loss) that can lead to a full sync
			 * on next handshake. In 8.3.9 we fixed this with explicit
			 * resync-finished notifications, but the fix
			 * introduces a protocol change. Sleeping for some
			 * time longer than the ping interval + timeout on the
			 * SyncSource, to give the SyncTarget the chance to
			 * detect connection loss, then waiting for a ping
			 * response (implicit in drbd_resync_finished) reduces
			 * the race considerably, but does not solve it. */
			if (side == C_SYNC_SOURCE) {
				struct net_conf *nc;
				int timeo;

				rcu_read_lock();
				nc = rcu_dereference(connection->net_conf);
				timeo = nc->ping_int * HZ + nc->ping_timeo * HZ / 9;
				rcu_read_unlock();
				schedule_timeout_interruptible(timeo);
			}
			drbd_resync_finished(peer_device);
		}

		drbd_rs_controller_reset(peer_device);
		/* ns.conn may already be != device->state.conn,
		 * we may have been paused in between, or become paused until
		 * the timer triggers.
		 * No matter, that is handled in resync_timer_fn() */
		if (ns.conn == C_SYNC_TARGET)
			mod_timer(&device->resync_timer, jiffies);

		drbd_md_sync(device);
	}
	put_ldev(device);
out:
	mutex_unlock(device->state_mutex);
}

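/* update_on_disk_bitmap() - Lazily write out dirty bitmap pages and
 * broadcast sync progress to userspace. If resync_done is set and we are
 * still in a sync state, also try to finish the resync.
 */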
static void update_on_disk_bitmap(struct drbd_peer_device *peer_device, bool resync_done)
{
	struct drbd_device *device = peer_device->device;
	struct sib_info sib = { .sib_reason = SIB_SYNC_PROGRESS, };

	device->rs_last_bcast = jiffies;

	if (!get_ldev(device))
		return;

	drbd_bm_write_lazy(device, 0);
	if (resync_done && is_sync_state(device->state.conn))
		drbd_resync_finished(peer_device);

	drbd_bcast_event(device, &sib);
	/* update timestamp, in case it took a while to write out stuff */
	device->rs_last_bcast = jiffies;
	put_ldev(device);
}

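/* drbd_ldev_destroy() - Final teardown of the local backing device.
 * Frees the resync extent and activity log caches and the backing device
 * itself, then wakes anyone waiting for the detach to complete.
 */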
static void drbd_ldev_destroy(struct drbd_device *device)
{
	lc_destroy(device->resync);
	device->resync = NULL;
	lc_destroy(device->act_log);
	device->act_log = NULL;

	__acquire(local);
	drbd_backing_dev_free(device, device->ldev);
	device->ldev = NULL;
	__release(local);

	clear_bit(GOING_DISKLESS, &device->flags);
	wake_up(&device->misc_wait);
}

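/* go_diskless() - Worker side of the D_FAILED -> D_DISKLESS transition:
 * try to save the bitmap to stable storage one last time, then force the
 * disk state to D_DISKLESS.
 */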
static void go_diskless(struct drbd_device *device)
{
	struct drbd_peer_device *peer_device = first_peer_device(device);

	D_ASSERT(device, device->state.disk == D_FAILED);
	/* we cannot assert local_cnt == 0 here, as get_ldev_if_state will
	 * inc/dec it frequently. Once we are D_DISKLESS, no one will touch
	 * the protected members anymore, though, so once put_ldev reaches zero
	 * again, it will be safe to free them. */

	/* Try to write changed bitmap pages, read errors may have just
	 * set some bits outside the area covered by the activity log.
	 *
	 * If we have an IO error during the bitmap writeout,
	 * we will want a full sync next time, just in case.
	 * (Do we want a specific meta data flag for this?)
	 *
	 * If that does not make it to stable storage either,
	 * we cannot do anything about that anymore.
	 *
	 * We still need to check if both bitmap and ldev are present, we may
	 * end up here after a failed attach, before ldev was even assigned.
	 */
	if (device->bitmap && device->ldev) {
		/* An interrupted resync or similar is allowed to recount bits
		 * while we detach.
		 * Any modifications would not be expected anymore, though.
		 */
		if (drbd_bitmap_io_from_worker(device, drbd_bm_write,
					       "detach", BM_LOCKED_TEST_ALLOWED, peer_device)) {
			if (test_bit(WAS_READ_ERROR, &device->flags)) {
				drbd_md_set_flag(device, MDF_FULL_SYNC);
				drbd_md_sync(device);
			}
		}
	}

	drbd_force_state(device, NS(disk, D_DISKLESS));
}

static int do_md_sync(struct drbd_device *device)
{
	drbd_warn(device, "md_sync_timer expired! Worker calls drbd_md_sync().\n");
	drbd_md_sync(device);
	return 0;
}

/* only called from drbd_worker thread, no locking */
void __update_timing_details(
		struct drbd_thread_timing_details *tdp,
		unsigned int *cb_nr,
		void *cb,
		const char *fn, const unsigned int line)
{
	unsigned int i = *cb_nr % DRBD_THREAD_DETAILS_HIST;
	struct drbd_thread_timing_details *td = tdp + i;

	td->start_jif = jiffies;
	td->cb_addr = cb;
	td->caller_fn = fn;
	td->line = line;
	td->cb_nr = *cb_nr;

	i = (i+1) % DRBD_THREAD_DETAILS_HIST;
	td = tdp + i;
	memset(td, 0, sizeof(*td));

	++(*cb_nr);
}

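/* do_device_work() - Dispatch the claimed device work bits to their
 * handlers. Note the code order: a last bitmap update (RS_PROGRESS/RS_DONE)
 * happens before a potential detach (GO_DISKLESS, DESTROY_DISK).
 */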
static void do_device_work(struct drbd_device *device, const unsigned long todo)
{
	if (test_bit(MD_SYNC, &todo))
		do_md_sync(device);
	if (test_bit(RS_DONE, &todo) ||
	    test_bit(RS_PROGRESS, &todo))
		update_on_disk_bitmap(first_peer_device(device), test_bit(RS_DONE, &todo));
	if (test_bit(GO_DISKLESS, &todo))
		go_diskless(device);
	if (test_bit(DESTROY_DISK, &todo))
		drbd_ldev_destroy(device);
	if (test_bit(RS_START, &todo))
		do_start_resync(device);
}

#define DRBD_DEVICE_WORK_MASK	\
	((1UL << GO_DISKLESS)	\
	|(1UL << DESTROY_DISK)	\
	|(1UL << MD_SYNC)	\
	|(1UL << RS_START)	\
	|(1UL << RS_PROGRESS)	\
	|(1UL << RS_DONE)	\
	)

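/* get_work_bits() - Atomically claim and clear all device work bits.
 * The cmpxchg() loop retries until it installs *flags with the work bits
 * cleared, so concurrent modifications of other bits in the same word are
 * never lost. Returns the work bits that were set.
 */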
static unsigned long get_work_bits(unsigned long *flags)
{
	unsigned long old, new;

	do {
		old = *flags;
		new = old & ~DRBD_DEVICE_WORK_MASK;
	} while (cmpxchg(flags, old, new) != old);
	return old & DRBD_DEVICE_WORK_MASK;
}

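/* do_unqueued_work() - Handle per-device work that is posted as bits in
 * device->flags instead of as items on the work queue. The RCU read lock
 * is dropped around the actual work; a kref keeps the device alive.
 */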
static void do_unqueued_work(struct drbd_connection *connection)
{
	struct drbd_peer_device *peer_device;
	int vnr;

	rcu_read_lock();
	idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
		struct drbd_device *device = peer_device->device;
		unsigned long todo = get_work_bits(&device->flags);

		if (!todo)
			continue;

		kref_get(&device->kref);
		rcu_read_unlock();
		do_device_work(device, todo);
		kref_put(&device->kref, drbd_destroy_device);
		rcu_read_lock();
	}
	rcu_read_unlock();
}

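/* Grab the whole queue in one go, so a batch of work items costs only a
 * single lock round trip. Returns true if anything was dequeued.
 */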
static bool dequeue_work_batch(struct drbd_work_queue *queue, struct list_head *work_list)
{
	spin_lock_irq(&queue->q_lock);
	list_splice_tail_init(&queue->q, work_list);
	spin_unlock_irq(&queue->q_lock);
	return !list_empty(work_list);
}

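/* wait_for_work() - Sleep until there is sender work to do.
 * Before sleeping, uncork the data socket (if tcp_cork is configured) so
 * batched packets actually go out, and close the current epoch with a
 * barrier once the transfer log has moved past it. Returns with work_list
 * filled, or on a pending signal, pending device work, or thread stop;
 * re-corks the socket according to the then-current configuration.
 */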
static void wait_for_work(struct drbd_connection *connection, struct list_head *work_list)
{
	DEFINE_WAIT(wait);
	struct net_conf *nc;
	int uncork, cork;

	dequeue_work_batch(&connection->sender_work, work_list);
	if (!list_empty(work_list))
		return;

	/* Still nothing to do?
	 * Maybe we still need to close the current epoch,
	 * even if no new requests are queued yet.
	 *
	 * Also, poke TCP, just in case.
	 * Then wait for new work (or signal). */
	rcu_read_lock();
	nc = rcu_dereference(connection->net_conf);
	uncork = nc ? nc->tcp_cork : 0;
	rcu_read_unlock();
	if (uncork) {
		mutex_lock(&connection->data.mutex);
		if (connection->data.socket)
			tcp_sock_set_cork(connection->data.socket->sk, false);
		mutex_unlock(&connection->data.mutex);
	}

	for (;;) {
		int send_barrier;

		prepare_to_wait(&connection->sender_work.q_wait, &wait, TASK_INTERRUPTIBLE);
		spin_lock_irq(&connection->resource->req_lock);
		spin_lock(&connection->sender_work.q_lock);	/* FIXME get rid of this one? */
		if (!list_empty(&connection->sender_work.q))
			list_splice_tail_init(&connection->sender_work.q, work_list);
		spin_unlock(&connection->sender_work.q_lock);	/* FIXME get rid of this one? */
		if (!list_empty(work_list) || signal_pending(current)) {
			spin_unlock_irq(&connection->resource->req_lock);
			break;
		}

		/* We found nothing new to do, no to-be-communicated request,
		 * no other work item. We may still need to close the last
		 * epoch. The next incoming request's epoch will be
		 * connection->current_tle_nr, the current transfer log epoch
		 * number. If that is different from the epoch of the last
		 * request we communicated, it is safe to send the epoch
		 * separating barrier now.
		 */
		send_barrier =
			atomic_read(&connection->current_tle_nr) !=
			connection->send.current_epoch_nr;
		spin_unlock_irq(&connection->resource->req_lock);

		if (send_barrier)
			maybe_send_barrier(connection,
					   connection->send.current_epoch_nr + 1);

		if (test_bit(DEVICE_WORK_PENDING, &connection->flags))
			break;

		/* drbd_send() may have called flush_signals() */
		if (get_t_state(&connection->worker) != RUNNING)
			break;

		schedule();
		/* may be woken up for other things than new work, too,
		 * e.g. if the current epoch got closed.
		 * In which case we send the barrier above. */
	}
	finish_wait(&connection->sender_work.q_wait, &wait);

	/* someone may have changed the config while we have been waiting above. */
	rcu_read_lock();
	nc = rcu_dereference(connection->net_conf);
	cork = nc ? nc->tcp_cork : 0;
	rcu_read_unlock();
	mutex_lock(&connection->data.mutex);
	if (connection->data.socket) {
		if (cork)
			tcp_sock_set_cork(connection->data.socket->sk, true);
		else if (!uncork)
			tcp_sock_set_cork(connection->data.socket->sk, false);
	}
	mutex_unlock(&connection->data.mutex);
}

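/* drbd_worker() - Main loop of the per-connection worker thread.
 * Runs queued work callbacks and unqueued device work until asked to stop,
 * then drains what is left (calling the remaining callbacks with cancel=1)
 * and cleans up all devices of this connection.
 */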
int drbd_worker(struct drbd_thread *thi)
{
	struct drbd_connection *connection = thi->connection;
	struct drbd_work *w = NULL;
	struct drbd_peer_device *peer_device;
	LIST_HEAD(work_list);
	int vnr;

	while (get_t_state(thi) == RUNNING) {
		drbd_thread_current_set_cpu(thi);

		if (list_empty(&work_list)) {
			update_worker_timing_details(connection, wait_for_work);
			wait_for_work(connection, &work_list);
		}

		if (test_and_clear_bit(DEVICE_WORK_PENDING, &connection->flags)) {
			update_worker_timing_details(connection, do_unqueued_work);
			do_unqueued_work(connection);
		}

		if (signal_pending(current)) {
			flush_signals(current);
			if (get_t_state(thi) == RUNNING) {
				drbd_warn(connection, "Worker got an unexpected signal\n");
				continue;
			}
			break;
		}

		if (get_t_state(thi) != RUNNING)
			break;

		if (!list_empty(&work_list)) {
			w = list_first_entry(&work_list, struct drbd_work, list);
			list_del_init(&w->list);
			update_worker_timing_details(connection, w->cb);
			if (w->cb(w, connection->cstate < C_WF_REPORT_PARAMS) == 0)
				continue;
			if (connection->cstate >= C_WF_REPORT_PARAMS)
				conn_request_state(connection, NS(conn, C_NETWORK_FAILURE), CS_HARD);
		}
	}

	do {
		if (test_and_clear_bit(DEVICE_WORK_PENDING, &connection->flags)) {
			update_worker_timing_details(connection, do_unqueued_work);
			do_unqueued_work(connection);
		}
		if (!list_empty(&work_list)) {
			w = list_first_entry(&work_list, struct drbd_work, list);
			list_del_init(&w->list);
			update_worker_timing_details(connection, w->cb);
			w->cb(w, 1);
		} else
			dequeue_work_batch(&connection->sender_work, &work_list);
	} while (!list_empty(&work_list) || test_bit(DEVICE_WORK_PENDING, &connection->flags));

	rcu_read_lock();
	idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
		struct drbd_device *device = peer_device->device;

		D_ASSERT(device, device->state.disk == D_DISKLESS && device->state.conn == C_STANDALONE);
		kref_get(&device->kref);
		rcu_read_unlock();
		drbd_device_cleanup(device);
		kref_put(&device->kref, drbd_destroy_device);
		rcu_read_lock();
	}
	rcu_read_unlock();

	return 0;
}