1 | /* |
2 | * Copyright (C) 2006-2009 Red Hat, Inc. |
3 | * |
4 | * This file is released under the LGPL. |
5 | */ |
6 | |
7 | #include <linux/bio.h> |
8 | #include <linux/slab.h> |
9 | #include <linux/jiffies.h> |
10 | #include <linux/dm-dirty-log.h> |
11 | #include <linux/device-mapper.h> |
12 | #include <linux/dm-log-userspace.h> |
13 | #include <linux/module.h> |
14 | #include <linux/workqueue.h> |
15 | |
16 | #include "dm-log-userspace-transfer.h" |
17 | |
18 | #define DM_LOG_USERSPACE_VSN "1.3.0" |
19 | |
20 | #define FLUSH_ENTRY_POOL_SIZE 16 |
21 | |
22 | struct dm_dirty_log_flush_entry { |
23 | int type; |
24 | region_t region; |
25 | struct list_head list; |
26 | }; |
27 | |
28 | /* |
 * This limit on the number of mark and clear requests is, to a degree,
30 | * arbitrary. However, there is some basis for the choice in the limits |
31 | * imposed on the size of data payload by dm-log-userspace-transfer.c: |
32 | * dm_consult_userspace(). |
33 | */ |
34 | #define MAX_FLUSH_GROUP_COUNT 32 |
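/*
 * At this size, a full group of 32 uint64_t region numbers amounts to
 * 256 bytes of request payload.
 */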
35 | |
36 | struct log_c { |
37 | struct dm_target *ti; |
38 | struct dm_dev *log_dev; |
39 | |
40 | char *usr_argv_str; |
41 | uint32_t usr_argc; |
42 | |
43 | uint32_t region_size; |
44 | region_t region_count; |
45 | uint64_t luid; |
46 | char uuid[DM_UUID_LEN]; |
47 | |
48 | /* |
49 | * Mark and clear requests are held until a flush is issued |
50 | * so that we can group, and thereby limit, the amount of |
51 | * network traffic between kernel and userspace. The 'flush_lock' |
52 | * is used to protect these lists. |
53 | */ |
54 | spinlock_t flush_lock; |
55 | struct list_head mark_list; |
56 | struct list_head clear_list; |
57 | |
58 | /* |
59 | * in_sync_hint gets set when doing is_remote_recovering. It |
60 | * represents the first region that needs recovery. IOW, the |
 * first zero bit of sync_bits. This can be useful to limit
 * traffic for calls like is_remote_recovering and get_resync_work,
 * but take care when using it for anything else.
64 | */ |
65 | uint64_t in_sync_hint; |
66 | |
67 | /* |
68 | * Workqueue for flush of clear region requests. |
69 | */ |
70 | struct workqueue_struct *dmlog_wq; |
71 | struct delayed_work flush_log_work; |
72 | atomic_t sched_flush; |
73 | |
74 | /* |
75 | * Combine userspace flush and mark requests for efficiency. |
76 | */ |
77 | uint32_t integrated_flush; |
78 | |
79 | mempool_t flush_entry_pool; |
80 | }; |
81 | |
82 | static struct kmem_cache *_flush_entry_cache; |
83 | |
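/*
 * userspace_do_request
 *
 * Pass a request to the userspace log server via dm_consult_userspace().
 * If the server has gone away (-ESRCH), retry every couple of seconds:
 * re-issue DM_ULOG_CTR with the saved constructor string, then
 * DM_ULOG_RESUME, and finally repeat the original request.
 *
 * Returns: the result of the request, or -ESRCH if the log could not be
 * resumed after reconnecting
 */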
84 | static int userspace_do_request(struct log_c *lc, const char *uuid, |
85 | int request_type, char *data, size_t data_size, |
86 | char *rdata, size_t *rdata_size) |
87 | { |
88 | int r; |
89 | |
90 | /* |
91 | * If the server isn't there, -ESRCH is returned, |
92 | * and we must keep trying until the server is |
93 | * restored. |
94 | */ |
95 | retry: |
96 | r = dm_consult_userspace(uuid, lc->luid, request_type, data, |
97 | data_size, rdata, rdata_size); |
98 | |
99 | if (r != -ESRCH) |
100 | return r; |
101 | |
102 | DMERR(" Userspace log server not found." ); |
103 | while (1) { |
104 | set_current_state(TASK_INTERRUPTIBLE); |
105 | schedule_timeout(2*HZ); |
106 | DMWARN("Attempting to contact userspace log server..." ); |
107 | r = dm_consult_userspace(uuid, lc->luid, DM_ULOG_CTR, |
108 | lc->usr_argv_str, |
109 | strlen(lc->usr_argv_str) + 1, |
110 | NULL, NULL); |
111 | if (!r) |
112 | break; |
113 | } |
114 | DMINFO("Reconnected to userspace log server... DM_ULOG_CTR complete" ); |
115 | r = dm_consult_userspace(uuid, lc->luid, DM_ULOG_RESUME, NULL, |
116 | 0, NULL, NULL); |
117 | if (!r) |
118 | goto retry; |
119 | |
120 | DMERR("Error trying to resume userspace log: %d" , r); |
121 | |
122 | return -ESRCH; |
123 | } |
124 | |
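/*
 * build_constructor_string
 *
 * Concatenate the target length (in sectors) and the remaining ctr
 * arguments into one space-separated string for the userspace ctr.
 * For example (purely illustrative values), ti->len == 204800 and
 * argv == { "clustered-disk", "2", "253:4", "1024" } would yield
 * "204800 clustered-disk 2 253:4 1024".
 *
 * Returns: length of the string on success, -ENOMEM on failure
 */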
125 | static int build_constructor_string(struct dm_target *ti, |
126 | unsigned argc, char **argv, |
127 | char **ctr_str) |
128 | { |
129 | int i, str_size; |
130 | char *str = NULL; |
131 | |
132 | *ctr_str = NULL; |
133 | |
134 | /* |
135 | * Determine overall size of the string. |
136 | */ |
137 | for (i = 0, str_size = 0; i < argc; i++) |
138 | str_size += strlen(argv[i]) + 1; /* +1 for space between args */ |
139 | |
140 | str_size += 20; /* Max number of chars in a printed u64 number */ |
141 | |
142 | str = kzalloc(str_size, GFP_KERNEL); |
143 | if (!str) { |
144 | DMWARN("Unable to allocate memory for constructor string" ); |
145 | return -ENOMEM; |
146 | } |
147 | |
	str_size = sprintf(str, "%llu", (unsigned long long)ti->len);
	for (i = 0; i < argc; i++)
		str_size += sprintf(str + str_size, " %s", argv[i]);
151 | |
152 | *ctr_str = str; |
153 | return str_size; |
154 | } |
155 | |
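/*
 * do_flush
 *
 * Delayed-work handler used with integrated_flush. It issues the
 * DM_ULOG_FLUSH that userspace_flush() deferred when only clear-region
 * requests were pending; on failure, a dm table event is raised.
 */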
156 | static void do_flush(struct work_struct *work) |
157 | { |
158 | int r; |
159 | struct log_c *lc = container_of(work, struct log_c, flush_log_work.work); |
160 | |
161 | atomic_set(&lc->sched_flush, 0); |
162 | |
163 | r = userspace_do_request(lc, lc->uuid, DM_ULOG_FLUSH, NULL, 0, NULL, NULL); |
164 | |
165 | if (r) |
166 | dm_table_event(lc->ti->table); |
167 | } |
168 | |
169 | /* |
170 | * userspace_ctr |
171 | * |
172 | * argv contains: |
173 | * <UUID> [integrated_flush] <other args> |
174 | * Where 'other args' are the userspace implementation-specific log |
175 | * arguments. |
176 | * |
177 | * Example: |
178 | * <UUID> [integrated_flush] clustered-disk <arg count> <log dev> |
179 | * <region_size> [[no]sync] |
180 | * |
181 | * This module strips off the <UUID> and uses it for identification |
182 | * purposes when communicating with userspace about a log. |
183 | * |
184 | * If integrated_flush is defined, the kernel combines flush |
185 | * and mark requests. |
186 | * |
187 | * The rest of the line, beginning with 'clustered-disk', is passed |
188 | * to the userspace ctr function. |
189 | */ |
190 | static int userspace_ctr(struct dm_dirty_log *log, struct dm_target *ti, |
191 | unsigned argc, char **argv) |
192 | { |
193 | int r = 0; |
194 | int str_size; |
195 | char *ctr_str = NULL; |
196 | struct log_c *lc = NULL; |
197 | uint64_t rdata; |
198 | size_t rdata_size = sizeof(rdata); |
199 | char *devices_rdata = NULL; |
200 | size_t devices_rdata_size = DM_NAME_LEN; |
201 | |
202 | if (argc < 3) { |
203 | DMWARN("Too few arguments to userspace dirty log" ); |
204 | return -EINVAL; |
205 | } |
206 | |
207 | lc = kzalloc(sizeof(*lc), GFP_KERNEL); |
208 | if (!lc) { |
209 | DMWARN("Unable to allocate userspace log context." ); |
210 | return -ENOMEM; |
211 | } |
212 | |
	/* The pointer value is sufficient as a local unique id */
214 | lc->luid = (unsigned long)lc; |
215 | |
216 | lc->ti = ti; |
217 | |
218 | if (strlen(argv[0]) > (DM_UUID_LEN - 1)) { |
219 | DMWARN("UUID argument too long." ); |
220 | kfree(lc); |
221 | return -EINVAL; |
222 | } |
223 | |
224 | lc->usr_argc = argc; |
225 | |
226 | strncpy(lc->uuid, argv[0], DM_UUID_LEN); |
227 | argc--; |
228 | argv++; |
229 | spin_lock_init(&lc->flush_lock); |
230 | INIT_LIST_HEAD(&lc->mark_list); |
231 | INIT_LIST_HEAD(&lc->clear_list); |
232 | |
	if (!strcasecmp(argv[0], "integrated_flush")) {
234 | lc->integrated_flush = 1; |
235 | argc--; |
236 | argv++; |
237 | } |
238 | |
239 | str_size = build_constructor_string(ti, argc, argv, &ctr_str); |
240 | if (str_size < 0) { |
241 | kfree(lc); |
242 | return str_size; |
243 | } |
244 | |
245 | devices_rdata = kzalloc(devices_rdata_size, GFP_KERNEL); |
246 | if (!devices_rdata) { |
247 | DMERR("Failed to allocate memory for device information" ); |
248 | r = -ENOMEM; |
249 | goto out; |
250 | } |
251 | |
252 | r = mempool_init_slab_pool(&lc->flush_entry_pool, FLUSH_ENTRY_POOL_SIZE, |
253 | _flush_entry_cache); |
254 | if (r) { |
255 | DMERR("Failed to create flush_entry_pool" ); |
256 | goto out; |
257 | } |
258 | |
259 | /* |
260 | * Send table string and get back any opened device. |
261 | */ |
262 | r = dm_consult_userspace(lc->uuid, lc->luid, DM_ULOG_CTR, |
263 | ctr_str, str_size, |
264 | devices_rdata, &devices_rdata_size); |
265 | |
266 | if (r < 0) { |
267 | if (r == -ESRCH) |
268 | DMERR("Userspace log server not found" ); |
269 | else |
270 | DMERR("Userspace log server failed to create log" ); |
271 | goto out; |
272 | } |
273 | |
274 | /* Since the region size does not change, get it now */ |
275 | rdata_size = sizeof(rdata); |
276 | r = dm_consult_userspace(lc->uuid, lc->luid, DM_ULOG_GET_REGION_SIZE, |
277 | NULL, 0, (char *)&rdata, &rdata_size); |
278 | |
279 | if (r) { |
280 | DMERR("Failed to get region size of dirty log" ); |
281 | goto out; |
282 | } |
283 | |
284 | lc->region_size = (uint32_t)rdata; |
285 | lc->region_count = dm_sector_div_up(ti->len, lc->region_size); |
286 | |
287 | if (devices_rdata_size) { |
288 | if (devices_rdata[devices_rdata_size - 1] != '\0') { |
289 | DMERR("DM_ULOG_CTR device return string not properly terminated" ); |
290 | r = -EINVAL; |
291 | goto out; |
292 | } |
293 | r = dm_get_device(ti, devices_rdata, |
294 | dm_table_get_mode(ti->table), &lc->log_dev); |
295 | if (r) |
296 | DMERR("Failed to register %s with device-mapper" , |
297 | devices_rdata); |
298 | } |
299 | |
300 | if (lc->integrated_flush) { |
301 | lc->dmlog_wq = alloc_workqueue("dmlogd" , WQ_MEM_RECLAIM, 0); |
302 | if (!lc->dmlog_wq) { |
303 | DMERR("couldn't start dmlogd" ); |
304 | r = -ENOMEM; |
305 | goto out; |
306 | } |
307 | |
308 | INIT_DELAYED_WORK(&lc->flush_log_work, do_flush); |
309 | atomic_set(&lc->sched_flush, 0); |
310 | } |
311 | |
312 | out: |
313 | kfree(devices_rdata); |
314 | if (r) { |
315 | mempool_exit(&lc->flush_entry_pool); |
316 | kfree(lc); |
317 | kfree(ctr_str); |
318 | } else { |
319 | lc->usr_argv_str = ctr_str; |
320 | log->context = lc; |
321 | } |
322 | |
323 | return r; |
324 | } |
325 | |
326 | static void userspace_dtr(struct dm_dirty_log *log) |
327 | { |
328 | struct log_c *lc = log->context; |
329 | |
330 | if (lc->integrated_flush) { |
331 | /* flush workqueue */ |
332 | if (atomic_read(&lc->sched_flush)) |
333 | flush_delayed_work(&lc->flush_log_work); |
334 | |
335 | destroy_workqueue(lc->dmlog_wq); |
336 | } |
337 | |
338 | (void) dm_consult_userspace(lc->uuid, lc->luid, DM_ULOG_DTR, |
339 | NULL, 0, NULL, NULL); |
340 | |
341 | if (lc->log_dev) |
342 | dm_put_device(lc->ti, lc->log_dev); |
343 | |
344 | mempool_exit(&lc->flush_entry_pool); |
345 | |
346 | kfree(lc->usr_argv_str); |
347 | kfree(lc); |
348 | |
349 | return; |
350 | } |
351 | |
352 | static int userspace_presuspend(struct dm_dirty_log *log) |
353 | { |
354 | int r; |
355 | struct log_c *lc = log->context; |
356 | |
357 | r = dm_consult_userspace(lc->uuid, lc->luid, DM_ULOG_PRESUSPEND, |
358 | NULL, 0, NULL, NULL); |
359 | |
360 | return r; |
361 | } |
362 | |
363 | static int userspace_postsuspend(struct dm_dirty_log *log) |
364 | { |
365 | int r; |
366 | struct log_c *lc = log->context; |
367 | |
368 | /* |
369 | * Run planned flush earlier. |
370 | */ |
371 | if (lc->integrated_flush && atomic_read(&lc->sched_flush)) |
372 | flush_delayed_work(&lc->flush_log_work); |
373 | |
374 | r = dm_consult_userspace(lc->uuid, lc->luid, DM_ULOG_POSTSUSPEND, |
375 | NULL, 0, NULL, NULL); |
376 | |
377 | return r; |
378 | } |
379 | |
380 | static int userspace_resume(struct dm_dirty_log *log) |
381 | { |
382 | int r; |
383 | struct log_c *lc = log->context; |
384 | |
385 | lc->in_sync_hint = 0; |
386 | r = dm_consult_userspace(lc->uuid, lc->luid, DM_ULOG_RESUME, |
387 | NULL, 0, NULL, NULL); |
388 | |
389 | return r; |
390 | } |
391 | |
392 | static uint32_t userspace_get_region_size(struct dm_dirty_log *log) |
393 | { |
394 | struct log_c *lc = log->context; |
395 | |
396 | return lc->region_size; |
397 | } |
398 | |
399 | /* |
400 | * userspace_is_clean |
401 | * |
402 | * Check whether a region is clean. If there is any sort of |
403 | * failure when consulting the server, we return not clean. |
404 | * |
405 | * Returns: 1 if clean, 0 otherwise |
406 | */ |
407 | static int userspace_is_clean(struct dm_dirty_log *log, region_t region) |
408 | { |
409 | int r; |
410 | uint64_t region64 = (uint64_t)region; |
411 | int64_t is_clean; |
412 | size_t rdata_size; |
413 | struct log_c *lc = log->context; |
414 | |
415 | rdata_size = sizeof(is_clean); |
416 | r = userspace_do_request(lc, lc->uuid, DM_ULOG_IS_CLEAN, |
417 | (char *)®ion64, sizeof(region64), |
418 | (char *)&is_clean, &rdata_size); |
419 | |
420 | return (r) ? 0 : (int)is_clean; |
421 | } |
422 | |
423 | /* |
424 | * userspace_in_sync |
425 | * |
426 | * Check if the region is in-sync. If there is any sort |
427 | * of failure when consulting the server, we assume that |
428 | * the region is not in sync. |
429 | * |
 * If 'can_block' is not set, return -EWOULDBLOCK immediately.
431 | * |
432 | * Returns: 1 if in-sync, 0 if not-in-sync, -EWOULDBLOCK |
433 | */ |
434 | static int userspace_in_sync(struct dm_dirty_log *log, region_t region, |
435 | int can_block) |
436 | { |
437 | int r; |
438 | uint64_t region64 = region; |
439 | int64_t in_sync; |
440 | size_t rdata_size; |
441 | struct log_c *lc = log->context; |
442 | |
443 | /* |
444 | * We can never respond directly - even if in_sync_hint is |
445 | * set. This is because another machine could see a device |
446 | * failure and mark the region out-of-sync. If we don't go |
447 | * to userspace to ask, we might think the region is in-sync |
448 | * and allow a read to pick up data that is stale. (This is |
449 | * very unlikely if a device actually fails; but it is very |
450 | * likely if a connection to one device from one machine fails.) |
451 | * |
452 | * There still might be a problem if the mirror caches the region |
453 | * state as in-sync... but then this call would not be made. So, |
454 | * that is a mirror problem. |
455 | */ |
456 | if (!can_block) |
457 | return -EWOULDBLOCK; |
458 | |
459 | rdata_size = sizeof(in_sync); |
460 | r = userspace_do_request(lc, lc->uuid, DM_ULOG_IN_SYNC, |
461 | (char *)®ion64, sizeof(region64), |
462 | (char *)&in_sync, &rdata_size); |
463 | return (r) ? 0 : (int)in_sync; |
464 | } |
465 | |
466 | static int flush_one_by_one(struct log_c *lc, struct list_head *flush_list) |
467 | { |
468 | int r = 0; |
469 | struct dm_dirty_log_flush_entry *fe; |
470 | |
471 | list_for_each_entry(fe, flush_list, list) { |
472 | r = userspace_do_request(lc, lc->uuid, fe->type, |
473 | (char *)&fe->region, |
474 | sizeof(fe->region), |
475 | NULL, NULL); |
476 | if (r) |
477 | break; |
478 | } |
479 | |
480 | return r; |
481 | } |
482 | |
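/*
 * flush_by_group
 *
 * Send the queued requests to userspace in batches of up to
 * MAX_FLUSH_GROUP_COUNT region numbers (an array of uint64_t).
 * If 'flush_with_payload' is set, each batch goes out as a
 * DM_ULOG_FLUSH request (integrated flush); otherwise it is sent
 * as the entries' own type (mark or clear), falling back to
 * flush_one_by_one() if a grouped send fails.
 */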
483 | static int flush_by_group(struct log_c *lc, struct list_head *flush_list, |
484 | int flush_with_payload) |
485 | { |
486 | int r = 0; |
487 | int count; |
488 | uint32_t type = 0; |
489 | struct dm_dirty_log_flush_entry *fe, *tmp_fe; |
490 | LIST_HEAD(tmp_list); |
491 | uint64_t group[MAX_FLUSH_GROUP_COUNT]; |
492 | |
493 | /* |
	 * Process the requests in groups of up to MAX_FLUSH_GROUP_COUNT.
495 | */ |
496 | while (!list_empty(flush_list)) { |
497 | count = 0; |
498 | |
499 | list_for_each_entry_safe(fe, tmp_fe, flush_list, list) { |
500 | group[count] = fe->region; |
501 | count++; |
502 | |
503 | list_move(&fe->list, &tmp_list); |
504 | |
505 | type = fe->type; |
506 | if (count >= MAX_FLUSH_GROUP_COUNT) |
507 | break; |
508 | } |
509 | |
510 | if (flush_with_payload) { |
511 | r = userspace_do_request(lc, lc->uuid, DM_ULOG_FLUSH, |
512 | (char *)(group), |
513 | count * sizeof(uint64_t), |
514 | NULL, NULL); |
515 | /* |
516 | * Integrated flush failed. |
517 | */ |
518 | if (r) |
519 | break; |
520 | } else { |
521 | r = userspace_do_request(lc, lc->uuid, type, |
522 | (char *)(group), |
523 | count * sizeof(uint64_t), |
524 | NULL, NULL); |
525 | if (r) { |
526 | /* |
527 | * Group send failed. Attempt one-by-one. |
528 | */ |
529 | list_splice_init(&tmp_list, flush_list); |
530 | r = flush_one_by_one(lc, flush_list); |
531 | break; |
532 | } |
533 | } |
534 | } |
535 | |
536 | /* |
	 * Must collect the flush entries that were successfully processed
	 * as a group so that they will be freed by the caller.
539 | */ |
540 | list_splice_init(&tmp_list, flush_list); |
541 | |
542 | return r; |
543 | } |
544 | |
545 | /* |
546 | * userspace_flush |
547 | * |
548 | * This function is ok to block. |
549 | * The flush happens in two stages. First, it sends all |
550 | * clear/mark requests that are on the list. Then it |
551 | * tells the server to commit them. This gives the |
552 | * server a chance to optimise the commit, instead of |
553 | * doing it for every request. |
554 | * |
555 | * Additionally, we could implement another thread that |
556 | * sends the requests up to the server - reducing the |
557 | * load on flush. Then the flush would have less in |
558 | * the list and be responsible for the finishing commit. |
559 | * |
560 | * Returns: 0 on success, < 0 on failure |
561 | */ |
562 | static int userspace_flush(struct dm_dirty_log *log) |
563 | { |
564 | int r = 0; |
565 | unsigned long flags; |
566 | struct log_c *lc = log->context; |
567 | LIST_HEAD(mark_list); |
568 | LIST_HEAD(clear_list); |
569 | int mark_list_is_empty; |
570 | int clear_list_is_empty; |
571 | struct dm_dirty_log_flush_entry *fe, *tmp_fe; |
572 | mempool_t *flush_entry_pool = &lc->flush_entry_pool; |
573 | |
574 | spin_lock_irqsave(&lc->flush_lock, flags); |
575 | list_splice_init(&lc->mark_list, &mark_list); |
576 | list_splice_init(&lc->clear_list, &clear_list); |
577 | spin_unlock_irqrestore(&lc->flush_lock, flags); |
578 | |
579 | mark_list_is_empty = list_empty(&mark_list); |
580 | clear_list_is_empty = list_empty(&clear_list); |
581 | |
582 | if (mark_list_is_empty && clear_list_is_empty) |
583 | return 0; |
584 | |
585 | r = flush_by_group(lc, &clear_list, 0); |
586 | if (r) |
587 | goto out; |
588 | |
589 | if (!lc->integrated_flush) { |
590 | r = flush_by_group(lc, &mark_list, 0); |
591 | if (r) |
592 | goto out; |
593 | r = userspace_do_request(lc, lc->uuid, DM_ULOG_FLUSH, |
594 | NULL, 0, NULL, NULL); |
595 | goto out; |
596 | } |
597 | |
598 | /* |
599 | * Send integrated flush request with mark_list as payload. |
600 | */ |
601 | r = flush_by_group(lc, &mark_list, 1); |
602 | if (r) |
603 | goto out; |
604 | |
605 | if (mark_list_is_empty && !atomic_read(&lc->sched_flush)) { |
606 | /* |
607 | * When there are only clear region requests, |
608 | * we schedule a flush in the future. |
609 | */ |
610 | queue_delayed_work(lc->dmlog_wq, &lc->flush_log_work, 3 * HZ); |
611 | atomic_set(&lc->sched_flush, 1); |
612 | } else { |
613 | /* |
614 | * Cancel pending flush because we |
615 | * have already flushed in mark_region. |
616 | */ |
617 | cancel_delayed_work(&lc->flush_log_work); |
618 | atomic_set(&lc->sched_flush, 0); |
619 | } |
620 | |
621 | out: |
622 | /* |
623 | * We can safely remove these entries, even after failure. |
624 | * Calling code will receive an error and will know that |
625 | * the log facility has failed. |
626 | */ |
627 | list_for_each_entry_safe(fe, tmp_fe, &mark_list, list) { |
628 | list_del(&fe->list); |
629 | mempool_free(fe, flush_entry_pool); |
630 | } |
631 | list_for_each_entry_safe(fe, tmp_fe, &clear_list, list) { |
632 | list_del(&fe->list); |
633 | mempool_free(fe, flush_entry_pool); |
634 | } |
635 | |
636 | if (r) |
637 | dm_table_event(lc->ti->table); |
638 | |
639 | return r; |
640 | } |
641 | |
642 | /* |
643 | * userspace_mark_region |
644 | * |
645 | * This function should avoid blocking unless absolutely required. |
646 | * (Memory allocation is valid for blocking.) |
647 | */ |
648 | static void userspace_mark_region(struct dm_dirty_log *log, region_t region) |
649 | { |
650 | unsigned long flags; |
651 | struct log_c *lc = log->context; |
652 | struct dm_dirty_log_flush_entry *fe; |
653 | |
654 | /* Wait for an allocation, but _never_ fail */ |
655 | fe = mempool_alloc(&lc->flush_entry_pool, GFP_NOIO); |
656 | BUG_ON(!fe); |
657 | |
658 | spin_lock_irqsave(&lc->flush_lock, flags); |
659 | fe->type = DM_ULOG_MARK_REGION; |
660 | fe->region = region; |
661 | list_add(&fe->list, &lc->mark_list); |
662 | spin_unlock_irqrestore(&lc->flush_lock, flags); |
663 | |
664 | return; |
665 | } |
666 | |
667 | /* |
668 | * userspace_clear_region |
669 | * |
670 | * This function must not block. |
 * So the allocation can't block; in the worst case it is ok for it
 * to fail, which simply means we can't clear the region. That does
 * nothing to the current sync context, but it does mean the region
 * will be resynchronized on a reload of the mirror even though it
 * is in-sync.
676 | */ |
677 | static void userspace_clear_region(struct dm_dirty_log *log, region_t region) |
678 | { |
679 | unsigned long flags; |
680 | struct log_c *lc = log->context; |
681 | struct dm_dirty_log_flush_entry *fe; |
682 | |
683 | /* |
684 | * If we fail to allocate, we skip the clearing of |
685 | * the region. This doesn't hurt us in any way, except |
686 | * to cause the region to be resync'ed when the |
687 | * device is activated next time. |
688 | */ |
689 | fe = mempool_alloc(&lc->flush_entry_pool, GFP_ATOMIC); |
690 | if (!fe) { |
691 | DMERR("Failed to allocate memory to clear region." ); |
692 | return; |
693 | } |
694 | |
695 | spin_lock_irqsave(&lc->flush_lock, flags); |
696 | fe->type = DM_ULOG_CLEAR_REGION; |
697 | fe->region = region; |
698 | list_add(&fe->list, &lc->clear_list); |
699 | spin_unlock_irqrestore(&lc->flush_lock, flags); |
700 | |
701 | return; |
702 | } |
703 | |
704 | /* |
705 | * userspace_get_resync_work |
706 | * |
707 | * Get a region that needs recovery. It is valid to return |
708 | * an error for this function. |
709 | * |
710 | * Returns: 1 if region filled, 0 if no work, <0 on error |
711 | */ |
712 | static int userspace_get_resync_work(struct dm_dirty_log *log, region_t *region) |
713 | { |
714 | int r; |
715 | size_t rdata_size; |
716 | struct log_c *lc = log->context; |
717 | struct { |
		int64_t i;	/* 64-bit for mixed-arch compatibility */
719 | region_t r; |
720 | } pkg; |
721 | |
722 | if (lc->in_sync_hint >= lc->region_count) |
723 | return 0; |
724 | |
725 | rdata_size = sizeof(pkg); |
726 | r = userspace_do_request(lc, lc->uuid, DM_ULOG_GET_RESYNC_WORK, |
727 | NULL, 0, (char *)&pkg, &rdata_size); |
728 | |
729 | *region = pkg.r; |
730 | return (r) ? r : (int)pkg.i; |
731 | } |
732 | |
733 | /* |
734 | * userspace_set_region_sync |
735 | * |
736 | * Set the sync status of a given region. This function |
737 | * must not fail. |
738 | */ |
739 | static void userspace_set_region_sync(struct dm_dirty_log *log, |
740 | region_t region, int in_sync) |
741 | { |
742 | struct log_c *lc = log->context; |
743 | struct { |
744 | region_t r; |
745 | int64_t i; |
746 | } pkg; |
747 | |
748 | pkg.r = region; |
749 | pkg.i = (int64_t)in_sync; |
750 | |
751 | (void) userspace_do_request(lc, lc->uuid, DM_ULOG_SET_REGION_SYNC, |
752 | (char *)&pkg, sizeof(pkg), NULL, NULL); |
753 | |
754 | /* |
755 | * It would be nice to be able to report failures. |
756 | * However, it is easy enough to detect and resolve. |
757 | */ |
758 | return; |
759 | } |
760 | |
761 | /* |
762 | * userspace_get_sync_count |
763 | * |
764 | * If there is any sort of failure when consulting the server, |
765 | * we assume that the sync count is zero. |
766 | * |
767 | * Returns: sync count on success, 0 on failure |
768 | */ |
769 | static region_t userspace_get_sync_count(struct dm_dirty_log *log) |
770 | { |
771 | int r; |
772 | size_t rdata_size; |
773 | uint64_t sync_count; |
774 | struct log_c *lc = log->context; |
775 | |
776 | rdata_size = sizeof(sync_count); |
777 | r = userspace_do_request(lc, lc->uuid, DM_ULOG_GET_SYNC_COUNT, |
778 | NULL, 0, (char *)&sync_count, &rdata_size); |
779 | |
780 | if (r) |
781 | return 0; |
782 | |
783 | if (sync_count >= lc->region_count) |
784 | lc->in_sync_hint = lc->region_count; |
785 | |
786 | return (region_t)sync_count; |
787 | } |
788 | |
789 | /* |
790 | * userspace_status |
791 | * |
792 | * Returns: amount of space consumed |
793 | */ |
794 | static int userspace_status(struct dm_dirty_log *log, status_type_t status_type, |
795 | char *result, unsigned maxlen) |
796 | { |
797 | int r = 0; |
798 | char *table_args; |
799 | size_t sz = (size_t)maxlen; |
800 | struct log_c *lc = log->context; |
801 | |
802 | switch (status_type) { |
803 | case STATUSTYPE_INFO: |
804 | r = userspace_do_request(lc, lc->uuid, DM_ULOG_STATUS_INFO, |
805 | NULL, 0, result, &sz); |
806 | |
807 | if (r) { |
808 | sz = 0; |
809 | DMEMIT("%s 1 COM_FAILURE" , log->type->name); |
810 | } |
811 | break; |
812 | case STATUSTYPE_TABLE: |
813 | sz = 0; |
814 | table_args = strchr(lc->usr_argv_str, ' '); |
815 | BUG_ON(!table_args); /* There will always be a ' ' */ |
816 | table_args++; |
817 | |
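		/*
		 * Emit the table line in the form userspace_ctr() parsed:
		 * "<type> <#args> <uuid> [integrated_flush] <userspace args>".
		 * table_args skips the leading target length that
		 * build_constructor_string() prepended to usr_argv_str.
		 */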
818 | DMEMIT("%s %u %s " , log->type->name, lc->usr_argc, lc->uuid); |
819 | if (lc->integrated_flush) |
820 | DMEMIT("integrated_flush " ); |
821 | DMEMIT("%s " , table_args); |
822 | break; |
823 | } |
824 | return (r) ? 0 : (int)sz; |
825 | } |
826 | |
827 | /* |
828 | * userspace_is_remote_recovering |
829 | * |
830 | * Returns: 1 if region recovering, 0 otherwise |
831 | */ |
832 | static int userspace_is_remote_recovering(struct dm_dirty_log *log, |
833 | region_t region) |
834 | { |
835 | int r; |
836 | uint64_t region64 = region; |
837 | struct log_c *lc = log->context; |
838 | static unsigned long limit; |
839 | struct { |
840 | int64_t is_recovering; |
841 | uint64_t in_sync_hint; |
842 | } pkg; |
843 | size_t rdata_size = sizeof(pkg); |
844 | |
845 | /* |
846 | * Once the mirror has been reported to be in-sync, |
847 | * it will never again ask for recovery work. So, |
848 | * we can safely say there is not a remote machine |
849 | * recovering if the device is in-sync. (in_sync_hint |
850 | * must be reset at resume time.) |
851 | */ |
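	/*
	 * Otherwise, rate-limit the userspace queries: ask at most once
	 * every HZ/4 jiffies and, between queries, assume the region is
	 * still being recovered remotely.
	 */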
852 | if (region < lc->in_sync_hint) |
853 | return 0; |
854 | else if (time_after(limit, jiffies)) |
855 | return 1; |
856 | |
857 | limit = jiffies + (HZ / 4); |
858 | r = userspace_do_request(lc, lc->uuid, DM_ULOG_IS_REMOTE_RECOVERING, |
859 | (char *)®ion64, sizeof(region64), |
860 | (char *)&pkg, &rdata_size); |
861 | if (r) |
862 | return 1; |
863 | |
864 | lc->in_sync_hint = pkg.in_sync_hint; |
865 | |
866 | return (int)pkg.is_recovering; |
867 | } |
868 | |
869 | static struct dm_dirty_log_type _userspace_type = { |
870 | .name = "userspace" , |
871 | .module = THIS_MODULE, |
872 | .ctr = userspace_ctr, |
873 | .dtr = userspace_dtr, |
874 | .presuspend = userspace_presuspend, |
875 | .postsuspend = userspace_postsuspend, |
876 | .resume = userspace_resume, |
877 | .get_region_size = userspace_get_region_size, |
878 | .is_clean = userspace_is_clean, |
879 | .in_sync = userspace_in_sync, |
880 | .flush = userspace_flush, |
881 | .mark_region = userspace_mark_region, |
882 | .clear_region = userspace_clear_region, |
883 | .get_resync_work = userspace_get_resync_work, |
884 | .set_region_sync = userspace_set_region_sync, |
885 | .get_sync_count = userspace_get_sync_count, |
886 | .status = userspace_status, |
887 | .is_remote_recovering = userspace_is_remote_recovering, |
888 | }; |
889 | |
890 | static int __init userspace_dirty_log_init(void) |
891 | { |
892 | int r = 0; |
893 | |
894 | _flush_entry_cache = KMEM_CACHE(dm_dirty_log_flush_entry, 0); |
895 | if (!_flush_entry_cache) { |
896 | DMWARN("Unable to create flush_entry_cache: No memory." ); |
897 | return -ENOMEM; |
898 | } |
899 | |
900 | r = dm_ulog_tfr_init(); |
901 | if (r) { |
902 | DMWARN("Unable to initialize userspace log communications" ); |
903 | kmem_cache_destroy(_flush_entry_cache); |
904 | return r; |
905 | } |
906 | |
907 | r = dm_dirty_log_type_register(&_userspace_type); |
908 | if (r) { |
909 | DMWARN("Couldn't register userspace dirty log type" ); |
910 | dm_ulog_tfr_exit(); |
911 | kmem_cache_destroy(_flush_entry_cache); |
912 | return r; |
913 | } |
914 | |
915 | DMINFO("version " DM_LOG_USERSPACE_VSN " loaded" ); |
916 | return 0; |
917 | } |
918 | |
919 | static void __exit userspace_dirty_log_exit(void) |
920 | { |
921 | dm_dirty_log_type_unregister(&_userspace_type); |
922 | dm_ulog_tfr_exit(); |
923 | kmem_cache_destroy(_flush_entry_cache); |
924 | |
925 | DMINFO("version " DM_LOG_USERSPACE_VSN " unloaded" ); |
926 | return; |
927 | } |
928 | |
929 | module_init(userspace_dirty_log_init); |
930 | module_exit(userspace_dirty_log_exit); |
931 | |
MODULE_DESCRIPTION(DM_NAME " userspace dirty log link");
MODULE_AUTHOR("Jonathan Brassow <dm-devel@redhat.com>");
MODULE_LICENSE("GPL");
935 | |