1 | // SPDX-License-Identifier: GPL-2.0-only |
2 | /* |
3 | * Copyright (C) 2006-2009 Red Hat, Inc. |
4 | * |
5 | * This file is released under the LGPL. |
6 | */ |
7 | |
8 | #include <linux/bio.h> |
9 | #include <linux/slab.h> |
10 | #include <linux/jiffies.h> |
11 | #include <linux/dm-dirty-log.h> |
12 | #include <linux/device-mapper.h> |
13 | #include <linux/dm-log-userspace.h> |
14 | #include <linux/module.h> |
15 | #include <linux/workqueue.h> |
16 | |
17 | #include "dm-log-userspace-transfer.h" |
18 | |
19 | #define DM_LOG_USERSPACE_VSN "1.3.0" |
20 | |
21 | #define FLUSH_ENTRY_POOL_SIZE 16 |
22 | |
/*
 * One deferred mark/clear request.  Entries are queued on
 * log_c::mark_list / log_c::clear_list under flush_lock and sent
 * to the userspace server in batches at flush time.
 */
struct dm_dirty_log_flush_entry {
	int type;		/* DM_ULOG_MARK_REGION or DM_ULOG_CLEAR_REGION */
	region_t region;	/* region the request applies to */
	struct list_head list;	/* linkage on mark_list/clear_list */
};
28 | |
29 | /* |
30 | * This limit on the number of mark and clear request is, to a degree, |
31 | * arbitrary. However, there is some basis for the choice in the limits |
32 | * imposed on the size of data payload by dm-log-userspace-transfer.c: |
33 | * dm_consult_userspace(). |
34 | */ |
35 | #define MAX_FLUSH_GROUP_COUNT 32 |
36 | |
/*
 * Per-log instance context, allocated in userspace_ctr() and stored
 * in dm_dirty_log->context.
 */
struct log_c {
	struct dm_target *ti;
	struct dm_dev *log_dev;	/* device returned by DM_ULOG_CTR, if any */

	/*
	 * Cached constructor string; replayed to re-create the userspace
	 * log if the server disappears (see userspace_do_request()).
	 */
	char *usr_argv_str;
	uint32_t usr_argc;

	uint32_t region_size;
	region_t region_count;
	uint64_t luid;		/* local unique id (the log_c pointer value) */
	char uuid[DM_UUID_LEN];

	/*
	 * Mark and clear requests are held until a flush is issued
	 * so that we can group, and thereby limit, the amount of
	 * network traffic between kernel and userspace. The 'flush_lock'
	 * is used to protect these lists.
	 */
	spinlock_t flush_lock;
	struct list_head mark_list;
	struct list_head clear_list;

	/*
	 * in_sync_hint gets set when doing is_remote_recovering. It
	 * represents the first region that needs recovery. IOW, the
	 * first zero bit of sync_bits. This can be useful to limit
	 * traffic for calls like is_remote_recovering and get_resync_work,
	 * but take care in its use for anything else.
	 */
	uint64_t in_sync_hint;

	/*
	 * Workqueue for flush of clear region requests.
	 */
	struct workqueue_struct *dmlog_wq;
	struct delayed_work flush_log_work;
	atomic_t sched_flush;	/* non-zero while a delayed flush is queued */

	/*
	 * Combine userspace flush and mark requests for efficiency.
	 */
	uint32_t integrated_flush;

	mempool_t flush_entry_pool;
};
82 | |
83 | static struct kmem_cache *_flush_entry_cache; |
84 | |
85 | static int userspace_do_request(struct log_c *lc, const char *uuid, |
86 | int request_type, char *data, size_t data_size, |
87 | char *rdata, size_t *rdata_size) |
88 | { |
89 | int r; |
90 | |
91 | /* |
92 | * If the server isn't there, -ESRCH is returned, |
93 | * and we must keep trying until the server is |
94 | * restored. |
95 | */ |
96 | retry: |
97 | r = dm_consult_userspace(uuid, luid: lc->luid, request_type, data, |
98 | data_size, rdata, rdata_size); |
99 | |
100 | if (r != -ESRCH) |
101 | return r; |
102 | |
103 | DMERR(" Userspace log server not found." ); |
104 | while (1) { |
105 | set_current_state(TASK_INTERRUPTIBLE); |
106 | schedule_timeout(timeout: 2*HZ); |
107 | DMWARN("Attempting to contact userspace log server..." ); |
108 | r = dm_consult_userspace(uuid, luid: lc->luid, DM_ULOG_CTR, |
109 | data: lc->usr_argv_str, |
110 | strlen(lc->usr_argv_str) + 1, |
111 | NULL, NULL); |
112 | if (!r) |
113 | break; |
114 | } |
115 | DMINFO("Reconnected to userspace log server... DM_ULOG_CTR complete" ); |
116 | r = dm_consult_userspace(uuid, luid: lc->luid, DM_ULOG_RESUME, NULL, |
117 | data_size: 0, NULL, NULL); |
118 | if (!r) |
119 | goto retry; |
120 | |
121 | DMERR("Error trying to resume userspace log: %d" , r); |
122 | |
123 | return -ESRCH; |
124 | } |
125 | |
126 | static int build_constructor_string(struct dm_target *ti, |
127 | unsigned int argc, char **argv, |
128 | char **ctr_str) |
129 | { |
130 | int i, str_size; |
131 | char *str = NULL; |
132 | |
133 | *ctr_str = NULL; |
134 | |
135 | /* |
136 | * Determine overall size of the string. |
137 | */ |
138 | for (i = 0, str_size = 0; i < argc; i++) |
139 | str_size += strlen(argv[i]) + 1; /* +1 for space between args */ |
140 | |
141 | str_size += 20; /* Max number of chars in a printed u64 number */ |
142 | |
143 | str = kzalloc(size: str_size, GFP_KERNEL); |
144 | if (!str) { |
145 | DMWARN("Unable to allocate memory for constructor string" ); |
146 | return -ENOMEM; |
147 | } |
148 | |
149 | str_size = sprintf(buf: str, fmt: "%llu" , (unsigned long long)ti->len); |
150 | for (i = 0; i < argc; i++) |
151 | str_size += sprintf(buf: str + str_size, fmt: " %s" , argv[i]); |
152 | |
153 | *ctr_str = str; |
154 | return str_size; |
155 | } |
156 | |
157 | static void do_flush(struct work_struct *work) |
158 | { |
159 | int r; |
160 | struct log_c *lc = container_of(work, struct log_c, flush_log_work.work); |
161 | |
162 | atomic_set(v: &lc->sched_flush, i: 0); |
163 | |
164 | r = userspace_do_request(lc, uuid: lc->uuid, DM_ULOG_FLUSH, NULL, data_size: 0, NULL, NULL); |
165 | |
166 | if (r) |
167 | dm_table_event(t: lc->ti->table); |
168 | } |
169 | |
170 | /* |
171 | * userspace_ctr |
172 | * |
173 | * argv contains: |
174 | * <UUID> [integrated_flush] <other args> |
175 | * Where 'other args' are the userspace implementation-specific log |
176 | * arguments. |
177 | * |
178 | * Example: |
179 | * <UUID> [integrated_flush] clustered-disk <arg count> <log dev> |
180 | * <region_size> [[no]sync] |
181 | * |
182 | * This module strips off the <UUID> and uses it for identification |
183 | * purposes when communicating with userspace about a log. |
184 | * |
185 | * If integrated_flush is defined, the kernel combines flush |
186 | * and mark requests. |
187 | * |
188 | * The rest of the line, beginning with 'clustered-disk', is passed |
189 | * to the userspace ctr function. |
190 | */ |
191 | static int userspace_ctr(struct dm_dirty_log *log, struct dm_target *ti, |
192 | unsigned int argc, char **argv) |
193 | { |
194 | int r = 0; |
195 | int str_size; |
196 | char *ctr_str = NULL; |
197 | struct log_c *lc = NULL; |
198 | uint64_t rdata; |
199 | size_t rdata_size = sizeof(rdata); |
200 | char *devices_rdata = NULL; |
201 | size_t devices_rdata_size = DM_NAME_LEN; |
202 | |
203 | if (argc < 3) { |
204 | DMWARN("Too few arguments to userspace dirty log" ); |
205 | return -EINVAL; |
206 | } |
207 | |
208 | lc = kzalloc(size: sizeof(*lc), GFP_KERNEL); |
209 | if (!lc) { |
210 | DMWARN("Unable to allocate userspace log context." ); |
211 | return -ENOMEM; |
212 | } |
213 | |
214 | /* The ptr value is sufficient for local unique id */ |
215 | lc->luid = (unsigned long)lc; |
216 | |
217 | lc->ti = ti; |
218 | |
219 | if (strlen(argv[0]) > (DM_UUID_LEN - 1)) { |
220 | DMWARN("UUID argument too long." ); |
221 | kfree(objp: lc); |
222 | return -EINVAL; |
223 | } |
224 | |
225 | lc->usr_argc = argc; |
226 | |
227 | strscpy(p: lc->uuid, q: argv[0], size: sizeof(lc->uuid)); |
228 | argc--; |
229 | argv++; |
230 | spin_lock_init(&lc->flush_lock); |
231 | INIT_LIST_HEAD(list: &lc->mark_list); |
232 | INIT_LIST_HEAD(list: &lc->clear_list); |
233 | |
234 | if (!strcasecmp(s1: argv[0], s2: "integrated_flush" )) { |
235 | lc->integrated_flush = 1; |
236 | argc--; |
237 | argv++; |
238 | } |
239 | |
240 | str_size = build_constructor_string(ti, argc, argv, ctr_str: &ctr_str); |
241 | if (str_size < 0) { |
242 | kfree(objp: lc); |
243 | return str_size; |
244 | } |
245 | |
246 | devices_rdata = kzalloc(size: devices_rdata_size, GFP_KERNEL); |
247 | if (!devices_rdata) { |
248 | DMERR("Failed to allocate memory for device information" ); |
249 | r = -ENOMEM; |
250 | goto out; |
251 | } |
252 | |
253 | r = mempool_init_slab_pool(pool: &lc->flush_entry_pool, FLUSH_ENTRY_POOL_SIZE, |
254 | kc: _flush_entry_cache); |
255 | if (r) { |
256 | DMERR("Failed to create flush_entry_pool" ); |
257 | goto out; |
258 | } |
259 | |
260 | /* |
261 | * Send table string and get back any opened device. |
262 | */ |
263 | r = dm_consult_userspace(uuid: lc->uuid, luid: lc->luid, DM_ULOG_CTR, |
264 | data: ctr_str, data_size: str_size, |
265 | rdata: devices_rdata, rdata_size: &devices_rdata_size); |
266 | |
267 | if (r < 0) { |
268 | if (r == -ESRCH) |
269 | DMERR("Userspace log server not found" ); |
270 | else |
271 | DMERR("Userspace log server failed to create log" ); |
272 | goto out; |
273 | } |
274 | |
275 | /* Since the region size does not change, get it now */ |
276 | rdata_size = sizeof(rdata); |
277 | r = dm_consult_userspace(uuid: lc->uuid, luid: lc->luid, DM_ULOG_GET_REGION_SIZE, |
278 | NULL, data_size: 0, rdata: (char *)&rdata, rdata_size: &rdata_size); |
279 | |
280 | if (r) { |
281 | DMERR("Failed to get region size of dirty log" ); |
282 | goto out; |
283 | } |
284 | |
285 | lc->region_size = (uint32_t)rdata; |
286 | lc->region_count = dm_sector_div_up(ti->len, lc->region_size); |
287 | |
288 | if (devices_rdata_size) { |
289 | if (devices_rdata[devices_rdata_size - 1] != '\0') { |
290 | DMERR("DM_ULOG_CTR device return string not properly terminated" ); |
291 | r = -EINVAL; |
292 | goto out; |
293 | } |
294 | r = dm_get_device(ti, path: devices_rdata, |
295 | mode: dm_table_get_mode(t: ti->table), result: &lc->log_dev); |
296 | if (r) |
297 | DMERR("Failed to register %s with device-mapper" , |
298 | devices_rdata); |
299 | } |
300 | |
301 | if (lc->integrated_flush) { |
302 | lc->dmlog_wq = alloc_workqueue(fmt: "dmlogd" , flags: WQ_MEM_RECLAIM, max_active: 0); |
303 | if (!lc->dmlog_wq) { |
304 | DMERR("couldn't start dmlogd" ); |
305 | r = -ENOMEM; |
306 | goto out; |
307 | } |
308 | |
309 | INIT_DELAYED_WORK(&lc->flush_log_work, do_flush); |
310 | atomic_set(v: &lc->sched_flush, i: 0); |
311 | } |
312 | |
313 | out: |
314 | kfree(objp: devices_rdata); |
315 | if (r) { |
316 | mempool_exit(pool: &lc->flush_entry_pool); |
317 | kfree(objp: lc); |
318 | kfree(objp: ctr_str); |
319 | } else { |
320 | lc->usr_argv_str = ctr_str; |
321 | log->context = lc; |
322 | } |
323 | |
324 | return r; |
325 | } |
326 | |
327 | static void userspace_dtr(struct dm_dirty_log *log) |
328 | { |
329 | struct log_c *lc = log->context; |
330 | |
331 | if (lc->integrated_flush) { |
332 | /* flush workqueue */ |
333 | if (atomic_read(v: &lc->sched_flush)) |
334 | flush_delayed_work(dwork: &lc->flush_log_work); |
335 | |
336 | destroy_workqueue(wq: lc->dmlog_wq); |
337 | } |
338 | |
339 | (void) dm_consult_userspace(uuid: lc->uuid, luid: lc->luid, DM_ULOG_DTR, |
340 | NULL, data_size: 0, NULL, NULL); |
341 | |
342 | if (lc->log_dev) |
343 | dm_put_device(ti: lc->ti, d: lc->log_dev); |
344 | |
345 | mempool_exit(pool: &lc->flush_entry_pool); |
346 | |
347 | kfree(objp: lc->usr_argv_str); |
348 | kfree(objp: lc); |
349 | } |
350 | |
351 | static int userspace_presuspend(struct dm_dirty_log *log) |
352 | { |
353 | int r; |
354 | struct log_c *lc = log->context; |
355 | |
356 | r = dm_consult_userspace(uuid: lc->uuid, luid: lc->luid, DM_ULOG_PRESUSPEND, |
357 | NULL, data_size: 0, NULL, NULL); |
358 | |
359 | return r; |
360 | } |
361 | |
362 | static int userspace_postsuspend(struct dm_dirty_log *log) |
363 | { |
364 | int r; |
365 | struct log_c *lc = log->context; |
366 | |
367 | /* |
368 | * Run planned flush earlier. |
369 | */ |
370 | if (lc->integrated_flush && atomic_read(v: &lc->sched_flush)) |
371 | flush_delayed_work(dwork: &lc->flush_log_work); |
372 | |
373 | r = dm_consult_userspace(uuid: lc->uuid, luid: lc->luid, DM_ULOG_POSTSUSPEND, |
374 | NULL, data_size: 0, NULL, NULL); |
375 | |
376 | return r; |
377 | } |
378 | |
379 | static int userspace_resume(struct dm_dirty_log *log) |
380 | { |
381 | int r; |
382 | struct log_c *lc = log->context; |
383 | |
384 | lc->in_sync_hint = 0; |
385 | r = dm_consult_userspace(uuid: lc->uuid, luid: lc->luid, DM_ULOG_RESUME, |
386 | NULL, data_size: 0, NULL, NULL); |
387 | |
388 | return r; |
389 | } |
390 | |
391 | static uint32_t userspace_get_region_size(struct dm_dirty_log *log) |
392 | { |
393 | struct log_c *lc = log->context; |
394 | |
395 | return lc->region_size; |
396 | } |
397 | |
398 | /* |
399 | * userspace_is_clean |
400 | * |
401 | * Check whether a region is clean. If there is any sort of |
402 | * failure when consulting the server, we return not clean. |
403 | * |
404 | * Returns: 1 if clean, 0 otherwise |
405 | */ |
406 | static int userspace_is_clean(struct dm_dirty_log *log, region_t region) |
407 | { |
408 | int r; |
409 | uint64_t region64 = (uint64_t)region; |
410 | int64_t is_clean; |
411 | size_t rdata_size; |
412 | struct log_c *lc = log->context; |
413 | |
414 | rdata_size = sizeof(is_clean); |
415 | r = userspace_do_request(lc, uuid: lc->uuid, DM_ULOG_IS_CLEAN, |
416 | data: (char *)®ion64, data_size: sizeof(region64), |
417 | rdata: (char *)&is_clean, rdata_size: &rdata_size); |
418 | |
419 | return (r) ? 0 : (int)is_clean; |
420 | } |
421 | |
422 | /* |
423 | * userspace_in_sync |
424 | * |
425 | * Check if the region is in-sync. If there is any sort |
426 | * of failure when consulting the server, we assume that |
427 | * the region is not in sync. |
428 | * |
429 | * If 'can_block' is set, return immediately |
430 | * |
431 | * Returns: 1 if in-sync, 0 if not-in-sync, -EWOULDBLOCK |
432 | */ |
433 | static int userspace_in_sync(struct dm_dirty_log *log, region_t region, |
434 | int can_block) |
435 | { |
436 | int r; |
437 | uint64_t region64 = region; |
438 | int64_t in_sync; |
439 | size_t rdata_size; |
440 | struct log_c *lc = log->context; |
441 | |
442 | /* |
443 | * We can never respond directly - even if in_sync_hint is |
444 | * set. This is because another machine could see a device |
445 | * failure and mark the region out-of-sync. If we don't go |
446 | * to userspace to ask, we might think the region is in-sync |
447 | * and allow a read to pick up data that is stale. (This is |
448 | * very unlikely if a device actually fails; but it is very |
449 | * likely if a connection to one device from one machine fails.) |
450 | * |
451 | * There still might be a problem if the mirror caches the region |
452 | * state as in-sync... but then this call would not be made. So, |
453 | * that is a mirror problem. |
454 | */ |
455 | if (!can_block) |
456 | return -EWOULDBLOCK; |
457 | |
458 | rdata_size = sizeof(in_sync); |
459 | r = userspace_do_request(lc, uuid: lc->uuid, DM_ULOG_IN_SYNC, |
460 | data: (char *)®ion64, data_size: sizeof(region64), |
461 | rdata: (char *)&in_sync, rdata_size: &rdata_size); |
462 | return (r) ? 0 : (int)in_sync; |
463 | } |
464 | |
465 | static int flush_one_by_one(struct log_c *lc, struct list_head *flush_list) |
466 | { |
467 | int r = 0; |
468 | struct dm_dirty_log_flush_entry *fe; |
469 | |
470 | list_for_each_entry(fe, flush_list, list) { |
471 | r = userspace_do_request(lc, uuid: lc->uuid, request_type: fe->type, |
472 | data: (char *)&fe->region, |
473 | data_size: sizeof(fe->region), |
474 | NULL, NULL); |
475 | if (r) |
476 | break; |
477 | } |
478 | |
479 | return r; |
480 | } |
481 | |
482 | static int flush_by_group(struct log_c *lc, struct list_head *flush_list, |
483 | int flush_with_payload) |
484 | { |
485 | int r = 0; |
486 | int count; |
487 | uint32_t type = 0; |
488 | struct dm_dirty_log_flush_entry *fe, *tmp_fe; |
489 | LIST_HEAD(tmp_list); |
490 | uint64_t group[MAX_FLUSH_GROUP_COUNT]; |
491 | |
492 | /* |
493 | * Group process the requests |
494 | */ |
495 | while (!list_empty(head: flush_list)) { |
496 | count = 0; |
497 | |
498 | list_for_each_entry_safe(fe, tmp_fe, flush_list, list) { |
499 | group[count] = fe->region; |
500 | count++; |
501 | |
502 | list_move(list: &fe->list, head: &tmp_list); |
503 | |
504 | type = fe->type; |
505 | if (count >= MAX_FLUSH_GROUP_COUNT) |
506 | break; |
507 | } |
508 | |
509 | if (flush_with_payload) { |
510 | r = userspace_do_request(lc, uuid: lc->uuid, DM_ULOG_FLUSH, |
511 | data: (char *)(group), |
512 | data_size: count * sizeof(uint64_t), |
513 | NULL, NULL); |
514 | /* |
515 | * Integrated flush failed. |
516 | */ |
517 | if (r) |
518 | break; |
519 | } else { |
520 | r = userspace_do_request(lc, uuid: lc->uuid, request_type: type, |
521 | data: (char *)(group), |
522 | data_size: count * sizeof(uint64_t), |
523 | NULL, NULL); |
524 | if (r) { |
525 | /* |
526 | * Group send failed. Attempt one-by-one. |
527 | */ |
528 | list_splice_init(list: &tmp_list, head: flush_list); |
529 | r = flush_one_by_one(lc, flush_list); |
530 | break; |
531 | } |
532 | } |
533 | } |
534 | |
535 | /* |
536 | * Must collect flush_entrys that were successfully processed |
537 | * as a group so that they will be free'd by the caller. |
538 | */ |
539 | list_splice_init(list: &tmp_list, head: flush_list); |
540 | |
541 | return r; |
542 | } |
543 | |
544 | /* |
545 | * userspace_flush |
546 | * |
547 | * This function is ok to block. |
548 | * The flush happens in two stages. First, it sends all |
549 | * clear/mark requests that are on the list. Then it |
550 | * tells the server to commit them. This gives the |
551 | * server a chance to optimise the commit, instead of |
552 | * doing it for every request. |
553 | * |
554 | * Additionally, we could implement another thread that |
555 | * sends the requests up to the server - reducing the |
556 | * load on flush. Then the flush would have less in |
557 | * the list and be responsible for the finishing commit. |
558 | * |
559 | * Returns: 0 on success, < 0 on failure |
560 | */ |
561 | static int userspace_flush(struct dm_dirty_log *log) |
562 | { |
563 | int r = 0; |
564 | unsigned long flags; |
565 | struct log_c *lc = log->context; |
566 | LIST_HEAD(mark_list); |
567 | LIST_HEAD(clear_list); |
568 | int mark_list_is_empty; |
569 | int clear_list_is_empty; |
570 | struct dm_dirty_log_flush_entry *fe, *tmp_fe; |
571 | mempool_t *flush_entry_pool = &lc->flush_entry_pool; |
572 | |
573 | spin_lock_irqsave(&lc->flush_lock, flags); |
574 | list_splice_init(list: &lc->mark_list, head: &mark_list); |
575 | list_splice_init(list: &lc->clear_list, head: &clear_list); |
576 | spin_unlock_irqrestore(lock: &lc->flush_lock, flags); |
577 | |
578 | mark_list_is_empty = list_empty(head: &mark_list); |
579 | clear_list_is_empty = list_empty(head: &clear_list); |
580 | |
581 | if (mark_list_is_empty && clear_list_is_empty) |
582 | return 0; |
583 | |
584 | r = flush_by_group(lc, flush_list: &clear_list, flush_with_payload: 0); |
585 | if (r) |
586 | goto out; |
587 | |
588 | if (!lc->integrated_flush) { |
589 | r = flush_by_group(lc, flush_list: &mark_list, flush_with_payload: 0); |
590 | if (r) |
591 | goto out; |
592 | r = userspace_do_request(lc, uuid: lc->uuid, DM_ULOG_FLUSH, |
593 | NULL, data_size: 0, NULL, NULL); |
594 | goto out; |
595 | } |
596 | |
597 | /* |
598 | * Send integrated flush request with mark_list as payload. |
599 | */ |
600 | r = flush_by_group(lc, flush_list: &mark_list, flush_with_payload: 1); |
601 | if (r) |
602 | goto out; |
603 | |
604 | if (mark_list_is_empty && !atomic_read(v: &lc->sched_flush)) { |
605 | /* |
606 | * When there are only clear region requests, |
607 | * we schedule a flush in the future. |
608 | */ |
609 | queue_delayed_work(wq: lc->dmlog_wq, dwork: &lc->flush_log_work, delay: 3 * HZ); |
610 | atomic_set(v: &lc->sched_flush, i: 1); |
611 | } else { |
612 | /* |
613 | * Cancel pending flush because we |
614 | * have already flushed in mark_region. |
615 | */ |
616 | cancel_delayed_work(dwork: &lc->flush_log_work); |
617 | atomic_set(v: &lc->sched_flush, i: 0); |
618 | } |
619 | |
620 | out: |
621 | /* |
622 | * We can safely remove these entries, even after failure. |
623 | * Calling code will receive an error and will know that |
624 | * the log facility has failed. |
625 | */ |
626 | list_for_each_entry_safe(fe, tmp_fe, &mark_list, list) { |
627 | list_del(entry: &fe->list); |
628 | mempool_free(element: fe, pool: flush_entry_pool); |
629 | } |
630 | list_for_each_entry_safe(fe, tmp_fe, &clear_list, list) { |
631 | list_del(entry: &fe->list); |
632 | mempool_free(element: fe, pool: flush_entry_pool); |
633 | } |
634 | |
635 | if (r) |
636 | dm_table_event(t: lc->ti->table); |
637 | |
638 | return r; |
639 | } |
640 | |
641 | /* |
642 | * userspace_mark_region |
643 | * |
644 | * This function should avoid blocking unless absolutely required. |
645 | * (Memory allocation is valid for blocking.) |
646 | */ |
647 | static void userspace_mark_region(struct dm_dirty_log *log, region_t region) |
648 | { |
649 | unsigned long flags; |
650 | struct log_c *lc = log->context; |
651 | struct dm_dirty_log_flush_entry *fe; |
652 | |
653 | /* Wait for an allocation, but _never_ fail */ |
654 | fe = mempool_alloc(pool: &lc->flush_entry_pool, GFP_NOIO); |
655 | BUG_ON(!fe); |
656 | |
657 | spin_lock_irqsave(&lc->flush_lock, flags); |
658 | fe->type = DM_ULOG_MARK_REGION; |
659 | fe->region = region; |
660 | list_add(new: &fe->list, head: &lc->mark_list); |
661 | spin_unlock_irqrestore(lock: &lc->flush_lock, flags); |
662 | } |
663 | |
664 | /* |
665 | * userspace_clear_region |
666 | * |
667 | * This function must not block. |
668 | * So, the alloc can't block. In the worst case, it is ok to |
669 | * fail. It would simply mean we can't clear the region. |
670 | * Does nothing to current sync context, but does mean |
671 | * the region will be re-sync'ed on a reload of the mirror |
672 | * even though it is in-sync. |
673 | */ |
674 | static void userspace_clear_region(struct dm_dirty_log *log, region_t region) |
675 | { |
676 | unsigned long flags; |
677 | struct log_c *lc = log->context; |
678 | struct dm_dirty_log_flush_entry *fe; |
679 | |
680 | /* |
681 | * If we fail to allocate, we skip the clearing of |
682 | * the region. This doesn't hurt us in any way, except |
683 | * to cause the region to be resync'ed when the |
684 | * device is activated next time. |
685 | */ |
686 | fe = mempool_alloc(pool: &lc->flush_entry_pool, GFP_ATOMIC); |
687 | if (!fe) { |
688 | DMERR("Failed to allocate memory to clear region." ); |
689 | return; |
690 | } |
691 | |
692 | spin_lock_irqsave(&lc->flush_lock, flags); |
693 | fe->type = DM_ULOG_CLEAR_REGION; |
694 | fe->region = region; |
695 | list_add(new: &fe->list, head: &lc->clear_list); |
696 | spin_unlock_irqrestore(lock: &lc->flush_lock, flags); |
697 | } |
698 | |
699 | /* |
700 | * userspace_get_resync_work |
701 | * |
702 | * Get a region that needs recovery. It is valid to return |
703 | * an error for this function. |
704 | * |
705 | * Returns: 1 if region filled, 0 if no work, <0 on error |
706 | */ |
707 | static int userspace_get_resync_work(struct dm_dirty_log *log, region_t *region) |
708 | { |
709 | int r; |
710 | size_t rdata_size; |
711 | struct log_c *lc = log->context; |
712 | struct { |
713 | int64_t i; /* 64-bit for mix arch compatibility */ |
714 | region_t r; |
715 | } pkg; |
716 | |
717 | if (lc->in_sync_hint >= lc->region_count) |
718 | return 0; |
719 | |
720 | rdata_size = sizeof(pkg); |
721 | r = userspace_do_request(lc, uuid: lc->uuid, DM_ULOG_GET_RESYNC_WORK, |
722 | NULL, data_size: 0, rdata: (char *)&pkg, rdata_size: &rdata_size); |
723 | |
724 | *region = pkg.r; |
725 | return (r) ? r : (int)pkg.i; |
726 | } |
727 | |
728 | /* |
729 | * userspace_set_region_sync |
730 | * |
731 | * Set the sync status of a given region. This function |
732 | * must not fail. |
733 | */ |
734 | static void userspace_set_region_sync(struct dm_dirty_log *log, |
735 | region_t region, int in_sync) |
736 | { |
737 | struct log_c *lc = log->context; |
738 | struct { |
739 | region_t r; |
740 | int64_t i; |
741 | } pkg; |
742 | |
743 | pkg.r = region; |
744 | pkg.i = (int64_t)in_sync; |
745 | |
746 | (void) userspace_do_request(lc, uuid: lc->uuid, DM_ULOG_SET_REGION_SYNC, |
747 | data: (char *)&pkg, data_size: sizeof(pkg), NULL, NULL); |
748 | |
749 | /* |
750 | * It would be nice to be able to report failures. |
751 | * However, it is easy enough to detect and resolve. |
752 | */ |
753 | } |
754 | |
755 | /* |
756 | * userspace_get_sync_count |
757 | * |
758 | * If there is any sort of failure when consulting the server, |
759 | * we assume that the sync count is zero. |
760 | * |
761 | * Returns: sync count on success, 0 on failure |
762 | */ |
763 | static region_t userspace_get_sync_count(struct dm_dirty_log *log) |
764 | { |
765 | int r; |
766 | size_t rdata_size; |
767 | uint64_t sync_count; |
768 | struct log_c *lc = log->context; |
769 | |
770 | rdata_size = sizeof(sync_count); |
771 | r = userspace_do_request(lc, uuid: lc->uuid, DM_ULOG_GET_SYNC_COUNT, |
772 | NULL, data_size: 0, rdata: (char *)&sync_count, rdata_size: &rdata_size); |
773 | |
774 | if (r) |
775 | return 0; |
776 | |
777 | if (sync_count >= lc->region_count) |
778 | lc->in_sync_hint = lc->region_count; |
779 | |
780 | return (region_t)sync_count; |
781 | } |
782 | |
783 | /* |
784 | * userspace_status |
785 | * |
786 | * Returns: amount of space consumed |
787 | */ |
788 | static int userspace_status(struct dm_dirty_log *log, status_type_t status_type, |
789 | char *result, unsigned int maxlen) |
790 | { |
791 | int r = 0; |
792 | char *table_args; |
793 | size_t sz = (size_t)maxlen; |
794 | struct log_c *lc = log->context; |
795 | |
796 | switch (status_type) { |
797 | case STATUSTYPE_INFO: |
798 | r = userspace_do_request(lc, uuid: lc->uuid, DM_ULOG_STATUS_INFO, |
799 | NULL, data_size: 0, rdata: result, rdata_size: &sz); |
800 | |
801 | if (r) { |
802 | sz = 0; |
803 | DMEMIT("%s 1 COM_FAILURE" , log->type->name); |
804 | } |
805 | break; |
806 | case STATUSTYPE_TABLE: |
807 | sz = 0; |
808 | table_args = strchr(lc->usr_argv_str, ' '); |
809 | BUG_ON(!table_args); /* There will always be a ' ' */ |
810 | table_args++; |
811 | |
812 | DMEMIT("%s %u %s " , log->type->name, lc->usr_argc, lc->uuid); |
813 | if (lc->integrated_flush) |
814 | DMEMIT("integrated_flush " ); |
815 | DMEMIT("%s " , table_args); |
816 | break; |
817 | case STATUSTYPE_IMA: |
818 | *result = '\0'; |
819 | break; |
820 | } |
821 | return (r) ? 0 : (int)sz; |
822 | } |
823 | |
824 | /* |
825 | * userspace_is_remote_recovering |
826 | * |
827 | * Returns: 1 if region recovering, 0 otherwise |
828 | */ |
829 | static int userspace_is_remote_recovering(struct dm_dirty_log *log, |
830 | region_t region) |
831 | { |
832 | int r; |
833 | uint64_t region64 = region; |
834 | struct log_c *lc = log->context; |
835 | static unsigned long limit; |
836 | struct { |
837 | int64_t is_recovering; |
838 | uint64_t in_sync_hint; |
839 | } pkg; |
840 | size_t rdata_size = sizeof(pkg); |
841 | |
842 | /* |
843 | * Once the mirror has been reported to be in-sync, |
844 | * it will never again ask for recovery work. So, |
845 | * we can safely say there is not a remote machine |
846 | * recovering if the device is in-sync. (in_sync_hint |
847 | * must be reset at resume time.) |
848 | */ |
849 | if (region < lc->in_sync_hint) |
850 | return 0; |
851 | else if (time_after(limit, jiffies)) |
852 | return 1; |
853 | |
854 | limit = jiffies + (HZ / 4); |
855 | r = userspace_do_request(lc, uuid: lc->uuid, DM_ULOG_IS_REMOTE_RECOVERING, |
856 | data: (char *)®ion64, data_size: sizeof(region64), |
857 | rdata: (char *)&pkg, rdata_size: &rdata_size); |
858 | if (r) |
859 | return 1; |
860 | |
861 | lc->in_sync_hint = pkg.in_sync_hint; |
862 | |
863 | return (int)pkg.is_recovering; |
864 | } |
865 | |
/* dm-dirty-log callback table for the "userspace" log type. */
static struct dm_dirty_log_type _userspace_type = {
	.name = "userspace" ,
	.module = THIS_MODULE,
	.ctr = userspace_ctr,
	.dtr = userspace_dtr,
	.presuspend = userspace_presuspend,
	.postsuspend = userspace_postsuspend,
	.resume = userspace_resume,
	.get_region_size = userspace_get_region_size,
	.is_clean = userspace_is_clean,
	.in_sync = userspace_in_sync,
	.flush = userspace_flush,
	.mark_region = userspace_mark_region,
	.clear_region = userspace_clear_region,
	.get_resync_work = userspace_get_resync_work,
	.set_region_sync = userspace_set_region_sync,
	.get_sync_count = userspace_get_sync_count,
	.status = userspace_status,
	.is_remote_recovering = userspace_is_remote_recovering,
};
886 | |
887 | static int __init userspace_dirty_log_init(void) |
888 | { |
889 | int r = 0; |
890 | |
891 | _flush_entry_cache = KMEM_CACHE(dm_dirty_log_flush_entry, 0); |
892 | if (!_flush_entry_cache) { |
893 | DMWARN("Unable to create flush_entry_cache: No memory." ); |
894 | return -ENOMEM; |
895 | } |
896 | |
897 | r = dm_ulog_tfr_init(); |
898 | if (r) { |
899 | DMWARN("Unable to initialize userspace log communications" ); |
900 | kmem_cache_destroy(s: _flush_entry_cache); |
901 | return r; |
902 | } |
903 | |
904 | r = dm_dirty_log_type_register(type: &_userspace_type); |
905 | if (r) { |
906 | DMWARN("Couldn't register userspace dirty log type" ); |
907 | dm_ulog_tfr_exit(); |
908 | kmem_cache_destroy(s: _flush_entry_cache); |
909 | return r; |
910 | } |
911 | |
912 | DMINFO("version " DM_LOG_USERSPACE_VSN " loaded" ); |
913 | return 0; |
914 | } |
915 | |
916 | static void __exit userspace_dirty_log_exit(void) |
917 | { |
918 | dm_dirty_log_type_unregister(type: &_userspace_type); |
919 | dm_ulog_tfr_exit(); |
920 | kmem_cache_destroy(s: _flush_entry_cache); |
921 | |
922 | DMINFO("version " DM_LOG_USERSPACE_VSN " unloaded" ); |
923 | } |
924 | |
925 | module_init(userspace_dirty_log_init); |
926 | module_exit(userspace_dirty_log_exit); |
927 | |
928 | MODULE_DESCRIPTION(DM_NAME " userspace dirty log link" ); |
929 | MODULE_AUTHOR("Jonathan Brassow <dm-devel@redhat.com>" ); |
930 | MODULE_LICENSE("GPL" ); |
931 | |