1// SPDX-License-Identifier: GPL-2.0-only
2/*
3 * Copyright (C) 2005-2008 Red Hat, Inc. All rights reserved.
4 */
5
6#include <linux/fs.h>
7#include <linux/filelock.h>
8#include <linux/miscdevice.h>
9#include <linux/poll.h>
10#include <linux/dlm.h>
11#include <linux/dlm_plock.h>
12#include <linux/slab.h>
13
14#include <trace/events/dlm.h>
15
16#include "dlm_internal.h"
17#include "lockspace.h"
18
19static DEFINE_SPINLOCK(ops_lock);
20static LIST_HEAD(send_list);
21static LIST_HEAD(recv_list);
22static DECLARE_WAIT_QUEUE_HEAD(send_wq);
23static DECLARE_WAIT_QUEUE_HEAD(recv_wq);
24
25struct plock_async_data {
26 void *fl;
27 void *file;
28 struct file_lock flc;
29 int (*callback)(struct file_lock *fl, int result);
30};
31
32struct plock_op {
33 struct list_head list;
34 int done;
35 struct dlm_plock_info info;
36 /* if set indicates async handling */
37 struct plock_async_data *data;
38};
39
40static inline void set_version(struct dlm_plock_info *info)
41{
42 info->version[0] = DLM_PLOCK_VERSION_MAJOR;
43 info->version[1] = DLM_PLOCK_VERSION_MINOR;
44 info->version[2] = DLM_PLOCK_VERSION_PATCH;
45}
46
47static struct plock_op *plock_lookup_waiter(const struct dlm_plock_info *info)
48{
49 struct plock_op *op = NULL, *iter;
50
51 list_for_each_entry(iter, &recv_list, list) {
52 if (iter->info.fsid == info->fsid &&
53 iter->info.number == info->number &&
54 iter->info.owner == info->owner &&
55 iter->info.pid == info->pid &&
56 iter->info.start == info->start &&
57 iter->info.end == info->end &&
58 iter->info.ex == info->ex &&
59 iter->info.wait) {
60 op = iter;
61 break;
62 }
63 }
64
65 return op;
66}
67
68static int check_version(struct dlm_plock_info *info)
69{
70 if ((DLM_PLOCK_VERSION_MAJOR != info->version[0]) ||
71 (DLM_PLOCK_VERSION_MINOR < info->version[1])) {
72 log_print("plock device version mismatch: "
73 "kernel (%u.%u.%u), user (%u.%u.%u)",
74 DLM_PLOCK_VERSION_MAJOR,
75 DLM_PLOCK_VERSION_MINOR,
76 DLM_PLOCK_VERSION_PATCH,
77 info->version[0],
78 info->version[1],
79 info->version[2]);
80 return -EINVAL;
81 }
82 return 0;
83}
84
85static void dlm_release_plock_op(struct plock_op *op)
86{
87 kfree(objp: op->data);
88 kfree(objp: op);
89}
90
91static void send_op(struct plock_op *op)
92{
93 set_version(&op->info);
94 spin_lock(lock: &ops_lock);
95 list_add_tail(new: &op->list, head: &send_list);
96 spin_unlock(lock: &ops_lock);
97 wake_up(&send_wq);
98}
99
100static int do_lock_cancel(const struct dlm_plock_info *orig_info)
101{
102 struct plock_op *op;
103 int rv;
104
105 op = kzalloc(size: sizeof(*op), GFP_NOFS);
106 if (!op)
107 return -ENOMEM;
108
109 op->info = *orig_info;
110 op->info.optype = DLM_PLOCK_OP_CANCEL;
111 op->info.wait = 0;
112
113 send_op(op);
114 wait_event(recv_wq, (op->done != 0));
115
116 rv = op->info.rv;
117
118 dlm_release_plock_op(op);
119 return rv;
120}
121
122int dlm_posix_lock(dlm_lockspace_t *lockspace, u64 number, struct file *file,
123 int cmd, struct file_lock *fl)
124{
125 struct plock_async_data *op_data;
126 struct dlm_ls *ls;
127 struct plock_op *op;
128 int rv;
129
130 ls = dlm_find_lockspace_local(id: lockspace);
131 if (!ls)
132 return -EINVAL;
133
134 op = kzalloc(size: sizeof(*op), GFP_NOFS);
135 if (!op) {
136 rv = -ENOMEM;
137 goto out;
138 }
139
140 op->info.optype = DLM_PLOCK_OP_LOCK;
141 op->info.pid = fl->fl_pid;
142 op->info.ex = (fl->fl_type == F_WRLCK);
143 op->info.wait = IS_SETLKW(cmd);
144 op->info.fsid = ls->ls_global_id;
145 op->info.number = number;
146 op->info.start = fl->fl_start;
147 op->info.end = fl->fl_end;
148 /* async handling */
149 if (fl->fl_lmops && fl->fl_lmops->lm_grant) {
150 op_data = kzalloc(size: sizeof(*op_data), GFP_NOFS);
151 if (!op_data) {
152 dlm_release_plock_op(op);
153 rv = -ENOMEM;
154 goto out;
155 }
156
157 /* fl_owner is lockd which doesn't distinguish
158 processes on the nfs client */
159 op->info.owner = (__u64) fl->fl_pid;
160 op_data->callback = fl->fl_lmops->lm_grant;
161 locks_init_lock(&op_data->flc);
162 locks_copy_lock(&op_data->flc, fl);
163 op_data->fl = fl;
164 op_data->file = file;
165
166 op->data = op_data;
167
168 send_op(op);
169 rv = FILE_LOCK_DEFERRED;
170 goto out;
171 } else {
172 op->info.owner = (__u64)(long) fl->fl_owner;
173 }
174
175 send_op(op);
176
177 if (op->info.wait) {
178 rv = wait_event_interruptible(recv_wq, (op->done != 0));
179 if (rv == -ERESTARTSYS) {
180 spin_lock(lock: &ops_lock);
181 /* recheck under ops_lock if we got a done != 0,
182 * if so this interrupt case should be ignored
183 */
184 if (op->done != 0) {
185 spin_unlock(lock: &ops_lock);
186 goto do_lock_wait;
187 }
188 spin_unlock(lock: &ops_lock);
189
190 rv = do_lock_cancel(orig_info: &op->info);
191 switch (rv) {
192 case 0:
193 /* waiter was deleted in user space, answer will never come
194 * remove original request. The original request must be
195 * on recv_list because the answer of do_lock_cancel()
196 * synchronized it.
197 */
198 spin_lock(lock: &ops_lock);
199 list_del(entry: &op->list);
200 spin_unlock(lock: &ops_lock);
201 rv = -EINTR;
202 break;
203 case -ENOENT:
204 /* cancellation wasn't successful but op should be done */
205 fallthrough;
206 default:
207 /* internal error doing cancel we need to wait */
208 goto wait;
209 }
210
211 log_debug(ls, "%s: wait interrupted %x %llx pid %d",
212 __func__, ls->ls_global_id,
213 (unsigned long long)number, op->info.pid);
214 dlm_release_plock_op(op);
215 goto out;
216 }
217 } else {
218wait:
219 wait_event(recv_wq, (op->done != 0));
220 }
221
222do_lock_wait:
223
224 WARN_ON(!list_empty(&op->list));
225
226 rv = op->info.rv;
227
228 if (!rv) {
229 if (locks_lock_file_wait(filp: file, fl) < 0)
230 log_error(ls, "dlm_posix_lock: vfs lock error %llx",
231 (unsigned long long)number);
232 }
233
234 dlm_release_plock_op(op);
235out:
236 dlm_put_lockspace(ls);
237 return rv;
238}
239EXPORT_SYMBOL_GPL(dlm_posix_lock);
240
241/* Returns failure iff a successful lock operation should be canceled */
242static int dlm_plock_callback(struct plock_op *op)
243{
244 struct plock_async_data *op_data = op->data;
245 struct file *file;
246 struct file_lock *fl;
247 struct file_lock *flc;
248 int (*notify)(struct file_lock *fl, int result) = NULL;
249 int rv = 0;
250
251 WARN_ON(!list_empty(&op->list));
252
253 /* check if the following 2 are still valid or make a copy */
254 file = op_data->file;
255 flc = &op_data->flc;
256 fl = op_data->fl;
257 notify = op_data->callback;
258
259 if (op->info.rv) {
260 notify(fl, op->info.rv);
261 goto out;
262 }
263
264 /* got fs lock; bookkeep locally as well: */
265 flc->fl_flags &= ~FL_SLEEP;
266 if (posix_lock_file(file, flc, NULL)) {
267 /*
268 * This can only happen in the case of kmalloc() failure.
269 * The filesystem's own lock is the authoritative lock,
270 * so a failure to get the lock locally is not a disaster.
271 * As long as the fs cannot reliably cancel locks (especially
272 * in a low-memory situation), we're better off ignoring
273 * this failure than trying to recover.
274 */
275 log_print("dlm_plock_callback: vfs lock error %llx file %p fl %p",
276 (unsigned long long)op->info.number, file, fl);
277 }
278
279 rv = notify(fl, 0);
280 if (rv) {
281 /* XXX: We need to cancel the fs lock here: */
282 log_print("%s: lock granted after lock request failed; dangling lock!",
283 __func__);
284 goto out;
285 }
286
287out:
288 dlm_release_plock_op(op);
289 return rv;
290}
291
292int dlm_posix_unlock(dlm_lockspace_t *lockspace, u64 number, struct file *file,
293 struct file_lock *fl)
294{
295 struct dlm_ls *ls;
296 struct plock_op *op;
297 int rv;
298 unsigned char fl_flags = fl->fl_flags;
299
300 ls = dlm_find_lockspace_local(id: lockspace);
301 if (!ls)
302 return -EINVAL;
303
304 op = kzalloc(size: sizeof(*op), GFP_NOFS);
305 if (!op) {
306 rv = -ENOMEM;
307 goto out;
308 }
309
310 /* cause the vfs unlock to return ENOENT if lock is not found */
311 fl->fl_flags |= FL_EXISTS;
312
313 rv = locks_lock_file_wait(filp: file, fl);
314 if (rv == -ENOENT) {
315 rv = 0;
316 goto out_free;
317 }
318 if (rv < 0) {
319 log_error(ls, "dlm_posix_unlock: vfs unlock error %d %llx",
320 rv, (unsigned long long)number);
321 }
322
323 op->info.optype = DLM_PLOCK_OP_UNLOCK;
324 op->info.pid = fl->fl_pid;
325 op->info.fsid = ls->ls_global_id;
326 op->info.number = number;
327 op->info.start = fl->fl_start;
328 op->info.end = fl->fl_end;
329 if (fl->fl_lmops && fl->fl_lmops->lm_grant)
330 op->info.owner = (__u64) fl->fl_pid;
331 else
332 op->info.owner = (__u64)(long) fl->fl_owner;
333
334 if (fl->fl_flags & FL_CLOSE) {
335 op->info.flags |= DLM_PLOCK_FL_CLOSE;
336 send_op(op);
337 rv = 0;
338 goto out;
339 }
340
341 send_op(op);
342 wait_event(recv_wq, (op->done != 0));
343
344 WARN_ON(!list_empty(&op->list));
345
346 rv = op->info.rv;
347
348 if (rv == -ENOENT)
349 rv = 0;
350
351out_free:
352 dlm_release_plock_op(op);
353out:
354 dlm_put_lockspace(ls);
355 fl->fl_flags = fl_flags;
356 return rv;
357}
358EXPORT_SYMBOL_GPL(dlm_posix_unlock);
359
/*
 * NOTE: This implementation can only handle async lock requests as nfs
 * issues them.  It cannot cancel a pending lock request sitting in
 * wait_event(), but for now nfs is the only local kernel user.
 */
366int dlm_posix_cancel(dlm_lockspace_t *lockspace, u64 number, struct file *file,
367 struct file_lock *fl)
368{
369 struct dlm_plock_info info;
370 struct plock_op *op;
371 struct dlm_ls *ls;
372 int rv;
373
374 /* this only works for async request for now and nfs is the only
375 * kernel user right now.
376 */
377 if (WARN_ON_ONCE(!fl->fl_lmops || !fl->fl_lmops->lm_grant))
378 return -EOPNOTSUPP;
379
380 ls = dlm_find_lockspace_local(id: lockspace);
381 if (!ls)
382 return -EINVAL;
383
384 memset(&info, 0, sizeof(info));
385 info.pid = fl->fl_pid;
386 info.ex = (fl->fl_type == F_WRLCK);
387 info.fsid = ls->ls_global_id;
388 dlm_put_lockspace(ls);
389 info.number = number;
390 info.start = fl->fl_start;
391 info.end = fl->fl_end;
392 info.owner = (__u64)fl->fl_pid;
393
394 rv = do_lock_cancel(orig_info: &info);
395 switch (rv) {
396 case 0:
397 spin_lock(lock: &ops_lock);
398 /* lock request to cancel must be on recv_list because
399 * do_lock_cancel() synchronizes it.
400 */
401 op = plock_lookup_waiter(info: &info);
402 if (WARN_ON_ONCE(!op)) {
403 spin_unlock(lock: &ops_lock);
404 rv = -ENOLCK;
405 break;
406 }
407
408 list_del(entry: &op->list);
409 spin_unlock(lock: &ops_lock);
410 WARN_ON(op->info.optype != DLM_PLOCK_OP_LOCK);
411 op->data->callback(op->data->fl, -EINTR);
412 dlm_release_plock_op(op);
413 rv = -EINTR;
414 break;
415 case -ENOENT:
416 /* if cancel wasn't successful we probably were to late
417 * or it was a non-blocking lock request, so just unlock it.
418 */
419 rv = dlm_posix_unlock(lockspace, number, file, fl);
420 break;
421 default:
422 break;
423 }
424
425 return rv;
426}
427EXPORT_SYMBOL_GPL(dlm_posix_cancel);
428
429int dlm_posix_get(dlm_lockspace_t *lockspace, u64 number, struct file *file,
430 struct file_lock *fl)
431{
432 struct dlm_ls *ls;
433 struct plock_op *op;
434 int rv;
435
436 ls = dlm_find_lockspace_local(id: lockspace);
437 if (!ls)
438 return -EINVAL;
439
440 op = kzalloc(size: sizeof(*op), GFP_NOFS);
441 if (!op) {
442 rv = -ENOMEM;
443 goto out;
444 }
445
446 op->info.optype = DLM_PLOCK_OP_GET;
447 op->info.pid = fl->fl_pid;
448 op->info.ex = (fl->fl_type == F_WRLCK);
449 op->info.fsid = ls->ls_global_id;
450 op->info.number = number;
451 op->info.start = fl->fl_start;
452 op->info.end = fl->fl_end;
453 if (fl->fl_lmops && fl->fl_lmops->lm_grant)
454 op->info.owner = (__u64) fl->fl_pid;
455 else
456 op->info.owner = (__u64)(long) fl->fl_owner;
457
458 send_op(op);
459 wait_event(recv_wq, (op->done != 0));
460
461 WARN_ON(!list_empty(&op->list));
462
463 /* info.rv from userspace is 1 for conflict, 0 for no-conflict,
464 -ENOENT if there are no locks on the file */
465
466 rv = op->info.rv;
467
468 fl->fl_type = F_UNLCK;
469 if (rv == -ENOENT)
470 rv = 0;
471 else if (rv > 0) {
472 locks_init_lock(fl);
473 fl->fl_type = (op->info.ex) ? F_WRLCK : F_RDLCK;
474 fl->fl_flags = FL_POSIX;
475 fl->fl_pid = op->info.pid;
476 if (op->info.nodeid != dlm_our_nodeid())
477 fl->fl_pid = -fl->fl_pid;
478 fl->fl_start = op->info.start;
479 fl->fl_end = op->info.end;
480 rv = 0;
481 }
482
483 dlm_release_plock_op(op);
484out:
485 dlm_put_lockspace(ls);
486 return rv;
487}
488EXPORT_SYMBOL_GPL(dlm_posix_get);
489
490/* a read copies out one plock request from the send list */
491static ssize_t dev_read(struct file *file, char __user *u, size_t count,
492 loff_t *ppos)
493{
494 struct dlm_plock_info info;
495 struct plock_op *op = NULL;
496
497 if (count < sizeof(info))
498 return -EINVAL;
499
500 spin_lock(lock: &ops_lock);
501 if (!list_empty(head: &send_list)) {
502 op = list_first_entry(&send_list, struct plock_op, list);
503 if (op->info.flags & DLM_PLOCK_FL_CLOSE)
504 list_del(entry: &op->list);
505 else
506 list_move_tail(list: &op->list, head: &recv_list);
507 memcpy(&info, &op->info, sizeof(info));
508 }
509 spin_unlock(lock: &ops_lock);
510
511 if (!op)
512 return -EAGAIN;
513
514 trace_dlm_plock_read(info: &info);
515
516 /* there is no need to get a reply from userspace for unlocks
517 that were generated by the vfs cleaning up for a close
518 (the process did not make an unlock call). */
519
520 if (op->info.flags & DLM_PLOCK_FL_CLOSE)
521 dlm_release_plock_op(op);
522
523 if (copy_to_user(to: u, from: &info, n: sizeof(info)))
524 return -EFAULT;
525 return sizeof(info);
526}
527
528/* a write copies in one plock result that should match a plock_op
529 on the recv list */
530static ssize_t dev_write(struct file *file, const char __user *u, size_t count,
531 loff_t *ppos)
532{
533 struct plock_op *op = NULL, *iter;
534 struct dlm_plock_info info;
535 int do_callback = 0;
536
537 if (count != sizeof(info))
538 return -EINVAL;
539
540 if (copy_from_user(to: &info, from: u, n: sizeof(info)))
541 return -EFAULT;
542
543 trace_dlm_plock_write(info: &info);
544
545 if (check_version(info: &info))
546 return -EINVAL;
547
548 /*
549 * The results for waiting ops (SETLKW) can be returned in any
550 * order, so match all fields to find the op. The results for
551 * non-waiting ops are returned in the order that they were sent
552 * to userspace, so match the result with the first non-waiting op.
553 */
554 spin_lock(lock: &ops_lock);
555 if (info.wait) {
556 op = plock_lookup_waiter(info: &info);
557 } else {
558 list_for_each_entry(iter, &recv_list, list) {
559 if (!iter->info.wait &&
560 iter->info.fsid == info.fsid) {
561 op = iter;
562 break;
563 }
564 }
565 }
566
567 if (op) {
568 /* Sanity check that op and info match. */
569 if (info.wait)
570 WARN_ON(op->info.optype != DLM_PLOCK_OP_LOCK);
571 else
572 WARN_ON(op->info.number != info.number ||
573 op->info.owner != info.owner ||
574 op->info.optype != info.optype);
575
576 list_del_init(entry: &op->list);
577 memcpy(&op->info, &info, sizeof(info));
578 if (op->data)
579 do_callback = 1;
580 else
581 op->done = 1;
582 }
583 spin_unlock(lock: &ops_lock);
584
585 if (op) {
586 if (do_callback)
587 dlm_plock_callback(op);
588 else
589 wake_up(&recv_wq);
590 } else
591 pr_debug("%s: no op %x %llx", __func__,
592 info.fsid, (unsigned long long)info.number);
593 return count;
594}
595
596static __poll_t dev_poll(struct file *file, poll_table *wait)
597{
598 __poll_t mask = 0;
599
600 poll_wait(filp: file, wait_address: &send_wq, p: wait);
601
602 spin_lock(lock: &ops_lock);
603 if (!list_empty(head: &send_list))
604 mask = EPOLLIN | EPOLLRDNORM;
605 spin_unlock(lock: &ops_lock);
606
607 return mask;
608}
609
610static const struct file_operations dev_fops = {
611 .read = dev_read,
612 .write = dev_write,
613 .poll = dev_poll,
614 .owner = THIS_MODULE,
615 .llseek = noop_llseek,
616};
617
618static struct miscdevice plock_dev_misc = {
619 .minor = MISC_DYNAMIC_MINOR,
620 .name = DLM_PLOCK_MISC_NAME,
621 .fops = &dev_fops
622};
623
624int dlm_plock_init(void)
625{
626 int rv;
627
628 rv = misc_register(misc: &plock_dev_misc);
629 if (rv)
630 log_print("dlm_plock_init: misc_register failed %d", rv);
631 return rv;
632}
633
634void dlm_plock_exit(void)
635{
636 misc_deregister(misc: &plock_dev_misc);
637 WARN_ON(!list_empty(&send_list));
638 WARN_ON(!list_empty(&recv_list));
639}
640
641

/* source code of linux/fs/dlm/plock.c */