1 | // SPDX-License-Identifier: GPL-2.0-or-later |
2 | /* |
3 | * fs/inotify_user.c - inotify support for userspace |
4 | * |
5 | * Authors: |
6 | * John McCutchan <ttb@tentacle.dhs.org> |
7 | * Robert Love <rml@novell.com> |
8 | * |
9 | * Copyright (C) 2005 John McCutchan |
10 | * Copyright 2006 Hewlett-Packard Development Company, L.P. |
11 | * |
12 | * Copyright (C) 2009 Eric Paris <Red Hat Inc> |
13 | * inotify was largely rewritten to make use of the fsnotify infrastructure
14 | */ |
15 | |
16 | #include <linux/file.h> |
17 | #include <linux/fs.h> /* struct inode */ |
18 | #include <linux/fsnotify_backend.h> |
19 | #include <linux/idr.h> |
20 | #include <linux/init.h> /* fs_initcall */ |
21 | #include <linux/inotify.h> |
22 | #include <linux/kernel.h> /* roundup() */ |
23 | #include <linux/namei.h> /* LOOKUP_FOLLOW */ |
24 | #include <linux/sched/signal.h> |
25 | #include <linux/slab.h> /* struct kmem_cache */ |
26 | #include <linux/syscalls.h> |
27 | #include <linux/types.h> |
28 | #include <linux/anon_inodes.h> |
29 | #include <linux/uaccess.h> |
30 | #include <linux/poll.h> |
31 | #include <linux/wait.h> |
32 | #include <linux/memcontrol.h> |
33 | #include <linux/security.h> |
34 | |
35 | #include "inotify.h" |
36 | #include "../fdinfo.h" |
37 | |
38 | #include <asm/ioctls.h> |
39 | |
40 | /* |
41 | * An inotify watch requires allocating an inotify_inode_mark structure as |
42 | * well as pinning the watched inode. Doubling the size of a VFS inode |
43 | * should be more than enough to cover the additional filesystem inode |
44 | * size increase. |
45 | */ |
46 | #define INOTIFY_WATCH_COST (sizeof(struct inotify_inode_mark) + \ |
47 | 2 * sizeof(struct inode)) |
48 | |
49 | /* configurable via /proc/sys/fs/inotify/ */ |
50 | static int inotify_max_queued_events __read_mostly; |
51 | |
52 | struct kmem_cache *inotify_inode_mark_cachep __ro_after_init; |
53 | |
54 | #ifdef CONFIG_SYSCTL |
55 | |
56 | #include <linux/sysctl.h> |
57 | |
58 | static long it_zero = 0; |
59 | static long it_int_max = INT_MAX; |
60 | |
61 | static struct ctl_table inotify_table[] = { |
62 | { |
63 | .procname = "max_user_instances" , |
64 | .data = &init_user_ns.ucount_max[UCOUNT_INOTIFY_INSTANCES], |
65 | .maxlen = sizeof(long), |
66 | .mode = 0644, |
67 | .proc_handler = proc_doulongvec_minmax, |
68 | .extra1 = &it_zero, |
69 | .extra2 = &it_int_max, |
70 | }, |
71 | { |
72 | .procname = "max_user_watches" , |
73 | .data = &init_user_ns.ucount_max[UCOUNT_INOTIFY_WATCHES], |
74 | .maxlen = sizeof(long), |
75 | .mode = 0644, |
76 | .proc_handler = proc_doulongvec_minmax, |
77 | .extra1 = &it_zero, |
78 | .extra2 = &it_int_max, |
79 | }, |
80 | { |
81 | .procname = "max_queued_events" , |
82 | .data = &inotify_max_queued_events, |
83 | .maxlen = sizeof(int), |
84 | .mode = 0644, |
85 | .proc_handler = proc_dointvec_minmax, |
86 | .extra1 = SYSCTL_ZERO |
87 | }, |
88 | }; |
89 | |
90 | static void __init inotify_sysctls_init(void) |
91 | { |
92 | register_sysctl("fs/inotify" , inotify_table); |
93 | } |
94 | |
95 | #else |
96 | #define inotify_sysctls_init() do { } while (0) |
97 | #endif /* CONFIG_SYSCTL */ |
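
/*
 * These limits surface to userspace under /proc/sys/fs/inotify/ and can be
 * tuned at runtime; a hedged illustration (524288 is just an example value):
 *
 *     # cat /proc/sys/fs/inotify/max_user_watches
 *     # sysctl fs.inotify.max_user_watches=524288
 */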
98 | |
99 | static inline __u32 inotify_arg_to_mask(struct inode *inode, u32 arg) |
100 | { |
101 | __u32 mask; |
102 | |
103 | /* |
104 | * Everything should receive events when the inode is unmounted. |
105 | * All directories care about children. |
106 | */ |
107 | mask = (FS_UNMOUNT); |
108 | if (S_ISDIR(inode->i_mode)) |
109 | mask |= FS_EVENT_ON_CHILD; |
110 | |
111 | /* mask off the flags used to open the fd */ |
112 | mask |= (arg & INOTIFY_USER_MASK); |
113 | |
114 | return mask; |
115 | } |
116 | |
117 | #define INOTIFY_MARK_FLAGS \ |
118 | (FSNOTIFY_MARK_FLAG_EXCL_UNLINK | FSNOTIFY_MARK_FLAG_IN_ONESHOT) |
119 | |
120 | static inline unsigned int inotify_arg_to_flags(u32 arg) |
121 | { |
122 | unsigned int flags = 0; |
123 | |
124 | if (arg & IN_EXCL_UNLINK) |
125 | flags |= FSNOTIFY_MARK_FLAG_EXCL_UNLINK; |
126 | if (arg & IN_ONESHOT) |
127 | flags |= FSNOTIFY_MARK_FLAG_IN_ONESHOT; |
128 | |
129 | return flags; |
130 | } |
131 | |
132 | static inline u32 inotify_mask_to_arg(__u32 mask) |
133 | { |
134 | return mask & (IN_ALL_EVENTS | IN_ISDIR | IN_UNMOUNT | IN_IGNORED | |
135 | IN_Q_OVERFLOW); |
136 | } |
137 | |
138 | /* inotify userspace file descriptor functions */ |
139 | static __poll_t inotify_poll(struct file *file, poll_table *wait) |
140 | { |
141 | struct fsnotify_group *group = file->private_data; |
142 | __poll_t ret = 0; |
143 | |
144 | poll_wait(file, &group->notification_waitq, wait);
145 | spin_lock(&group->notification_lock);
146 | if (!fsnotify_notify_queue_is_empty(group)) |
147 | ret = EPOLLIN | EPOLLRDNORM; |
148 | spin_unlock(&group->notification_lock);
149 | |
150 | return ret; |
151 | } |
152 | |
153 | static int round_event_name_len(struct fsnotify_event *fsn_event) |
154 | { |
155 | struct inotify_event_info *event; |
156 | |
157 | event = INOTIFY_E(fsn_event);
158 | if (!event->name_len) |
159 | return 0; |
160 | return roundup(event->name_len + 1, sizeof(struct inotify_event)); |
161 | } |
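
/*
 * Worked example of the rounding above: sizeof(struct inotify_event) is 16
 * bytes (wd, mask, cookie, len), so an event carrying the name "foo.txt"
 * (name_len == 7) rounds 7 + 1 up to 16 bytes of name, for a 32-byte record
 * as ultimately seen by read().
 */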
162 | |
163 | /* |
164 | * Get an inotify_kernel_event if one exists and is small |
165 | * enough to fit in "count". Return an error pointer if |
166 | * not large enough. |
167 | * |
168 | * Called with the group->notification_lock held. |
169 | */ |
170 | static struct fsnotify_event *get_one_event(struct fsnotify_group *group, |
171 | size_t count) |
172 | { |
173 | size_t event_size = sizeof(struct inotify_event); |
174 | struct fsnotify_event *event; |
175 | |
176 | event = fsnotify_peek_first_event(group); |
177 | if (!event) |
178 | return NULL; |
179 | |
180 | pr_debug("%s: group=%p event=%p\n" , __func__, group, event); |
181 | |
182 | event_size += round_event_name_len(event);
183 | if (event_size > count)
184 | return ERR_PTR(-EINVAL);
185 | |
186 | /* held the notification_lock the whole time, so this is the |
187 | * same event we peeked above */ |
188 | fsnotify_remove_first_event(group); |
189 | |
190 | return event; |
191 | } |
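
/*
 * Seen from userspace, the -EINVAL above is the documented behaviour of
 * read(2) on an inotify fd when the supplied buffer is too small for the
 * next event; sizing the buffer as
 *
 *     sizeof(struct inotify_event) + NAME_MAX + 1
 *
 * is always enough for a single event.
 */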
192 | |
193 | /* |
194 | * Copy an event to user space, returning how much we copied. |
195 | * |
196 | * We already checked that the event size is smaller than the |
197 | * buffer we had in "get_one_event()" above. |
198 | */ |
199 | static ssize_t copy_event_to_user(struct fsnotify_group *group, |
200 | struct fsnotify_event *fsn_event, |
201 | char __user *buf) |
202 | { |
203 | struct inotify_event inotify_event; |
204 | struct inotify_event_info *event; |
205 | size_t event_size = sizeof(struct inotify_event); |
206 | size_t name_len; |
207 | size_t pad_name_len; |
208 | |
209 | pr_debug("%s: group=%p event=%p\n" , __func__, group, fsn_event); |
210 | |
211 | event = INOTIFY_E(fsn_event);
212 | name_len = event->name_len; |
213 | /* |
214 | * round up name length so it is a multiple of event_size |
215 | * plus an extra byte for the terminating '\0'. |
216 | */ |
217 | pad_name_len = round_event_name_len(fsn_event); |
218 | inotify_event.len = pad_name_len; |
219 | inotify_event.mask = inotify_mask_to_arg(event->mask);
220 | inotify_event.wd = event->wd; |
221 | inotify_event.cookie = event->sync_cookie; |
222 | |
223 | /* send the main event */ |
224 | if (copy_to_user(buf, &inotify_event, event_size))
225 | return -EFAULT; |
226 | |
227 | buf += event_size; |
228 | |
229 | /* |
230 | * fsnotify only stores the pathname, so here we have to send the pathname |
231 | * and then pad that pathname out to a multiple of sizeof(inotify_event) |
232 | * with zeros. |
233 | */ |
234 | if (pad_name_len) { |
235 | /* copy the path name */ |
236 | if (copy_to_user(buf, event->name, name_len))
237 | return -EFAULT; |
238 | buf += name_len; |
239 | |
240 | /* fill userspace with 0's */ |
241 | if (clear_user(buf, pad_name_len - name_len))
242 | return -EFAULT; |
243 | event_size += pad_name_len; |
244 | } |
245 | |
246 | return event_size; |
247 | } |
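
/*
 * The records built above are what userspace walks after read(): a struct
 * inotify_event header followed by 'len' bytes of NUL-padded name. A minimal
 * consumer loop, for illustration only (the handle() callback is
 * hypothetical, not part of any kernel or libc API):
 *
 *     char buf[4096] __attribute__((aligned(__alignof__(struct inotify_event))));
 *     ssize_t n = read(inotify_fd, buf, sizeof(buf));
 *     char *p = buf;
 *     while (p < buf + n) {
 *         struct inotify_event *ev = (struct inotify_event *)p;
 *         handle(ev->wd, ev->mask, ev->cookie, ev->len ? ev->name : "");
 *         p += sizeof(struct inotify_event) + ev->len;
 *     }
 */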
248 | |
249 | static ssize_t inotify_read(struct file *file, char __user *buf, |
250 | size_t count, loff_t *pos) |
251 | { |
252 | struct fsnotify_group *group; |
253 | struct fsnotify_event *kevent; |
254 | char __user *start; |
255 | int ret; |
256 | DEFINE_WAIT_FUNC(wait, woken_wake_function); |
257 | |
258 | start = buf; |
259 | group = file->private_data; |
260 | |
261 | add_wait_queue(&group->notification_waitq, &wait);
262 | while (1) {
263 | spin_lock(&group->notification_lock);
264 | kevent = get_one_event(group, count);
265 | spin_unlock(&group->notification_lock);
266 | |
267 | pr_debug("%s: group=%p kevent=%p\n" , __func__, group, kevent); |
268 | |
269 | if (kevent) { |
270 | ret = PTR_ERR(kevent);
271 | if (IS_ERR(kevent))
272 | break;
273 | ret = copy_event_to_user(group, kevent, buf);
274 | fsnotify_destroy_event(group, kevent);
275 | if (ret < 0) |
276 | break; |
277 | buf += ret; |
278 | count -= ret; |
279 | continue; |
280 | } |
281 | |
282 | ret = -EAGAIN; |
283 | if (file->f_flags & O_NONBLOCK) |
284 | break; |
285 | ret = -ERESTARTSYS; |
286 | if (signal_pending(current)) |
287 | break; |
288 | |
289 | if (start != buf) |
290 | break; |
291 | |
292 | wait_woken(&wait, TASK_INTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT);
293 | }
294 | remove_wait_queue(&group->notification_waitq, &wait);
295 | |
296 | if (start != buf && ret != -EFAULT) |
297 | ret = buf - start; |
298 | return ret; |
299 | } |
300 | |
301 | static int inotify_release(struct inode *ignored, struct file *file) |
302 | { |
303 | struct fsnotify_group *group = file->private_data; |
304 | |
305 | pr_debug("%s: group=%p\n" , __func__, group); |
306 | |
307 | /* free this group, matching the get from inotify_init()->inotify_new_group() */
308 | fsnotify_destroy_group(group); |
309 | |
310 | return 0; |
311 | } |
312 | |
313 | static long inotify_ioctl(struct file *file, unsigned int cmd, |
314 | unsigned long arg) |
315 | { |
316 | struct fsnotify_group *group; |
317 | struct fsnotify_event *fsn_event; |
318 | void __user *p; |
319 | int ret = -ENOTTY; |
320 | size_t send_len = 0; |
321 | |
322 | group = file->private_data; |
323 | p = (void __user *) arg; |
324 | |
325 | pr_debug("%s: group=%p cmd=%u\n" , __func__, group, cmd); |
326 | |
327 | switch (cmd) { |
328 | case FIONREAD: |
329 | spin_lock(&group->notification_lock);
330 | list_for_each_entry(fsn_event, &group->notification_list, |
331 | list) { |
332 | send_len += sizeof(struct inotify_event); |
333 | send_len += round_event_name_len(fsn_event); |
334 | } |
335 | spin_unlock(&group->notification_lock);
336 | ret = put_user(send_len, (int __user *) p); |
337 | break; |
338 | #ifdef CONFIG_CHECKPOINT_RESTORE |
339 | case INOTIFY_IOC_SETNEXTWD: |
340 | ret = -EINVAL; |
341 | if (arg >= 1 && arg <= INT_MAX) { |
342 | struct inotify_group_private_data *data; |
343 | |
344 | data = &group->inotify_data; |
345 | spin_lock(&data->idr_lock);
346 | idr_set_cursor(&data->idr, (unsigned int)arg);
347 | spin_unlock(&data->idr_lock);
348 | ret = 0; |
349 | } |
350 | break; |
351 | #endif /* CONFIG_CHECKPOINT_RESTORE */ |
352 | } |
353 | |
354 | return ret; |
355 | } |
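
/*
 * The FIONREAD case above lets userspace size a read buffer to hold
 * everything currently queued; for illustration (not part of this file):
 *
 *     int avail = 0;
 *     ioctl(inotify_fd, FIONREAD, &avail);
 *
 * after which 'avail' holds the queued byte count, i.e. the same sum of
 * header plus padded-name sizes computed in the loop above.
 */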
356 | |
357 | static const struct file_operations inotify_fops = { |
358 | .show_fdinfo = inotify_show_fdinfo, |
359 | .poll = inotify_poll, |
360 | .read = inotify_read, |
361 | .fasync = fsnotify_fasync, |
362 | .release = inotify_release, |
363 | .unlocked_ioctl = inotify_ioctl, |
364 | .compat_ioctl = inotify_ioctl, |
365 | .llseek = noop_llseek, |
366 | }; |
367 | |
368 | |
369 | /* |
370 | * inotify_find_inode - resolve a user-given path to a specific inode
371 | */ |
372 | static int inotify_find_inode(const char __user *dirname, struct path *path, |
373 | unsigned int flags, __u64 mask) |
374 | { |
375 | int error; |
376 | |
377 | error = user_path_at(AT_FDCWD, dirname, flags, path);
378 | if (error) |
379 | return error; |
380 | /* you can only watch an inode if you have read permissions on it */ |
381 | error = path_permission(path, MAY_READ); |
382 | if (error) { |
383 | path_put(path); |
384 | return error; |
385 | } |
386 | error = security_path_notify(path, mask, |
387 | FSNOTIFY_OBJ_TYPE_INODE);
388 | if (error) |
389 | path_put(path); |
390 | |
391 | return error; |
392 | } |
393 | |
394 | static int inotify_add_to_idr(struct idr *idr, spinlock_t *idr_lock, |
395 | struct inotify_inode_mark *i_mark) |
396 | { |
397 | int ret; |
398 | |
399 | idr_preload(GFP_KERNEL); |
400 | spin_lock(idr_lock);
401 | |
402 | ret = idr_alloc_cyclic(idr, i_mark, 1, 0, GFP_NOWAIT);
403 | if (ret >= 0) { |
404 | /* we added the mark to the idr, take a reference */ |
405 | i_mark->wd = ret; |
406 | fsnotify_get_mark(&i_mark->fsn_mark);
407 | } |
408 | |
409 | spin_unlock(idr_lock);
410 | idr_preload_end(); |
411 | return ret < 0 ? ret : 0; |
412 | } |
413 | |
414 | static struct inotify_inode_mark *inotify_idr_find_locked(struct fsnotify_group *group, |
415 | int wd) |
416 | { |
417 | struct idr *idr = &group->inotify_data.idr; |
418 | spinlock_t *idr_lock = &group->inotify_data.idr_lock; |
419 | struct inotify_inode_mark *i_mark; |
420 | |
421 | assert_spin_locked(idr_lock); |
422 | |
423 | i_mark = idr_find(idr, wd);
424 | if (i_mark) { |
425 | struct fsnotify_mark *fsn_mark = &i_mark->fsn_mark; |
426 | |
427 | fsnotify_get_mark(fsn_mark);
428 | /* One ref for being in the idr, one ref we just took */ |
429 | BUG_ON(refcount_read(&fsn_mark->refcnt) < 2); |
430 | } |
431 | |
432 | return i_mark; |
433 | } |
434 | |
435 | static struct inotify_inode_mark *inotify_idr_find(struct fsnotify_group *group, |
436 | int wd) |
437 | { |
438 | struct inotify_inode_mark *i_mark; |
439 | spinlock_t *idr_lock = &group->inotify_data.idr_lock; |
440 | |
441 | spin_lock(idr_lock);
442 | i_mark = inotify_idr_find_locked(group, wd);
443 | spin_unlock(idr_lock);
444 | |
445 | return i_mark; |
446 | } |
447 | |
448 | /* |
449 | * Remove the mark from the idr (if present) and drop the reference |
450 | * on the mark because it was in the idr. |
451 | */ |
452 | static void inotify_remove_from_idr(struct fsnotify_group *group, |
453 | struct inotify_inode_mark *i_mark) |
454 | { |
455 | struct idr *idr = &group->inotify_data.idr; |
456 | spinlock_t *idr_lock = &group->inotify_data.idr_lock; |
457 | struct inotify_inode_mark *found_i_mark = NULL; |
458 | int wd; |
459 | |
460 | spin_lock(idr_lock);
461 | wd = i_mark->wd; |
462 | |
463 | /* |
464 | * does this i_mark think it is in the idr? we shouldn't get called |
465 | * if it wasn't.... |
466 | */ |
467 | if (wd == -1) { |
468 | WARN_ONCE(1, "%s: i_mark=%p i_mark->wd=%d i_mark->group=%p\n" , |
469 | __func__, i_mark, i_mark->wd, i_mark->fsn_mark.group); |
470 | goto out; |
471 | } |
472 | |
473 | /* Lets look in the idr to see if we find it */ |
474 | found_i_mark = inotify_idr_find_locked(group, wd); |
475 | if (unlikely(!found_i_mark)) { |
476 | WARN_ONCE(1, "%s: i_mark=%p i_mark->wd=%d i_mark->group=%p\n" , |
477 | __func__, i_mark, i_mark->wd, i_mark->fsn_mark.group); |
478 | goto out; |
479 | } |
480 | |
481 | /* |
482 | * We found a mark in the idr at the right wd, but it's
483 | * not the mark we were told to remove. Something has gone
484 | * seriously wrong somewhere.
485 | */ |
486 | if (unlikely(found_i_mark != i_mark)) { |
487 | WARN_ONCE(1, "%s: i_mark=%p i_mark->wd=%d i_mark->group=%p " |
488 | "found_i_mark=%p found_i_mark->wd=%d " |
489 | "found_i_mark->group=%p\n" , __func__, i_mark, |
490 | i_mark->wd, i_mark->fsn_mark.group, found_i_mark, |
491 | found_i_mark->wd, found_i_mark->fsn_mark.group); |
492 | goto out; |
493 | } |
494 | |
495 | /* |
496 | * One ref for being in the idr |
497 | * one ref grabbed by inotify_idr_find |
498 | */ |
499 | if (unlikely(refcount_read(&i_mark->fsn_mark.refcnt) < 2)) { |
500 | printk(KERN_ERR "%s: i_mark=%p i_mark->wd=%d i_mark->group=%p\n" , |
501 | __func__, i_mark, i_mark->wd, i_mark->fsn_mark.group); |
502 | /* we can't really recover with bad ref counting... */
503 | BUG(); |
504 | } |
505 | |
506 | idr_remove(idr, wd);
507 | /* Removed from the idr, drop that ref. */
508 | fsnotify_put_mark(&i_mark->fsn_mark);
509 | out: |
510 | i_mark->wd = -1; |
511 | spin_unlock(idr_lock);
512 | /* match the ref taken by inotify_idr_find_locked() */ |
513 | if (found_i_mark) |
514 | fsnotify_put_mark(&found_i_mark->fsn_mark);
515 | } |
516 | |
517 | /* |
518 | * Send IN_IGNORED for this wd, remove this wd from the idr. |
519 | */ |
520 | void inotify_ignored_and_remove_idr(struct fsnotify_mark *fsn_mark, |
521 | struct fsnotify_group *group) |
522 | { |
523 | struct inotify_inode_mark *i_mark; |
524 | |
525 | /* Queue ignore event for the watch */ |
526 | inotify_handle_inode_event(fsn_mark, FS_IN_IGNORED, NULL, NULL, NULL,
527 | 0);
528 | |
529 | i_mark = container_of(fsn_mark, struct inotify_inode_mark, fsn_mark); |
530 | /* remove this mark from the idr */ |
531 | inotify_remove_from_idr(group, i_mark); |
532 | |
533 | dec_inotify_watches(group->inotify_data.ucounts);
534 | } |
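
/*
 * From userspace the IN_IGNORED queued above is the signal that a wd is
 * dead, whether from IN_ONESHOT, watch removal, or the inode going away.
 * A sketch of typical handling, assuming a caller-maintained wd-to-path
 * map (forget_watch() is hypothetical):
 *
 *     if (ev->mask & IN_IGNORED)
 *         forget_watch(ev->wd);
 */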
535 | |
536 | static int inotify_update_existing_watch(struct fsnotify_group *group, |
537 | struct inode *inode, |
538 | u32 arg) |
539 | { |
540 | struct fsnotify_mark *fsn_mark; |
541 | struct inotify_inode_mark *i_mark; |
542 | __u32 old_mask, new_mask; |
543 | int replace = !(arg & IN_MASK_ADD); |
544 | int create = (arg & IN_MASK_CREATE); |
545 | int ret; |
546 | |
547 | fsn_mark = fsnotify_find_mark(&inode->i_fsnotify_marks, group);
548 | if (!fsn_mark) |
549 | return -ENOENT; |
550 | else if (create) { |
551 | ret = -EEXIST; |
552 | goto out; |
553 | } |
554 | |
555 | i_mark = container_of(fsn_mark, struct inotify_inode_mark, fsn_mark); |
556 | |
557 | spin_lock(&fsn_mark->lock);
558 | old_mask = fsn_mark->mask; |
559 | if (replace) { |
560 | fsn_mark->mask = 0; |
561 | fsn_mark->flags &= ~INOTIFY_MARK_FLAGS; |
562 | } |
563 | fsn_mark->mask |= inotify_arg_to_mask(inode, arg); |
564 | fsn_mark->flags |= inotify_arg_to_flags(arg); |
565 | new_mask = fsn_mark->mask; |
566 | spin_unlock(&fsn_mark->lock);
567 | |
568 | if (old_mask != new_mask) { |
569 | /* more bits in old than in new? */ |
570 | int dropped = (old_mask & ~new_mask); |
571 | /* more bits in this fsn_mark than the inode's mask? */ |
572 | int do_inode = (new_mask & ~inode->i_fsnotify_mask); |
573 | |
574 | /* update the inode with this new fsn_mark */ |
575 | if (dropped || do_inode) |
576 | fsnotify_recalc_mask(inode->i_fsnotify_marks);
577 | |
578 | } |
579 | |
580 | /* return the wd */ |
581 | ret = i_mark->wd; |
582 | |
583 | out: |
584 | /* match the get from fsnotify_find_mark() */ |
585 | fsnotify_put_mark(fsn_mark);
586 | |
587 | return ret; |
588 | } |
589 | |
590 | static int inotify_new_watch(struct fsnotify_group *group, |
591 | struct inode *inode, |
592 | u32 arg) |
593 | { |
594 | struct inotify_inode_mark *tmp_i_mark; |
595 | int ret; |
596 | struct idr *idr = &group->inotify_data.idr; |
597 | spinlock_t *idr_lock = &group->inotify_data.idr_lock; |
598 | |
599 | tmp_i_mark = kmem_cache_alloc(inotify_inode_mark_cachep, GFP_KERNEL);
600 | if (unlikely(!tmp_i_mark)) |
601 | return -ENOMEM; |
602 | |
603 | fsnotify_init_mark(&tmp_i_mark->fsn_mark, group);
604 | tmp_i_mark->fsn_mark.mask = inotify_arg_to_mask(inode, arg); |
605 | tmp_i_mark->fsn_mark.flags = inotify_arg_to_flags(arg); |
606 | tmp_i_mark->wd = -1; |
607 | |
608 | ret = inotify_add_to_idr(idr, idr_lock, tmp_i_mark);
609 | if (ret) |
610 | goto out_err; |
611 | |
612 | /* increment the number of watches the user has */ |
613 | if (!inc_inotify_watches(group->inotify_data.ucounts)) {
614 | inotify_remove_from_idr(group, tmp_i_mark);
615 | ret = -ENOSPC; |
616 | goto out_err; |
617 | } |
618 | |
619 | /* we are on the idr, now get on the inode */ |
620 | ret = fsnotify_add_inode_mark_locked(&tmp_i_mark->fsn_mark, inode, 0);
621 | if (ret) { |
622 | /* we failed to get on the inode, get off the idr */ |
623 | inotify_remove_from_idr(group, tmp_i_mark);
624 | goto out_err; |
625 | } |
626 | |
627 | |
628 | /* return the watch descriptor for this new mark */ |
629 | ret = tmp_i_mark->wd; |
630 | |
631 | out_err: |
632 | /* match the ref from fsnotify_init_mark() */ |
633 | fsnotify_put_mark(&tmp_i_mark->fsn_mark);
634 | |
635 | return ret; |
636 | } |
637 | |
638 | static int inotify_update_watch(struct fsnotify_group *group, struct inode *inode, u32 arg) |
639 | { |
640 | int ret = 0; |
641 | |
642 | fsnotify_group_lock(group); |
643 | /* try to update an existing watch with the new arg */
644 | ret = inotify_update_existing_watch(group, inode, arg); |
645 | /* no mark present, try to add a new one */ |
646 | if (ret == -ENOENT) |
647 | ret = inotify_new_watch(group, inode, arg); |
648 | fsnotify_group_unlock(group); |
649 | |
650 | return ret; |
651 | } |
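
/*
 * The -ENOENT fallthrough above is what gives inotify_add_watch(2) its
 * add-or-update behaviour. From userspace, for illustration:
 *
 *     inotify_add_watch(fd, path, IN_MODIFY);                // replaces the existing mask
 *     inotify_add_watch(fd, path, IN_MASK_ADD | IN_ATTRIB);  // ORs into the existing mask
 *     inotify_add_watch(fd, path, IN_MASK_CREATE | IN_OPEN); // fails with EEXIST if already watched
 */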
652 | |
653 | static struct fsnotify_group *inotify_new_group(unsigned int max_events) |
654 | { |
655 | struct fsnotify_group *group; |
656 | struct inotify_event_info *oevent; |
657 | |
658 | group = fsnotify_alloc_group(&inotify_fsnotify_ops,
659 | FSNOTIFY_GROUP_USER);
660 | if (IS_ERR(group))
661 | return group; |
662 | |
663 | oevent = kmalloc(sizeof(struct inotify_event_info), GFP_KERNEL_ACCOUNT);
664 | if (unlikely(!oevent)) { |
665 | fsnotify_destroy_group(group); |
666 | return ERR_PTR(-ENOMEM);
667 | } |
668 | group->overflow_event = &oevent->fse; |
669 | fsnotify_init_event(group->overflow_event);
670 | oevent->mask = FS_Q_OVERFLOW; |
671 | oevent->wd = -1; |
672 | oevent->sync_cookie = 0; |
673 | oevent->name_len = 0; |
674 | |
675 | group->max_events = max_events; |
676 | group->memcg = get_mem_cgroup_from_mm(current->mm); |
677 | |
678 | spin_lock_init(&group->inotify_data.idr_lock); |
679 | idr_init(&group->inotify_data.idr);
680 | group->inotify_data.ucounts = inc_ucount(current_user_ns(),
681 | current_euid(),
682 | UCOUNT_INOTIFY_INSTANCES);
683 | |
684 | if (!group->inotify_data.ucounts) { |
685 | fsnotify_destroy_group(group); |
686 | return ERR_PTR(-EMFILE);
687 | } |
688 | |
689 | return group; |
690 | } |
691 | |
692 | |
693 | /* inotify syscalls */ |
694 | static int do_inotify_init(int flags) |
695 | { |
696 | struct fsnotify_group *group; |
697 | int ret; |
698 | |
699 | /* Check the IN_* constants for consistency. */ |
700 | BUILD_BUG_ON(IN_CLOEXEC != O_CLOEXEC); |
701 | BUILD_BUG_ON(IN_NONBLOCK != O_NONBLOCK); |
702 | |
703 | if (flags & ~(IN_CLOEXEC | IN_NONBLOCK)) |
704 | return -EINVAL; |
705 | |
706 | /* inotify_new_group() takes a reference to the group; we put it when the file is released */
707 | group = inotify_new_group(inotify_max_queued_events);
708 | if (IS_ERR(group))
709 | return PTR_ERR(group);
710 | |
711 | ret = anon_inode_getfd(name: "inotify" , fops: &inotify_fops, priv: group, |
712 | O_RDONLY | flags); |
713 | if (ret < 0) |
714 | fsnotify_destroy_group(group); |
715 | |
716 | return ret; |
717 | } |
718 | |
719 | SYSCALL_DEFINE1(inotify_init1, int, flags) |
720 | { |
721 | return do_inotify_init(flags); |
722 | } |
723 | |
724 | SYSCALL_DEFINE0(inotify_init) |
725 | { |
726 | return do_inotify_init(0);
727 | } |
728 | |
729 | SYSCALL_DEFINE3(inotify_add_watch, int, fd, const char __user *, pathname, |
730 | u32, mask) |
731 | { |
732 | struct fsnotify_group *group; |
733 | struct inode *inode; |
734 | struct path path; |
735 | struct fd f; |
736 | int ret; |
737 | unsigned flags = 0; |
738 | |
739 | /* |
740 | * We share a lot of code with fs/dnotify. We also share |
741 | * the bit layout between inotify's IN_* and the fsnotify |
742 | * FS_*. This check ensures that only the inotify IN_* |
743 | * bits get passed in and set in watches/events. |
744 | */ |
745 | if (unlikely(mask & ~ALL_INOTIFY_BITS)) |
746 | return -EINVAL; |
747 | /* |
748 | * Require at least one valid bit set in the mask. |
749 | * Without _something_ set, we would have no events to |
750 | * watch for. |
751 | */ |
752 | if (unlikely(!(mask & ALL_INOTIFY_BITS))) |
753 | return -EINVAL; |
754 | |
755 | f = fdget(fd); |
756 | if (unlikely(!f.file)) |
757 | return -EBADF; |
758 | |
759 | /* IN_MASK_ADD and IN_MASK_CREATE don't make sense together */ |
760 | if (unlikely((mask & IN_MASK_ADD) && (mask & IN_MASK_CREATE))) { |
761 | ret = -EINVAL; |
762 | goto fput_and_out; |
763 | } |
764 | |
765 | /* verify that this is indeed an inotify instance */ |
766 | if (unlikely(f.file->f_op != &inotify_fops)) { |
767 | ret = -EINVAL; |
768 | goto fput_and_out; |
769 | } |
770 | |
771 | if (!(mask & IN_DONT_FOLLOW)) |
772 | flags |= LOOKUP_FOLLOW; |
773 | if (mask & IN_ONLYDIR) |
774 | flags |= LOOKUP_DIRECTORY; |
775 | |
776 | ret = inotify_find_inode(pathname, &path, flags,
777 | (mask & IN_ALL_EVENTS));
778 | if (ret) |
779 | goto fput_and_out; |
780 | |
781 | /* inode held in place by reference to path; group by fget on fd */ |
782 | inode = path.dentry->d_inode; |
783 | group = f.file->private_data; |
784 | |
785 | /* create/update an inode mark */ |
786 | ret = inotify_update_watch(group, inode, mask);
787 | path_put(&path); |
788 | fput_and_out: |
789 | fdput(f);
790 | return ret; |
791 | } |
792 | |
793 | SYSCALL_DEFINE2(inotify_rm_watch, int, fd, __s32, wd) |
794 | { |
795 | struct fsnotify_group *group; |
796 | struct inotify_inode_mark *i_mark; |
797 | struct fd f; |
798 | int ret = -EINVAL; |
799 | |
800 | f = fdget(fd); |
801 | if (unlikely(!f.file)) |
802 | return -EBADF; |
803 | |
804 | /* verify that this is indeed an inotify instance */ |
805 | if (unlikely(f.file->f_op != &inotify_fops)) |
806 | goto out; |
807 | |
808 | group = f.file->private_data; |
809 | |
810 | i_mark = inotify_idr_find(group, wd); |
811 | if (unlikely(!i_mark)) |
812 | goto out; |
813 | |
814 | ret = 0; |
815 | |
816 | fsnotify_destroy_mark(&i_mark->fsn_mark, group);
817 | |
818 | /* match ref taken by inotify_idr_find */ |
819 | fsnotify_put_mark(&i_mark->fsn_mark);
820 | |
821 | out: |
822 | fdput(f);
823 | return ret; |
824 | } |
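
/*
 * Putting the three syscalls above together, a hedged end-to-end sketch of
 * the userspace side (error handling omitted, the watched path is only an
 * example):
 *
 *     int fd = inotify_init1(IN_NONBLOCK | IN_CLOEXEC);
 *     int wd = inotify_add_watch(fd, "/tmp", IN_CREATE | IN_DELETE | IN_ONLYDIR);
 *     ... read(fd, ...) and parse events as described at copy_event_to_user() ...
 *     inotify_rm_watch(fd, wd);
 *     close(fd);
 */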
825 | |
826 | /* |
827 | * inotify_user_setup - Our initialization function. Note that we cannot return |
828 | * error because we have compiled-in VFS hooks. So an (unlikely) failure here |
829 | * must result in panic(). |
830 | */ |
831 | static int __init inotify_user_setup(void) |
832 | { |
833 | unsigned long watches_max; |
834 | struct sysinfo si; |
835 | |
836 | si_meminfo(&si);
837 | /* |
838 | * Allow up to 1% of addressable memory to be allocated for inotify |
839 | * watches (per user) limited to the range [8192, 1048576]. |
840 | */ |
841 | watches_max = (((si.totalram - si.totalhigh) / 100) << PAGE_SHIFT) / |
842 | INOTIFY_WATCH_COST; |
843 | watches_max = clamp(watches_max, 8192UL, 1048576UL); |
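
/*
 * Rough worked example of the sizing above, with illustrative numbers only:
 * on a machine with 8 GiB of lowmem, 1% is roughly 85 MB; at an assumed
 * INOTIFY_WATCH_COST of around 1 KiB that works out to roughly 80000
 * watches per user, which the clamp then keeps within [8192, 1048576].
 */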
844 | |
845 | BUILD_BUG_ON(IN_ACCESS != FS_ACCESS); |
846 | BUILD_BUG_ON(IN_MODIFY != FS_MODIFY); |
847 | BUILD_BUG_ON(IN_ATTRIB != FS_ATTRIB); |
848 | BUILD_BUG_ON(IN_CLOSE_WRITE != FS_CLOSE_WRITE); |
849 | BUILD_BUG_ON(IN_CLOSE_NOWRITE != FS_CLOSE_NOWRITE); |
850 | BUILD_BUG_ON(IN_OPEN != FS_OPEN); |
851 | BUILD_BUG_ON(IN_MOVED_FROM != FS_MOVED_FROM); |
852 | BUILD_BUG_ON(IN_MOVED_TO != FS_MOVED_TO); |
853 | BUILD_BUG_ON(IN_CREATE != FS_CREATE); |
854 | BUILD_BUG_ON(IN_DELETE != FS_DELETE); |
855 | BUILD_BUG_ON(IN_DELETE_SELF != FS_DELETE_SELF); |
856 | BUILD_BUG_ON(IN_MOVE_SELF != FS_MOVE_SELF); |
857 | BUILD_BUG_ON(IN_UNMOUNT != FS_UNMOUNT); |
858 | BUILD_BUG_ON(IN_Q_OVERFLOW != FS_Q_OVERFLOW); |
859 | BUILD_BUG_ON(IN_IGNORED != FS_IN_IGNORED); |
860 | BUILD_BUG_ON(IN_ISDIR != FS_ISDIR); |
861 | |
862 | BUILD_BUG_ON(HWEIGHT32(ALL_INOTIFY_BITS) != 22); |
863 | |
864 | inotify_inode_mark_cachep = KMEM_CACHE(inotify_inode_mark, |
865 | SLAB_PANIC|SLAB_ACCOUNT); |
866 | |
867 | inotify_max_queued_events = 16384; |
868 | init_user_ns.ucount_max[UCOUNT_INOTIFY_INSTANCES] = 128; |
869 | init_user_ns.ucount_max[UCOUNT_INOTIFY_WATCHES] = watches_max; |
870 | inotify_sysctls_init(); |
871 | |
872 | return 0; |
873 | } |
874 | fs_initcall(inotify_user_setup); |
875 | |