1 | // SPDX-License-Identifier: GPL-2.0 |
2 | #include <linux/fanotify.h> |
3 | #include <linux/fcntl.h> |
4 | #include <linux/fdtable.h> |
5 | #include <linux/file.h> |
6 | #include <linux/fs.h> |
7 | #include <linux/anon_inodes.h> |
8 | #include <linux/fsnotify_backend.h> |
9 | #include <linux/init.h> |
10 | #include <linux/mount.h> |
11 | #include <linux/namei.h> |
12 | #include <linux/poll.h> |
13 | #include <linux/security.h> |
14 | #include <linux/syscalls.h> |
15 | #include <linux/slab.h> |
16 | #include <linux/types.h> |
17 | #include <linux/uaccess.h> |
18 | #include <linux/compat.h> |
19 | #include <linux/sched/signal.h> |
20 | #include <linux/memcontrol.h> |
21 | #include <linux/statfs.h> |
22 | #include <linux/exportfs.h> |
23 | |
24 | #include <asm/ioctls.h> |
25 | |
26 | #include "../fsnotify.h" |
27 | #include "../fdinfo.h" |
28 | #include "fanotify.h" |
29 | |
30 | #define FANOTIFY_DEFAULT_MAX_EVENTS 16384 |
31 | #define FANOTIFY_OLD_DEFAULT_MAX_MARKS 8192 |
32 | #define FANOTIFY_DEFAULT_MAX_GROUPS 128 |
33 | #define FANOTIFY_DEFAULT_FEE_POOL_SIZE 32 |
34 | |
35 | /* |
36 | * Legacy fanotify marks limits (8192) is per group and we introduced a tunable |
37 | * limit of marks per user, similar to inotify. Effectively, the legacy limit |
38 | * of fanotify marks per user is <max marks per group> * <max groups per user>. |
39 | * This default limit (1M) also happens to match the increased limit of inotify |
40 | * max_user_watches since v5.10. |
41 | */ |
42 | #define FANOTIFY_DEFAULT_MAX_USER_MARKS \ |
43 | (FANOTIFY_OLD_DEFAULT_MAX_MARKS * FANOTIFY_DEFAULT_MAX_GROUPS) |
44 | |
45 | /* |
46 | * Most of the memory cost of adding an inode mark is pinning the marked inode. |
47 | * The size of the filesystem inode struct is not uniform across filesystems, |
48 | * so double the size of a VFS inode is used as a conservative approximation. |
49 | */ |
50 | #define INODE_MARK_COST (2 * sizeof(struct inode)) |
51 | |
52 | /* configurable via /proc/sys/fs/fanotify/ */ |
53 | static int fanotify_max_queued_events __read_mostly; |
54 | |
#ifdef CONFIG_SYSCTL

#include <linux/sysctl.h>

/* Lower/upper bounds referenced by .extra1/.extra2 of the long-sized entries */
static long ft_zero = 0;
static long ft_int_max = INT_MAX;

/* Tunables registered under "fs/fanotify" by fanotify_sysctls_init() */
static struct ctl_table fanotify_table[] = {
	{
		.procname	= "max_user_groups",
		.data		= &init_user_ns.ucount_max[UCOUNT_FANOTIFY_GROUPS],
		.maxlen		= sizeof(long),
		.mode		= 0644,
		.proc_handler	= proc_doulongvec_minmax,
		.extra1		= &ft_zero,
		.extra2		= &ft_int_max,
	},
	{
		.procname	= "max_user_marks",
		.data		= &init_user_ns.ucount_max[UCOUNT_FANOTIFY_MARKS],
		.maxlen		= sizeof(long),
		.mode		= 0644,
		.proc_handler	= proc_doulongvec_minmax,
		.extra1		= &ft_zero,
		.extra2		= &ft_int_max,
	},
	{
		.procname	= "max_queued_events",
		.data		= &fanotify_max_queued_events,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= SYSCTL_ZERO
	},
};

/* Register fanotify_table; compiles to a no-op without CONFIG_SYSCTL */
static void __init fanotify_sysctls_init(void)
{
	register_sysctl("fs/fanotify", fanotify_table);
}
#else
#define fanotify_sysctls_init() do { } while (0)
#endif /* CONFIG_SYSCTL */
98 | |
99 | /* |
100 | * All flags that may be specified in parameter event_f_flags of fanotify_init. |
101 | * |
102 | * Internal and external open flags are stored together in field f_flags of |
103 | * struct file. Only external open flags shall be allowed in event_f_flags. |
104 | * Internal flags like FMODE_NONOTIFY, FMODE_EXEC, FMODE_NOCMTIME shall be |
105 | * excluded. |
106 | */ |
107 | #define FANOTIFY_INIT_ALL_EVENT_F_BITS ( \ |
108 | O_ACCMODE | O_APPEND | O_NONBLOCK | \ |
109 | __O_SYNC | O_DSYNC | O_CLOEXEC | \ |
110 | O_LARGEFILE | O_NOATIME ) |
111 | |
112 | extern const struct fsnotify_ops fanotify_fsnotify_ops; |
113 | |
114 | struct kmem_cache *fanotify_mark_cache __ro_after_init; |
115 | struct kmem_cache *fanotify_fid_event_cachep __ro_after_init; |
116 | struct kmem_cache *fanotify_path_event_cachep __ro_after_init; |
117 | struct kmem_cache *fanotify_perm_event_cachep __ro_after_init; |
118 | |
/* Info records copied to userspace are padded to this alignment */
#define FANOTIFY_EVENT_ALIGN 4

/* Fixed part of a fid info record: record header followed by a file_handle */
#define FANOTIFY_FID_INFO_HDR_LEN \
	(sizeof(struct fanotify_event_info_fid) + sizeof(struct file_handle))

/*
 * Length of a pidfd info record. The expansion is parenthesized like the
 * sibling length macros so it stays a single operand wherever it is used.
 */
#define FANOTIFY_PIDFD_INFO_HDR_LEN \
	(sizeof(struct fanotify_event_info_pidfd))

/* Length of an error info record */
#define FANOTIFY_ERROR_INFO_LEN \
	(sizeof(struct fanotify_event_info_error))
126 | |
127 | static int fanotify_fid_info_len(int fh_len, int name_len) |
128 | { |
129 | int info_len = fh_len; |
130 | |
131 | if (name_len) |
132 | info_len += name_len + 1; |
133 | |
134 | return roundup(FANOTIFY_FID_INFO_HDR_LEN + info_len, |
135 | FANOTIFY_EVENT_ALIGN); |
136 | } |
137 | |
138 | /* FAN_RENAME may have one or two dir+name info records */ |
139 | static int fanotify_dir_name_info_len(struct fanotify_event *event) |
140 | { |
141 | struct fanotify_info *info = fanotify_event_info(event); |
142 | int dir_fh_len = fanotify_event_dir_fh_len(event); |
143 | int dir2_fh_len = fanotify_event_dir2_fh_len(event); |
144 | int info_len = 0; |
145 | |
146 | if (dir_fh_len) |
147 | info_len += fanotify_fid_info_len(fh_len: dir_fh_len, |
148 | name_len: info->name_len); |
149 | if (dir2_fh_len) |
150 | info_len += fanotify_fid_info_len(fh_len: dir2_fh_len, |
151 | name_len: info->name2_len); |
152 | |
153 | return info_len; |
154 | } |
155 | |
156 | static size_t fanotify_event_len(unsigned int info_mode, |
157 | struct fanotify_event *event) |
158 | { |
159 | size_t event_len = FAN_EVENT_METADATA_LEN; |
160 | int fh_len; |
161 | int dot_len = 0; |
162 | |
163 | if (!info_mode) |
164 | return event_len; |
165 | |
166 | if (fanotify_is_error_event(mask: event->mask)) |
167 | event_len += FANOTIFY_ERROR_INFO_LEN; |
168 | |
169 | if (fanotify_event_has_any_dir_fh(event)) { |
170 | event_len += fanotify_dir_name_info_len(event); |
171 | } else if ((info_mode & FAN_REPORT_NAME) && |
172 | (event->mask & FAN_ONDIR)) { |
173 | /* |
174 | * With group flag FAN_REPORT_NAME, if name was not recorded in |
175 | * event on a directory, we will report the name ".". |
176 | */ |
177 | dot_len = 1; |
178 | } |
179 | |
180 | if (info_mode & FAN_REPORT_PIDFD) |
181 | event_len += FANOTIFY_PIDFD_INFO_HDR_LEN; |
182 | |
183 | if (fanotify_event_has_object_fh(event)) { |
184 | fh_len = fanotify_event_object_fh_len(event); |
185 | event_len += fanotify_fid_info_len(fh_len, name_len: dot_len); |
186 | } |
187 | |
188 | return event_len; |
189 | } |
190 | |
191 | /* |
192 | * Remove an hashed event from merge hash table. |
193 | */ |
194 | static void fanotify_unhash_event(struct fsnotify_group *group, |
195 | struct fanotify_event *event) |
196 | { |
197 | assert_spin_locked(&group->notification_lock); |
198 | |
199 | pr_debug("%s: group=%p event=%p bucket=%u\n" , __func__, |
200 | group, event, fanotify_event_hash_bucket(group, event)); |
201 | |
202 | if (WARN_ON_ONCE(hlist_unhashed(&event->merge_list))) |
203 | return; |
204 | |
205 | hlist_del_init(n: &event->merge_list); |
206 | } |
207 | |
208 | /* |
209 | * Get an fanotify notification event if one exists and is small |
210 | * enough to fit in "count". Return an error pointer if the count |
211 | * is not large enough. When permission event is dequeued, its state is |
212 | * updated accordingly. |
213 | */ |
214 | static struct fanotify_event *get_one_event(struct fsnotify_group *group, |
215 | size_t count) |
216 | { |
217 | size_t event_size; |
218 | struct fanotify_event *event = NULL; |
219 | struct fsnotify_event *fsn_event; |
220 | unsigned int info_mode = FAN_GROUP_FLAG(group, FANOTIFY_INFO_MODES); |
221 | |
222 | pr_debug("%s: group=%p count=%zd\n" , __func__, group, count); |
223 | |
224 | spin_lock(lock: &group->notification_lock); |
225 | fsn_event = fsnotify_peek_first_event(group); |
226 | if (!fsn_event) |
227 | goto out; |
228 | |
229 | event = FANOTIFY_E(fse: fsn_event); |
230 | event_size = fanotify_event_len(info_mode, event); |
231 | |
232 | if (event_size > count) { |
233 | event = ERR_PTR(error: -EINVAL); |
234 | goto out; |
235 | } |
236 | |
237 | /* |
238 | * Held the notification_lock the whole time, so this is the |
239 | * same event we peeked above. |
240 | */ |
241 | fsnotify_remove_first_event(group); |
242 | if (fanotify_is_perm_event(mask: event->mask)) |
243 | FANOTIFY_PERM(event)->state = FAN_EVENT_REPORTED; |
244 | if (fanotify_is_hashed_event(mask: event->mask)) |
245 | fanotify_unhash_event(group, event); |
246 | out: |
247 | spin_unlock(lock: &group->notification_lock); |
248 | return event; |
249 | } |
250 | |
251 | static int create_fd(struct fsnotify_group *group, const struct path *path, |
252 | struct file **file) |
253 | { |
254 | int client_fd; |
255 | struct file *new_file; |
256 | |
257 | client_fd = get_unused_fd_flags(flags: group->fanotify_data.f_flags); |
258 | if (client_fd < 0) |
259 | return client_fd; |
260 | |
261 | /* |
262 | * we need a new file handle for the userspace program so it can read even if it was |
263 | * originally opened O_WRONLY. |
264 | */ |
265 | new_file = dentry_open(path, |
266 | flags: group->fanotify_data.f_flags | __FMODE_NONOTIFY, |
267 | current_cred()); |
268 | if (IS_ERR(ptr: new_file)) { |
269 | /* |
270 | * we still send an event even if we can't open the file. this |
271 | * can happen when say tasks are gone and we try to open their |
272 | * /proc files or we try to open a WRONLY file like in sysfs |
273 | * we just send the errno to userspace since there isn't much |
274 | * else we can do. |
275 | */ |
276 | put_unused_fd(fd: client_fd); |
277 | client_fd = PTR_ERR(ptr: new_file); |
278 | } else { |
279 | *file = new_file; |
280 | } |
281 | |
282 | return client_fd; |
283 | } |
284 | |
285 | static int process_access_response_info(const char __user *info, |
286 | size_t info_len, |
287 | struct fanotify_response_info_audit_rule *friar) |
288 | { |
289 | if (info_len != sizeof(*friar)) |
290 | return -EINVAL; |
291 | |
292 | if (copy_from_user(to: friar, from: info, n: sizeof(*friar))) |
293 | return -EFAULT; |
294 | |
295 | if (friar->hdr.type != FAN_RESPONSE_INFO_AUDIT_RULE) |
296 | return -EINVAL; |
297 | if (friar->hdr.pad != 0) |
298 | return -EINVAL; |
299 | if (friar->hdr.len != sizeof(*friar)) |
300 | return -EINVAL; |
301 | |
302 | return info_len; |
303 | } |
304 | |
305 | /* |
306 | * Finish processing of permission event by setting it to ANSWERED state and |
307 | * drop group->notification_lock. |
308 | */ |
309 | static void finish_permission_event(struct fsnotify_group *group, |
310 | struct fanotify_perm_event *event, u32 response, |
311 | struct fanotify_response_info_audit_rule *friar) |
312 | __releases(&group->notification_lock) |
313 | { |
314 | bool destroy = false; |
315 | |
316 | assert_spin_locked(&group->notification_lock); |
317 | event->response = response & ~FAN_INFO; |
318 | if (response & FAN_INFO) |
319 | memcpy(&event->audit_rule, friar, sizeof(*friar)); |
320 | |
321 | if (event->state == FAN_EVENT_CANCELED) |
322 | destroy = true; |
323 | else |
324 | event->state = FAN_EVENT_ANSWERED; |
325 | spin_unlock(lock: &group->notification_lock); |
326 | if (destroy) |
327 | fsnotify_destroy_event(group, event: &event->fae.fse); |
328 | } |
329 | |
330 | static int process_access_response(struct fsnotify_group *group, |
331 | struct fanotify_response *response_struct, |
332 | const char __user *info, |
333 | size_t info_len) |
334 | { |
335 | struct fanotify_perm_event *event; |
336 | int fd = response_struct->fd; |
337 | u32 response = response_struct->response; |
338 | int ret = info_len; |
339 | struct fanotify_response_info_audit_rule friar; |
340 | |
341 | pr_debug("%s: group=%p fd=%d response=%u buf=%p size=%zu\n" , __func__, |
342 | group, fd, response, info, info_len); |
343 | /* |
344 | * make sure the response is valid, if invalid we do nothing and either |
345 | * userspace can send a valid response or we will clean it up after the |
346 | * timeout |
347 | */ |
348 | if (response & ~FANOTIFY_RESPONSE_VALID_MASK) |
349 | return -EINVAL; |
350 | |
351 | switch (response & FANOTIFY_RESPONSE_ACCESS) { |
352 | case FAN_ALLOW: |
353 | case FAN_DENY: |
354 | break; |
355 | default: |
356 | return -EINVAL; |
357 | } |
358 | |
359 | if ((response & FAN_AUDIT) && !FAN_GROUP_FLAG(group, FAN_ENABLE_AUDIT)) |
360 | return -EINVAL; |
361 | |
362 | if (response & FAN_INFO) { |
363 | ret = process_access_response_info(info, info_len, friar: &friar); |
364 | if (ret < 0) |
365 | return ret; |
366 | if (fd == FAN_NOFD) |
367 | return ret; |
368 | } else { |
369 | ret = 0; |
370 | } |
371 | |
372 | if (fd < 0) |
373 | return -EINVAL; |
374 | |
375 | spin_lock(lock: &group->notification_lock); |
376 | list_for_each_entry(event, &group->fanotify_data.access_list, |
377 | fae.fse.list) { |
378 | if (event->fd != fd) |
379 | continue; |
380 | |
381 | list_del_init(entry: &event->fae.fse.list); |
382 | finish_permission_event(group, event, response, friar: &friar); |
383 | wake_up(&group->fanotify_data.access_waitq); |
384 | return ret; |
385 | } |
386 | spin_unlock(lock: &group->notification_lock); |
387 | |
388 | return -ENOENT; |
389 | } |
390 | |
391 | static size_t copy_error_info_to_user(struct fanotify_event *event, |
392 | char __user *buf, int count) |
393 | { |
394 | struct fanotify_event_info_error info = { }; |
395 | struct fanotify_error_event *fee = FANOTIFY_EE(event); |
396 | |
397 | info.hdr.info_type = FAN_EVENT_INFO_TYPE_ERROR; |
398 | info.hdr.len = FANOTIFY_ERROR_INFO_LEN; |
399 | |
400 | if (WARN_ON(count < info.hdr.len)) |
401 | return -EFAULT; |
402 | |
403 | info.error = fee->error; |
404 | info.error_count = fee->err_count; |
405 | |
406 | if (copy_to_user(to: buf, from: &info, n: sizeof(info))) |
407 | return -EFAULT; |
408 | |
409 | return info.hdr.len; |
410 | } |
411 | |
412 | static int copy_fid_info_to_user(__kernel_fsid_t *fsid, struct fanotify_fh *fh, |
413 | int info_type, const char *name, |
414 | size_t name_len, |
415 | char __user *buf, size_t count) |
416 | { |
417 | struct fanotify_event_info_fid info = { }; |
418 | struct file_handle handle = { }; |
419 | unsigned char bounce[FANOTIFY_INLINE_FH_LEN], *fh_buf; |
420 | size_t fh_len = fh ? fh->len : 0; |
421 | size_t info_len = fanotify_fid_info_len(fh_len, name_len); |
422 | size_t len = info_len; |
423 | |
424 | pr_debug("%s: fh_len=%zu name_len=%zu, info_len=%zu, count=%zu\n" , |
425 | __func__, fh_len, name_len, info_len, count); |
426 | |
427 | if (WARN_ON_ONCE(len < sizeof(info) || len > count)) |
428 | return -EFAULT; |
429 | |
430 | /* |
431 | * Copy event info fid header followed by variable sized file handle |
432 | * and optionally followed by variable sized filename. |
433 | */ |
434 | switch (info_type) { |
435 | case FAN_EVENT_INFO_TYPE_FID: |
436 | case FAN_EVENT_INFO_TYPE_DFID: |
437 | if (WARN_ON_ONCE(name_len)) |
438 | return -EFAULT; |
439 | break; |
440 | case FAN_EVENT_INFO_TYPE_DFID_NAME: |
441 | case FAN_EVENT_INFO_TYPE_OLD_DFID_NAME: |
442 | case FAN_EVENT_INFO_TYPE_NEW_DFID_NAME: |
443 | if (WARN_ON_ONCE(!name || !name_len)) |
444 | return -EFAULT; |
445 | break; |
446 | default: |
447 | return -EFAULT; |
448 | } |
449 | |
450 | info.hdr.info_type = info_type; |
451 | info.hdr.len = len; |
452 | info.fsid = *fsid; |
453 | if (copy_to_user(to: buf, from: &info, n: sizeof(info))) |
454 | return -EFAULT; |
455 | |
456 | buf += sizeof(info); |
457 | len -= sizeof(info); |
458 | if (WARN_ON_ONCE(len < sizeof(handle))) |
459 | return -EFAULT; |
460 | |
461 | handle.handle_type = fh->type; |
462 | handle.handle_bytes = fh_len; |
463 | |
464 | /* Mangle handle_type for bad file_handle */ |
465 | if (!fh_len) |
466 | handle.handle_type = FILEID_INVALID; |
467 | |
468 | if (copy_to_user(to: buf, from: &handle, n: sizeof(handle))) |
469 | return -EFAULT; |
470 | |
471 | buf += sizeof(handle); |
472 | len -= sizeof(handle); |
473 | if (WARN_ON_ONCE(len < fh_len)) |
474 | return -EFAULT; |
475 | |
476 | /* |
477 | * For an inline fh and inline file name, copy through stack to exclude |
478 | * the copy from usercopy hardening protections. |
479 | */ |
480 | fh_buf = fanotify_fh_buf(fh); |
481 | if (fh_len <= FANOTIFY_INLINE_FH_LEN) { |
482 | memcpy(bounce, fh_buf, fh_len); |
483 | fh_buf = bounce; |
484 | } |
485 | if (copy_to_user(to: buf, from: fh_buf, n: fh_len)) |
486 | return -EFAULT; |
487 | |
488 | buf += fh_len; |
489 | len -= fh_len; |
490 | |
491 | if (name_len) { |
492 | /* Copy the filename with terminating null */ |
493 | name_len++; |
494 | if (WARN_ON_ONCE(len < name_len)) |
495 | return -EFAULT; |
496 | |
497 | if (copy_to_user(to: buf, from: name, n: name_len)) |
498 | return -EFAULT; |
499 | |
500 | buf += name_len; |
501 | len -= name_len; |
502 | } |
503 | |
504 | /* Pad with 0's */ |
505 | WARN_ON_ONCE(len < 0 || len >= FANOTIFY_EVENT_ALIGN); |
506 | if (len > 0 && clear_user(to: buf, n: len)) |
507 | return -EFAULT; |
508 | |
509 | return info_len; |
510 | } |
511 | |
512 | static int copy_pidfd_info_to_user(int pidfd, |
513 | char __user *buf, |
514 | size_t count) |
515 | { |
516 | struct fanotify_event_info_pidfd info = { }; |
517 | size_t info_len = FANOTIFY_PIDFD_INFO_HDR_LEN; |
518 | |
519 | if (WARN_ON_ONCE(info_len > count)) |
520 | return -EFAULT; |
521 | |
522 | info.hdr.info_type = FAN_EVENT_INFO_TYPE_PIDFD; |
523 | info.hdr.len = info_len; |
524 | info.pidfd = pidfd; |
525 | |
526 | if (copy_to_user(to: buf, from: &info, n: info_len)) |
527 | return -EFAULT; |
528 | |
529 | return info_len; |
530 | } |
531 | |
532 | static int copy_info_records_to_user(struct fanotify_event *event, |
533 | struct fanotify_info *info, |
534 | unsigned int info_mode, int pidfd, |
535 | char __user *buf, size_t count) |
536 | { |
537 | int ret, total_bytes = 0, info_type = 0; |
538 | unsigned int fid_mode = info_mode & FANOTIFY_FID_BITS; |
539 | unsigned int pidfd_mode = info_mode & FAN_REPORT_PIDFD; |
540 | |
541 | /* |
542 | * Event info records order is as follows: |
543 | * 1. dir fid + name |
544 | * 2. (optional) new dir fid + new name |
545 | * 3. (optional) child fid |
546 | */ |
547 | if (fanotify_event_has_dir_fh(event)) { |
548 | info_type = info->name_len ? FAN_EVENT_INFO_TYPE_DFID_NAME : |
549 | FAN_EVENT_INFO_TYPE_DFID; |
550 | |
551 | /* FAN_RENAME uses special info types */ |
552 | if (event->mask & FAN_RENAME) |
553 | info_type = FAN_EVENT_INFO_TYPE_OLD_DFID_NAME; |
554 | |
555 | ret = copy_fid_info_to_user(fsid: fanotify_event_fsid(event), |
556 | fh: fanotify_info_dir_fh(info), |
557 | info_type, |
558 | name: fanotify_info_name(info), |
559 | name_len: info->name_len, buf, count); |
560 | if (ret < 0) |
561 | return ret; |
562 | |
563 | buf += ret; |
564 | count -= ret; |
565 | total_bytes += ret; |
566 | } |
567 | |
568 | /* New dir fid+name may be reported in addition to old dir fid+name */ |
569 | if (fanotify_event_has_dir2_fh(event)) { |
570 | info_type = FAN_EVENT_INFO_TYPE_NEW_DFID_NAME; |
571 | ret = copy_fid_info_to_user(fsid: fanotify_event_fsid(event), |
572 | fh: fanotify_info_dir2_fh(info), |
573 | info_type, |
574 | name: fanotify_info_name2(info), |
575 | name_len: info->name2_len, buf, count); |
576 | if (ret < 0) |
577 | return ret; |
578 | |
579 | buf += ret; |
580 | count -= ret; |
581 | total_bytes += ret; |
582 | } |
583 | |
584 | if (fanotify_event_has_object_fh(event)) { |
585 | const char *dot = NULL; |
586 | int dot_len = 0; |
587 | |
588 | if (fid_mode == FAN_REPORT_FID || info_type) { |
589 | /* |
590 | * With only group flag FAN_REPORT_FID only type FID is |
591 | * reported. Second info record type is always FID. |
592 | */ |
593 | info_type = FAN_EVENT_INFO_TYPE_FID; |
594 | } else if ((fid_mode & FAN_REPORT_NAME) && |
595 | (event->mask & FAN_ONDIR)) { |
596 | /* |
597 | * With group flag FAN_REPORT_NAME, if name was not |
598 | * recorded in an event on a directory, report the name |
599 | * "." with info type DFID_NAME. |
600 | */ |
601 | info_type = FAN_EVENT_INFO_TYPE_DFID_NAME; |
602 | dot = "." ; |
603 | dot_len = 1; |
604 | } else if ((event->mask & ALL_FSNOTIFY_DIRENT_EVENTS) || |
605 | (event->mask & FAN_ONDIR)) { |
606 | /* |
607 | * With group flag FAN_REPORT_DIR_FID, a single info |
608 | * record has type DFID for directory entry modification |
609 | * event and for event on a directory. |
610 | */ |
611 | info_type = FAN_EVENT_INFO_TYPE_DFID; |
612 | } else { |
613 | /* |
614 | * With group flags FAN_REPORT_DIR_FID|FAN_REPORT_FID, |
615 | * a single info record has type FID for event on a |
616 | * non-directory, when there is no directory to report. |
617 | * For example, on FAN_DELETE_SELF event. |
618 | */ |
619 | info_type = FAN_EVENT_INFO_TYPE_FID; |
620 | } |
621 | |
622 | ret = copy_fid_info_to_user(fsid: fanotify_event_fsid(event), |
623 | fh: fanotify_event_object_fh(event), |
624 | info_type, name: dot, name_len: dot_len, |
625 | buf, count); |
626 | if (ret < 0) |
627 | return ret; |
628 | |
629 | buf += ret; |
630 | count -= ret; |
631 | total_bytes += ret; |
632 | } |
633 | |
634 | if (pidfd_mode) { |
635 | ret = copy_pidfd_info_to_user(pidfd, buf, count); |
636 | if (ret < 0) |
637 | return ret; |
638 | |
639 | buf += ret; |
640 | count -= ret; |
641 | total_bytes += ret; |
642 | } |
643 | |
644 | if (fanotify_is_error_event(mask: event->mask)) { |
645 | ret = copy_error_info_to_user(event, buf, count); |
646 | if (ret < 0) |
647 | return ret; |
648 | buf += ret; |
649 | count -= ret; |
650 | total_bytes += ret; |
651 | } |
652 | |
653 | return total_bytes; |
654 | } |
655 | |
656 | static ssize_t copy_event_to_user(struct fsnotify_group *group, |
657 | struct fanotify_event *event, |
658 | char __user *buf, size_t count) |
659 | { |
660 | struct fanotify_event_metadata metadata; |
661 | const struct path *path = fanotify_event_path(event); |
662 | struct fanotify_info *info = fanotify_event_info(event); |
663 | unsigned int info_mode = FAN_GROUP_FLAG(group, FANOTIFY_INFO_MODES); |
664 | unsigned int pidfd_mode = info_mode & FAN_REPORT_PIDFD; |
665 | struct file *f = NULL, *pidfd_file = NULL; |
666 | int ret, pidfd = FAN_NOPIDFD, fd = FAN_NOFD; |
667 | |
668 | pr_debug("%s: group=%p event=%p\n" , __func__, group, event); |
669 | |
670 | metadata.event_len = fanotify_event_len(info_mode, event); |
671 | metadata.metadata_len = FAN_EVENT_METADATA_LEN; |
672 | metadata.vers = FANOTIFY_METADATA_VERSION; |
673 | metadata.reserved = 0; |
674 | metadata.mask = event->mask & FANOTIFY_OUTGOING_EVENTS; |
675 | metadata.pid = pid_vnr(pid: event->pid); |
676 | /* |
677 | * For an unprivileged listener, event->pid can be used to identify the |
678 | * events generated by the listener process itself, without disclosing |
679 | * the pids of other processes. |
680 | */ |
681 | if (FAN_GROUP_FLAG(group, FANOTIFY_UNPRIV) && |
682 | task_tgid(current) != event->pid) |
683 | metadata.pid = 0; |
684 | |
685 | /* |
686 | * For now, fid mode is required for an unprivileged listener and |
687 | * fid mode does not report fd in events. Keep this check anyway |
688 | * for safety in case fid mode requirement is relaxed in the future |
689 | * to allow unprivileged listener to get events with no fd and no fid. |
690 | */ |
691 | if (!FAN_GROUP_FLAG(group, FANOTIFY_UNPRIV) && |
692 | path && path->mnt && path->dentry) { |
693 | fd = create_fd(group, path, file: &f); |
694 | if (fd < 0) |
695 | return fd; |
696 | } |
697 | metadata.fd = fd; |
698 | |
699 | if (pidfd_mode) { |
700 | /* |
701 | * Complain if the FAN_REPORT_PIDFD and FAN_REPORT_TID mutual |
702 | * exclusion is ever lifted. At the time of incoporating pidfd |
703 | * support within fanotify, the pidfd API only supported the |
704 | * creation of pidfds for thread-group leaders. |
705 | */ |
706 | WARN_ON_ONCE(FAN_GROUP_FLAG(group, FAN_REPORT_TID)); |
707 | |
708 | /* |
709 | * The PIDTYPE_TGID check for an event->pid is performed |
710 | * preemptively in an attempt to catch out cases where the event |
711 | * listener reads events after the event generating process has |
712 | * already terminated. Report FAN_NOPIDFD to the event listener |
713 | * in those cases, with all other pidfd creation errors being |
714 | * reported as FAN_EPIDFD. |
715 | */ |
716 | if (metadata.pid == 0 || |
717 | !pid_has_task(pid: event->pid, type: PIDTYPE_TGID)) { |
718 | pidfd = FAN_NOPIDFD; |
719 | } else { |
720 | pidfd = pidfd_prepare(pid: event->pid, flags: 0, ret: &pidfd_file); |
721 | if (pidfd < 0) |
722 | pidfd = FAN_EPIDFD; |
723 | } |
724 | } |
725 | |
726 | ret = -EFAULT; |
727 | /* |
728 | * Sanity check copy size in case get_one_event() and |
729 | * event_len sizes ever get out of sync. |
730 | */ |
731 | if (WARN_ON_ONCE(metadata.event_len > count)) |
732 | goto out_close_fd; |
733 | |
734 | if (copy_to_user(to: buf, from: &metadata, FAN_EVENT_METADATA_LEN)) |
735 | goto out_close_fd; |
736 | |
737 | buf += FAN_EVENT_METADATA_LEN; |
738 | count -= FAN_EVENT_METADATA_LEN; |
739 | |
740 | if (fanotify_is_perm_event(mask: event->mask)) |
741 | FANOTIFY_PERM(event)->fd = fd; |
742 | |
743 | if (info_mode) { |
744 | ret = copy_info_records_to_user(event, info, info_mode, pidfd, |
745 | buf, count); |
746 | if (ret < 0) |
747 | goto out_close_fd; |
748 | } |
749 | |
750 | if (f) |
751 | fd_install(fd, file: f); |
752 | |
753 | if (pidfd_file) |
754 | fd_install(fd: pidfd, file: pidfd_file); |
755 | |
756 | return metadata.event_len; |
757 | |
758 | out_close_fd: |
759 | if (fd != FAN_NOFD) { |
760 | put_unused_fd(fd); |
761 | fput(f); |
762 | } |
763 | |
764 | if (pidfd >= 0) { |
765 | put_unused_fd(fd: pidfd); |
766 | fput(pidfd_file); |
767 | } |
768 | |
769 | return ret; |
770 | } |
771 | |
772 | /* intofiy userspace file descriptor functions */ |
773 | static __poll_t fanotify_poll(struct file *file, poll_table *wait) |
774 | { |
775 | struct fsnotify_group *group = file->private_data; |
776 | __poll_t ret = 0; |
777 | |
778 | poll_wait(filp: file, wait_address: &group->notification_waitq, p: wait); |
779 | spin_lock(lock: &group->notification_lock); |
780 | if (!fsnotify_notify_queue_is_empty(group)) |
781 | ret = EPOLLIN | EPOLLRDNORM; |
782 | spin_unlock(lock: &group->notification_lock); |
783 | |
784 | return ret; |
785 | } |
786 | |
787 | static ssize_t fanotify_read(struct file *file, char __user *buf, |
788 | size_t count, loff_t *pos) |
789 | { |
790 | struct fsnotify_group *group; |
791 | struct fanotify_event *event; |
792 | char __user *start; |
793 | int ret; |
794 | DEFINE_WAIT_FUNC(wait, woken_wake_function); |
795 | |
796 | start = buf; |
797 | group = file->private_data; |
798 | |
799 | pr_debug("%s: group=%p\n" , __func__, group); |
800 | |
801 | add_wait_queue(wq_head: &group->notification_waitq, wq_entry: &wait); |
802 | while (1) { |
803 | /* |
804 | * User can supply arbitrarily large buffer. Avoid softlockups |
805 | * in case there are lots of available events. |
806 | */ |
807 | cond_resched(); |
808 | event = get_one_event(group, count); |
809 | if (IS_ERR(ptr: event)) { |
810 | ret = PTR_ERR(ptr: event); |
811 | break; |
812 | } |
813 | |
814 | if (!event) { |
815 | ret = -EAGAIN; |
816 | if (file->f_flags & O_NONBLOCK) |
817 | break; |
818 | |
819 | ret = -ERESTARTSYS; |
820 | if (signal_pending(current)) |
821 | break; |
822 | |
823 | if (start != buf) |
824 | break; |
825 | |
826 | wait_woken(wq_entry: &wait, TASK_INTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT); |
827 | continue; |
828 | } |
829 | |
830 | ret = copy_event_to_user(group, event, buf, count); |
831 | if (unlikely(ret == -EOPENSTALE)) { |
832 | /* |
833 | * We cannot report events with stale fd so drop it. |
834 | * Setting ret to 0 will continue the event loop and |
835 | * do the right thing if there are no more events to |
836 | * read (i.e. return bytes read, -EAGAIN or wait). |
837 | */ |
838 | ret = 0; |
839 | } |
840 | |
841 | /* |
842 | * Permission events get queued to wait for response. Other |
843 | * events can be destroyed now. |
844 | */ |
845 | if (!fanotify_is_perm_event(mask: event->mask)) { |
846 | fsnotify_destroy_event(group, event: &event->fse); |
847 | } else { |
848 | if (ret <= 0) { |
849 | spin_lock(lock: &group->notification_lock); |
850 | finish_permission_event(group, |
851 | event: FANOTIFY_PERM(event), FAN_DENY, NULL); |
852 | wake_up(&group->fanotify_data.access_waitq); |
853 | } else { |
854 | spin_lock(lock: &group->notification_lock); |
855 | list_add_tail(new: &event->fse.list, |
856 | head: &group->fanotify_data.access_list); |
857 | spin_unlock(lock: &group->notification_lock); |
858 | } |
859 | } |
860 | if (ret < 0) |
861 | break; |
862 | buf += ret; |
863 | count -= ret; |
864 | } |
865 | remove_wait_queue(wq_head: &group->notification_waitq, wq_entry: &wait); |
866 | |
867 | if (start != buf && ret != -EFAULT) |
868 | ret = buf - start; |
869 | return ret; |
870 | } |
871 | |
872 | static ssize_t fanotify_write(struct file *file, const char __user *buf, size_t count, loff_t *pos) |
873 | { |
874 | struct fanotify_response response; |
875 | struct fsnotify_group *group; |
876 | int ret; |
877 | const char __user *info_buf = buf + sizeof(struct fanotify_response); |
878 | size_t info_len; |
879 | |
880 | if (!IS_ENABLED(CONFIG_FANOTIFY_ACCESS_PERMISSIONS)) |
881 | return -EINVAL; |
882 | |
883 | group = file->private_data; |
884 | |
885 | pr_debug("%s: group=%p count=%zu\n" , __func__, group, count); |
886 | |
887 | if (count < sizeof(response)) |
888 | return -EINVAL; |
889 | |
890 | if (copy_from_user(to: &response, from: buf, n: sizeof(response))) |
891 | return -EFAULT; |
892 | |
893 | info_len = count - sizeof(response); |
894 | |
895 | ret = process_access_response(group, response_struct: &response, info: info_buf, info_len); |
896 | if (ret < 0) |
897 | count = ret; |
898 | else |
899 | count = sizeof(response) + ret; |
900 | |
901 | return count; |
902 | } |
903 | |
904 | static int fanotify_release(struct inode *ignored, struct file *file) |
905 | { |
906 | struct fsnotify_group *group = file->private_data; |
907 | struct fsnotify_event *fsn_event; |
908 | |
909 | /* |
910 | * Stop new events from arriving in the notification queue. since |
911 | * userspace cannot use fanotify fd anymore, no event can enter or |
912 | * leave access_list by now either. |
913 | */ |
914 | fsnotify_group_stop_queueing(group); |
915 | |
916 | /* |
917 | * Process all permission events on access_list and notification queue |
918 | * and simulate reply from userspace. |
919 | */ |
920 | spin_lock(lock: &group->notification_lock); |
921 | while (!list_empty(head: &group->fanotify_data.access_list)) { |
922 | struct fanotify_perm_event *event; |
923 | |
924 | event = list_first_entry(&group->fanotify_data.access_list, |
925 | struct fanotify_perm_event, fae.fse.list); |
926 | list_del_init(entry: &event->fae.fse.list); |
927 | finish_permission_event(group, event, FAN_ALLOW, NULL); |
928 | spin_lock(lock: &group->notification_lock); |
929 | } |
930 | |
931 | /* |
932 | * Destroy all non-permission events. For permission events just |
933 | * dequeue them and set the response. They will be freed once the |
934 | * response is consumed and fanotify_get_response() returns. |
935 | */ |
936 | while ((fsn_event = fsnotify_remove_first_event(group))) { |
937 | struct fanotify_event *event = FANOTIFY_E(fse: fsn_event); |
938 | |
939 | if (!(event->mask & FANOTIFY_PERM_EVENTS)) { |
940 | spin_unlock(lock: &group->notification_lock); |
941 | fsnotify_destroy_event(group, event: fsn_event); |
942 | } else { |
943 | finish_permission_event(group, event: FANOTIFY_PERM(event), |
944 | FAN_ALLOW, NULL); |
945 | } |
946 | spin_lock(lock: &group->notification_lock); |
947 | } |
948 | spin_unlock(lock: &group->notification_lock); |
949 | |
950 | /* Response for all permission events it set, wakeup waiters */ |
951 | wake_up(&group->fanotify_data.access_waitq); |
952 | |
953 | /* matches the fanotify_init->fsnotify_alloc_group */ |
954 | fsnotify_destroy_group(group); |
955 | |
956 | return 0; |
957 | } |
958 | |
959 | static long fanotify_ioctl(struct file *file, unsigned int cmd, unsigned long arg) |
960 | { |
961 | struct fsnotify_group *group; |
962 | struct fsnotify_event *fsn_event; |
963 | void __user *p; |
964 | int ret = -ENOTTY; |
965 | size_t send_len = 0; |
966 | |
967 | group = file->private_data; |
968 | |
969 | p = (void __user *) arg; |
970 | |
971 | switch (cmd) { |
972 | case FIONREAD: |
973 | spin_lock(lock: &group->notification_lock); |
974 | list_for_each_entry(fsn_event, &group->notification_list, list) |
975 | send_len += FAN_EVENT_METADATA_LEN; |
976 | spin_unlock(lock: &group->notification_lock); |
977 | ret = put_user(send_len, (int __user *) p); |
978 | break; |
979 | } |
980 | |
981 | return ret; |
982 | } |
983 | |
984 | static const struct file_operations fanotify_fops = { |
985 | .show_fdinfo = fanotify_show_fdinfo, |
986 | .poll = fanotify_poll, |
987 | .read = fanotify_read, |
988 | .write = fanotify_write, |
989 | .fasync = NULL, |
990 | .release = fanotify_release, |
991 | .unlocked_ioctl = fanotify_ioctl, |
992 | .compat_ioctl = compat_ptr_ioctl, |
993 | .llseek = noop_llseek, |
994 | }; |
995 | |
996 | static int fanotify_find_path(int dfd, const char __user *filename, |
997 | struct path *path, unsigned int flags, __u64 mask, |
998 | unsigned int obj_type) |
999 | { |
1000 | int ret; |
1001 | |
1002 | pr_debug("%s: dfd=%d filename=%p flags=%x\n" , __func__, |
1003 | dfd, filename, flags); |
1004 | |
1005 | if (filename == NULL) { |
1006 | struct fd f = fdget(fd: dfd); |
1007 | |
1008 | ret = -EBADF; |
1009 | if (!f.file) |
1010 | goto out; |
1011 | |
1012 | ret = -ENOTDIR; |
1013 | if ((flags & FAN_MARK_ONLYDIR) && |
1014 | !(S_ISDIR(file_inode(f.file)->i_mode))) { |
1015 | fdput(fd: f); |
1016 | goto out; |
1017 | } |
1018 | |
1019 | *path = f.file->f_path; |
1020 | path_get(path); |
1021 | fdput(fd: f); |
1022 | } else { |
1023 | unsigned int lookup_flags = 0; |
1024 | |
1025 | if (!(flags & FAN_MARK_DONT_FOLLOW)) |
1026 | lookup_flags |= LOOKUP_FOLLOW; |
1027 | if (flags & FAN_MARK_ONLYDIR) |
1028 | lookup_flags |= LOOKUP_DIRECTORY; |
1029 | |
1030 | ret = user_path_at(dfd, name: filename, flags: lookup_flags, path); |
1031 | if (ret) |
1032 | goto out; |
1033 | } |
1034 | |
1035 | /* you can only watch an inode if you have read permissions on it */ |
1036 | ret = path_permission(path, MAY_READ); |
1037 | if (ret) { |
1038 | path_put(path); |
1039 | goto out; |
1040 | } |
1041 | |
1042 | ret = security_path_notify(path, mask, obj_type); |
1043 | if (ret) |
1044 | path_put(path); |
1045 | |
1046 | out: |
1047 | return ret; |
1048 | } |
1049 | |
1050 | static __u32 fanotify_mark_remove_from_mask(struct fsnotify_mark *fsn_mark, |
1051 | __u32 mask, unsigned int flags, |
1052 | __u32 umask, int *destroy) |
1053 | { |
1054 | __u32 oldmask, newmask; |
1055 | |
1056 | /* umask bits cannot be removed by user */ |
1057 | mask &= ~umask; |
1058 | spin_lock(lock: &fsn_mark->lock); |
1059 | oldmask = fsnotify_calc_mask(mark: fsn_mark); |
1060 | if (!(flags & FANOTIFY_MARK_IGNORE_BITS)) { |
1061 | fsn_mark->mask &= ~mask; |
1062 | } else { |
1063 | fsn_mark->ignore_mask &= ~mask; |
1064 | } |
1065 | newmask = fsnotify_calc_mask(mark: fsn_mark); |
1066 | /* |
1067 | * We need to keep the mark around even if remaining mask cannot |
1068 | * result in any events (e.g. mask == FAN_ONDIR) to support incremenal |
1069 | * changes to the mask. |
1070 | * Destroy mark when only umask bits remain. |
1071 | */ |
1072 | *destroy = !((fsn_mark->mask | fsn_mark->ignore_mask) & ~umask); |
1073 | spin_unlock(lock: &fsn_mark->lock); |
1074 | |
1075 | return oldmask & ~newmask; |
1076 | } |
1077 | |
1078 | static int fanotify_remove_mark(struct fsnotify_group *group, |
1079 | fsnotify_connp_t *connp, __u32 mask, |
1080 | unsigned int flags, __u32 umask) |
1081 | { |
1082 | struct fsnotify_mark *fsn_mark = NULL; |
1083 | __u32 removed; |
1084 | int destroy_mark; |
1085 | |
1086 | fsnotify_group_lock(group); |
1087 | fsn_mark = fsnotify_find_mark(connp, group); |
1088 | if (!fsn_mark) { |
1089 | fsnotify_group_unlock(group); |
1090 | return -ENOENT; |
1091 | } |
1092 | |
1093 | removed = fanotify_mark_remove_from_mask(fsn_mark, mask, flags, |
1094 | umask, destroy: &destroy_mark); |
1095 | if (removed & fsnotify_conn_mask(conn: fsn_mark->connector)) |
1096 | fsnotify_recalc_mask(conn: fsn_mark->connector); |
1097 | if (destroy_mark) |
1098 | fsnotify_detach_mark(mark: fsn_mark); |
1099 | fsnotify_group_unlock(group); |
1100 | if (destroy_mark) |
1101 | fsnotify_free_mark(mark: fsn_mark); |
1102 | |
1103 | /* matches the fsnotify_find_mark() */ |
1104 | fsnotify_put_mark(mark: fsn_mark); |
1105 | return 0; |
1106 | } |
1107 | |
1108 | static int fanotify_remove_vfsmount_mark(struct fsnotify_group *group, |
1109 | struct vfsmount *mnt, __u32 mask, |
1110 | unsigned int flags, __u32 umask) |
1111 | { |
1112 | return fanotify_remove_mark(group, connp: &real_mount(mnt)->mnt_fsnotify_marks, |
1113 | mask, flags, umask); |
1114 | } |
1115 | |
1116 | static int fanotify_remove_sb_mark(struct fsnotify_group *group, |
1117 | struct super_block *sb, __u32 mask, |
1118 | unsigned int flags, __u32 umask) |
1119 | { |
1120 | return fanotify_remove_mark(group, connp: &sb->s_fsnotify_marks, mask, |
1121 | flags, umask); |
1122 | } |
1123 | |
1124 | static int fanotify_remove_inode_mark(struct fsnotify_group *group, |
1125 | struct inode *inode, __u32 mask, |
1126 | unsigned int flags, __u32 umask) |
1127 | { |
1128 | return fanotify_remove_mark(group, connp: &inode->i_fsnotify_marks, mask, |
1129 | flags, umask); |
1130 | } |
1131 | |
1132 | static bool fanotify_mark_update_flags(struct fsnotify_mark *fsn_mark, |
1133 | unsigned int fan_flags) |
1134 | { |
1135 | bool want_iref = !(fan_flags & FAN_MARK_EVICTABLE); |
1136 | unsigned int ignore = fan_flags & FANOTIFY_MARK_IGNORE_BITS; |
1137 | bool recalc = false; |
1138 | |
1139 | /* |
1140 | * When using FAN_MARK_IGNORE for the first time, mark starts using |
1141 | * independent event flags in ignore mask. After that, trying to |
1142 | * update the ignore mask with the old FAN_MARK_IGNORED_MASK API |
1143 | * will result in EEXIST error. |
1144 | */ |
1145 | if (ignore == FAN_MARK_IGNORE) |
1146 | fsn_mark->flags |= FSNOTIFY_MARK_FLAG_HAS_IGNORE_FLAGS; |
1147 | |
1148 | /* |
1149 | * Setting FAN_MARK_IGNORED_SURV_MODIFY for the first time may lead to |
1150 | * the removal of the FS_MODIFY bit in calculated mask if it was set |
1151 | * because of an ignore mask that is now going to survive FS_MODIFY. |
1152 | */ |
1153 | if (ignore && (fan_flags & FAN_MARK_IGNORED_SURV_MODIFY) && |
1154 | !(fsn_mark->flags & FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY)) { |
1155 | fsn_mark->flags |= FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY; |
1156 | if (!(fsn_mark->mask & FS_MODIFY)) |
1157 | recalc = true; |
1158 | } |
1159 | |
1160 | if (fsn_mark->connector->type != FSNOTIFY_OBJ_TYPE_INODE || |
1161 | want_iref == !(fsn_mark->flags & FSNOTIFY_MARK_FLAG_NO_IREF)) |
1162 | return recalc; |
1163 | |
1164 | /* |
1165 | * NO_IREF may be removed from a mark, but not added. |
1166 | * When removed, fsnotify_recalc_mask() will take the inode ref. |
1167 | */ |
1168 | WARN_ON_ONCE(!want_iref); |
1169 | fsn_mark->flags &= ~FSNOTIFY_MARK_FLAG_NO_IREF; |
1170 | |
1171 | return true; |
1172 | } |
1173 | |
1174 | static bool fanotify_mark_add_to_mask(struct fsnotify_mark *fsn_mark, |
1175 | __u32 mask, unsigned int fan_flags) |
1176 | { |
1177 | bool recalc; |
1178 | |
1179 | spin_lock(lock: &fsn_mark->lock); |
1180 | if (!(fan_flags & FANOTIFY_MARK_IGNORE_BITS)) |
1181 | fsn_mark->mask |= mask; |
1182 | else |
1183 | fsn_mark->ignore_mask |= mask; |
1184 | |
1185 | recalc = fsnotify_calc_mask(mark: fsn_mark) & |
1186 | ~fsnotify_conn_mask(conn: fsn_mark->connector); |
1187 | |
1188 | recalc |= fanotify_mark_update_flags(fsn_mark, fan_flags); |
1189 | spin_unlock(lock: &fsn_mark->lock); |
1190 | |
1191 | return recalc; |
1192 | } |
1193 | |
/*
 * fsid of the filesystem object that a mark request refers to, as filled in
 * by fanotify_test_fsid() and consumed by fanotify_set_mark_fsid().
 */
struct fan_fsid {
	/* super block of the dentry that the fsid was queried for */
	struct super_block *sb;
	/* fsid as returned by vfs_get_fsid() for that dentry */
	__kernel_fsid_t id;
	/* true if the fsid is zero or differs from the fsid of the sb root */
	bool weak;
};
1199 | |
1200 | static int fanotify_set_mark_fsid(struct fsnotify_group *group, |
1201 | struct fsnotify_mark *mark, |
1202 | struct fan_fsid *fsid) |
1203 | { |
1204 | struct fsnotify_mark_connector *conn; |
1205 | struct fsnotify_mark *old; |
1206 | struct super_block *old_sb = NULL; |
1207 | |
1208 | FANOTIFY_MARK(mark)->fsid = fsid->id; |
1209 | mark->flags |= FSNOTIFY_MARK_FLAG_HAS_FSID; |
1210 | if (fsid->weak) |
1211 | mark->flags |= FSNOTIFY_MARK_FLAG_WEAK_FSID; |
1212 | |
1213 | /* First mark added will determine if group is single or multi fsid */ |
1214 | if (list_empty(head: &group->marks_list)) |
1215 | return 0; |
1216 | |
1217 | /* Find sb of an existing mark */ |
1218 | list_for_each_entry(old, &group->marks_list, g_list) { |
1219 | conn = READ_ONCE(old->connector); |
1220 | if (!conn) |
1221 | continue; |
1222 | old_sb = fsnotify_connector_sb(conn); |
1223 | if (old_sb) |
1224 | break; |
1225 | } |
1226 | |
1227 | /* Only detached marks left? */ |
1228 | if (!old_sb) |
1229 | return 0; |
1230 | |
1231 | /* Do not allow mixing of marks with weak and strong fsid */ |
1232 | if ((mark->flags ^ old->flags) & FSNOTIFY_MARK_FLAG_WEAK_FSID) |
1233 | return -EXDEV; |
1234 | |
1235 | /* Allow mixing of marks with strong fsid from different fs */ |
1236 | if (!fsid->weak) |
1237 | return 0; |
1238 | |
1239 | /* Do not allow mixing marks with weak fsid from different fs */ |
1240 | if (old_sb != fsid->sb) |
1241 | return -EXDEV; |
1242 | |
1243 | /* Do not allow mixing marks from different btrfs sub-volumes */ |
1244 | if (!fanotify_fsid_equal(fsid1: &FANOTIFY_MARK(mark: old)->fsid, |
1245 | fsid2: &FANOTIFY_MARK(mark)->fsid)) |
1246 | return -EXDEV; |
1247 | |
1248 | return 0; |
1249 | } |
1250 | |
1251 | static struct fsnotify_mark *fanotify_add_new_mark(struct fsnotify_group *group, |
1252 | fsnotify_connp_t *connp, |
1253 | unsigned int obj_type, |
1254 | unsigned int fan_flags, |
1255 | struct fan_fsid *fsid) |
1256 | { |
1257 | struct ucounts *ucounts = group->fanotify_data.ucounts; |
1258 | struct fanotify_mark *fan_mark; |
1259 | struct fsnotify_mark *mark; |
1260 | int ret; |
1261 | |
1262 | /* |
1263 | * Enforce per user marks limits per user in all containing user ns. |
1264 | * A group with FAN_UNLIMITED_MARKS does not contribute to mark count |
1265 | * in the limited groups account. |
1266 | */ |
1267 | if (!FAN_GROUP_FLAG(group, FAN_UNLIMITED_MARKS) && |
1268 | !inc_ucount(ns: ucounts->ns, uid: ucounts->uid, type: UCOUNT_FANOTIFY_MARKS)) |
1269 | return ERR_PTR(error: -ENOSPC); |
1270 | |
1271 | fan_mark = kmem_cache_alloc(cachep: fanotify_mark_cache, GFP_KERNEL); |
1272 | if (!fan_mark) { |
1273 | ret = -ENOMEM; |
1274 | goto out_dec_ucounts; |
1275 | } |
1276 | |
1277 | mark = &fan_mark->fsn_mark; |
1278 | fsnotify_init_mark(mark, group); |
1279 | if (fan_flags & FAN_MARK_EVICTABLE) |
1280 | mark->flags |= FSNOTIFY_MARK_FLAG_NO_IREF; |
1281 | |
1282 | /* Cache fsid of filesystem containing the marked object */ |
1283 | if (fsid) { |
1284 | ret = fanotify_set_mark_fsid(group, mark, fsid); |
1285 | if (ret) |
1286 | goto out_put_mark; |
1287 | } else { |
1288 | fan_mark->fsid.val[0] = fan_mark->fsid.val[1] = 0; |
1289 | } |
1290 | |
1291 | ret = fsnotify_add_mark_locked(mark, connp, obj_type, add_flags: 0); |
1292 | if (ret) |
1293 | goto out_put_mark; |
1294 | |
1295 | return mark; |
1296 | |
1297 | out_put_mark: |
1298 | fsnotify_put_mark(mark); |
1299 | out_dec_ucounts: |
1300 | if (!FAN_GROUP_FLAG(group, FAN_UNLIMITED_MARKS)) |
1301 | dec_ucount(ucounts, type: UCOUNT_FANOTIFY_MARKS); |
1302 | return ERR_PTR(error: ret); |
1303 | } |
1304 | |
1305 | static int fanotify_group_init_error_pool(struct fsnotify_group *group) |
1306 | { |
1307 | if (mempool_initialized(pool: &group->fanotify_data.error_events_pool)) |
1308 | return 0; |
1309 | |
1310 | return mempool_init_kmalloc_pool(pool: &group->fanotify_data.error_events_pool, |
1311 | FANOTIFY_DEFAULT_FEE_POOL_SIZE, |
1312 | size: sizeof(struct fanotify_error_event)); |
1313 | } |
1314 | |
1315 | static int fanotify_may_update_existing_mark(struct fsnotify_mark *fsn_mark, |
1316 | unsigned int fan_flags) |
1317 | { |
1318 | /* |
1319 | * Non evictable mark cannot be downgraded to evictable mark. |
1320 | */ |
1321 | if (fan_flags & FAN_MARK_EVICTABLE && |
1322 | !(fsn_mark->flags & FSNOTIFY_MARK_FLAG_NO_IREF)) |
1323 | return -EEXIST; |
1324 | |
1325 | /* |
1326 | * New ignore mask semantics cannot be downgraded to old semantics. |
1327 | */ |
1328 | if (fan_flags & FAN_MARK_IGNORED_MASK && |
1329 | fsn_mark->flags & FSNOTIFY_MARK_FLAG_HAS_IGNORE_FLAGS) |
1330 | return -EEXIST; |
1331 | |
1332 | /* |
1333 | * An ignore mask that survives modify could never be downgraded to not |
1334 | * survive modify. With new FAN_MARK_IGNORE semantics we make that rule |
1335 | * explicit and return an error when trying to update the ignore mask |
1336 | * without the original FAN_MARK_IGNORED_SURV_MODIFY value. |
1337 | */ |
1338 | if (fan_flags & FAN_MARK_IGNORE && |
1339 | !(fan_flags & FAN_MARK_IGNORED_SURV_MODIFY) && |
1340 | fsn_mark->flags & FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY) |
1341 | return -EEXIST; |
1342 | |
1343 | return 0; |
1344 | } |
1345 | |
1346 | static int fanotify_add_mark(struct fsnotify_group *group, |
1347 | fsnotify_connp_t *connp, unsigned int obj_type, |
1348 | __u32 mask, unsigned int fan_flags, |
1349 | struct fan_fsid *fsid) |
1350 | { |
1351 | struct fsnotify_mark *fsn_mark; |
1352 | bool recalc; |
1353 | int ret = 0; |
1354 | |
1355 | fsnotify_group_lock(group); |
1356 | fsn_mark = fsnotify_find_mark(connp, group); |
1357 | if (!fsn_mark) { |
1358 | fsn_mark = fanotify_add_new_mark(group, connp, obj_type, |
1359 | fan_flags, fsid); |
1360 | if (IS_ERR(ptr: fsn_mark)) { |
1361 | fsnotify_group_unlock(group); |
1362 | return PTR_ERR(ptr: fsn_mark); |
1363 | } |
1364 | } |
1365 | |
1366 | /* |
1367 | * Check if requested mark flags conflict with an existing mark flags. |
1368 | */ |
1369 | ret = fanotify_may_update_existing_mark(fsn_mark, fan_flags); |
1370 | if (ret) |
1371 | goto out; |
1372 | |
1373 | /* |
1374 | * Error events are pre-allocated per group, only if strictly |
1375 | * needed (i.e. FAN_FS_ERROR was requested). |
1376 | */ |
1377 | if (!(fan_flags & FANOTIFY_MARK_IGNORE_BITS) && |
1378 | (mask & FAN_FS_ERROR)) { |
1379 | ret = fanotify_group_init_error_pool(group); |
1380 | if (ret) |
1381 | goto out; |
1382 | } |
1383 | |
1384 | recalc = fanotify_mark_add_to_mask(fsn_mark, mask, fan_flags); |
1385 | if (recalc) |
1386 | fsnotify_recalc_mask(conn: fsn_mark->connector); |
1387 | |
1388 | out: |
1389 | fsnotify_group_unlock(group); |
1390 | |
1391 | fsnotify_put_mark(mark: fsn_mark); |
1392 | return ret; |
1393 | } |
1394 | |
1395 | static int fanotify_add_vfsmount_mark(struct fsnotify_group *group, |
1396 | struct vfsmount *mnt, __u32 mask, |
1397 | unsigned int flags, struct fan_fsid *fsid) |
1398 | { |
1399 | return fanotify_add_mark(group, connp: &real_mount(mnt)->mnt_fsnotify_marks, |
1400 | obj_type: FSNOTIFY_OBJ_TYPE_VFSMOUNT, mask, fan_flags: flags, fsid); |
1401 | } |
1402 | |
1403 | static int fanotify_add_sb_mark(struct fsnotify_group *group, |
1404 | struct super_block *sb, __u32 mask, |
1405 | unsigned int flags, struct fan_fsid *fsid) |
1406 | { |
1407 | return fanotify_add_mark(group, connp: &sb->s_fsnotify_marks, |
1408 | obj_type: FSNOTIFY_OBJ_TYPE_SB, mask, fan_flags: flags, fsid); |
1409 | } |
1410 | |
1411 | static int fanotify_add_inode_mark(struct fsnotify_group *group, |
1412 | struct inode *inode, __u32 mask, |
1413 | unsigned int flags, struct fan_fsid *fsid) |
1414 | { |
1415 | pr_debug("%s: group=%p inode=%p\n" , __func__, group, inode); |
1416 | |
1417 | /* |
1418 | * If some other task has this inode open for write we should not add |
1419 | * an ignore mask, unless that ignore mask is supposed to survive |
1420 | * modification changes anyway. |
1421 | */ |
1422 | if ((flags & FANOTIFY_MARK_IGNORE_BITS) && |
1423 | !(flags & FAN_MARK_IGNORED_SURV_MODIFY) && |
1424 | inode_is_open_for_write(inode)) |
1425 | return 0; |
1426 | |
1427 | return fanotify_add_mark(group, connp: &inode->i_fsnotify_marks, |
1428 | obj_type: FSNOTIFY_OBJ_TYPE_INODE, mask, fan_flags: flags, fsid); |
1429 | } |
1430 | |
1431 | static struct fsnotify_event *fanotify_alloc_overflow_event(void) |
1432 | { |
1433 | struct fanotify_event *oevent; |
1434 | |
1435 | oevent = kmalloc(size: sizeof(*oevent), GFP_KERNEL_ACCOUNT); |
1436 | if (!oevent) |
1437 | return NULL; |
1438 | |
1439 | fanotify_init_event(event: oevent, hash: 0, FS_Q_OVERFLOW); |
1440 | oevent->type = FANOTIFY_EVENT_TYPE_OVERFLOW; |
1441 | |
1442 | return &oevent->fse; |
1443 | } |
1444 | |
1445 | static struct hlist_head *fanotify_alloc_merge_hash(void) |
1446 | { |
1447 | struct hlist_head *hash; |
1448 | |
1449 | hash = kmalloc(size: sizeof(struct hlist_head) << FANOTIFY_HTABLE_BITS, |
1450 | GFP_KERNEL_ACCOUNT); |
1451 | if (!hash) |
1452 | return NULL; |
1453 | |
1454 | __hash_init(ht: hash, FANOTIFY_HTABLE_SIZE); |
1455 | |
1456 | return hash; |
1457 | } |
1458 | |
1459 | /* fanotify syscalls */ |
1460 | SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags) |
1461 | { |
1462 | struct fsnotify_group *group; |
1463 | int f_flags, fd; |
1464 | unsigned int fid_mode = flags & FANOTIFY_FID_BITS; |
1465 | unsigned int class = flags & FANOTIFY_CLASS_BITS; |
1466 | unsigned int internal_flags = 0; |
1467 | |
1468 | pr_debug("%s: flags=%x event_f_flags=%x\n" , |
1469 | __func__, flags, event_f_flags); |
1470 | |
1471 | if (!capable(CAP_SYS_ADMIN)) { |
1472 | /* |
1473 | * An unprivileged user can setup an fanotify group with |
1474 | * limited functionality - an unprivileged group is limited to |
1475 | * notification events with file handles and it cannot use |
1476 | * unlimited queue/marks. |
1477 | */ |
1478 | if ((flags & FANOTIFY_ADMIN_INIT_FLAGS) || !fid_mode) |
1479 | return -EPERM; |
1480 | |
1481 | /* |
1482 | * Setting the internal flag FANOTIFY_UNPRIV on the group |
1483 | * prevents setting mount/filesystem marks on this group and |
1484 | * prevents reporting pid and open fd in events. |
1485 | */ |
1486 | internal_flags |= FANOTIFY_UNPRIV; |
1487 | } |
1488 | |
1489 | #ifdef CONFIG_AUDITSYSCALL |
1490 | if (flags & ~(FANOTIFY_INIT_FLAGS | FAN_ENABLE_AUDIT)) |
1491 | #else |
1492 | if (flags & ~FANOTIFY_INIT_FLAGS) |
1493 | #endif |
1494 | return -EINVAL; |
1495 | |
1496 | /* |
1497 | * A pidfd can only be returned for a thread-group leader; thus |
1498 | * FAN_REPORT_PIDFD and FAN_REPORT_TID need to remain mutually |
1499 | * exclusive. |
1500 | */ |
1501 | if ((flags & FAN_REPORT_PIDFD) && (flags & FAN_REPORT_TID)) |
1502 | return -EINVAL; |
1503 | |
1504 | if (event_f_flags & ~FANOTIFY_INIT_ALL_EVENT_F_BITS) |
1505 | return -EINVAL; |
1506 | |
1507 | switch (event_f_flags & O_ACCMODE) { |
1508 | case O_RDONLY: |
1509 | case O_RDWR: |
1510 | case O_WRONLY: |
1511 | break; |
1512 | default: |
1513 | return -EINVAL; |
1514 | } |
1515 | |
1516 | if (fid_mode && class != FAN_CLASS_NOTIF) |
1517 | return -EINVAL; |
1518 | |
1519 | /* |
1520 | * Child name is reported with parent fid so requires dir fid. |
1521 | * We can report both child fid and dir fid with or without name. |
1522 | */ |
1523 | if ((fid_mode & FAN_REPORT_NAME) && !(fid_mode & FAN_REPORT_DIR_FID)) |
1524 | return -EINVAL; |
1525 | |
1526 | /* |
1527 | * FAN_REPORT_TARGET_FID requires FAN_REPORT_NAME and FAN_REPORT_FID |
1528 | * and is used as an indication to report both dir and child fid on all |
1529 | * dirent events. |
1530 | */ |
1531 | if ((fid_mode & FAN_REPORT_TARGET_FID) && |
1532 | (!(fid_mode & FAN_REPORT_NAME) || !(fid_mode & FAN_REPORT_FID))) |
1533 | return -EINVAL; |
1534 | |
1535 | f_flags = O_RDWR | __FMODE_NONOTIFY; |
1536 | if (flags & FAN_CLOEXEC) |
1537 | f_flags |= O_CLOEXEC; |
1538 | if (flags & FAN_NONBLOCK) |
1539 | f_flags |= O_NONBLOCK; |
1540 | |
1541 | /* fsnotify_alloc_group takes a ref. Dropped in fanotify_release */ |
1542 | group = fsnotify_alloc_group(ops: &fanotify_fsnotify_ops, |
1543 | FSNOTIFY_GROUP_USER | FSNOTIFY_GROUP_NOFS); |
1544 | if (IS_ERR(ptr: group)) { |
1545 | return PTR_ERR(ptr: group); |
1546 | } |
1547 | |
1548 | /* Enforce groups limits per user in all containing user ns */ |
1549 | group->fanotify_data.ucounts = inc_ucount(current_user_ns(), |
1550 | current_euid(), |
1551 | type: UCOUNT_FANOTIFY_GROUPS); |
1552 | if (!group->fanotify_data.ucounts) { |
1553 | fd = -EMFILE; |
1554 | goto out_destroy_group; |
1555 | } |
1556 | |
1557 | group->fanotify_data.flags = flags | internal_flags; |
1558 | group->memcg = get_mem_cgroup_from_mm(current->mm); |
1559 | |
1560 | group->fanotify_data.merge_hash = fanotify_alloc_merge_hash(); |
1561 | if (!group->fanotify_data.merge_hash) { |
1562 | fd = -ENOMEM; |
1563 | goto out_destroy_group; |
1564 | } |
1565 | |
1566 | group->overflow_event = fanotify_alloc_overflow_event(); |
1567 | if (unlikely(!group->overflow_event)) { |
1568 | fd = -ENOMEM; |
1569 | goto out_destroy_group; |
1570 | } |
1571 | |
1572 | if (force_o_largefile()) |
1573 | event_f_flags |= O_LARGEFILE; |
1574 | group->fanotify_data.f_flags = event_f_flags; |
1575 | init_waitqueue_head(&group->fanotify_data.access_waitq); |
1576 | INIT_LIST_HEAD(list: &group->fanotify_data.access_list); |
1577 | switch (class) { |
1578 | case FAN_CLASS_NOTIF: |
1579 | group->priority = FS_PRIO_0; |
1580 | break; |
1581 | case FAN_CLASS_CONTENT: |
1582 | group->priority = FS_PRIO_1; |
1583 | break; |
1584 | case FAN_CLASS_PRE_CONTENT: |
1585 | group->priority = FS_PRIO_2; |
1586 | break; |
1587 | default: |
1588 | fd = -EINVAL; |
1589 | goto out_destroy_group; |
1590 | } |
1591 | |
1592 | if (flags & FAN_UNLIMITED_QUEUE) { |
1593 | fd = -EPERM; |
1594 | if (!capable(CAP_SYS_ADMIN)) |
1595 | goto out_destroy_group; |
1596 | group->max_events = UINT_MAX; |
1597 | } else { |
1598 | group->max_events = fanotify_max_queued_events; |
1599 | } |
1600 | |
1601 | if (flags & FAN_UNLIMITED_MARKS) { |
1602 | fd = -EPERM; |
1603 | if (!capable(CAP_SYS_ADMIN)) |
1604 | goto out_destroy_group; |
1605 | } |
1606 | |
1607 | if (flags & FAN_ENABLE_AUDIT) { |
1608 | fd = -EPERM; |
1609 | if (!capable(CAP_AUDIT_WRITE)) |
1610 | goto out_destroy_group; |
1611 | } |
1612 | |
1613 | fd = anon_inode_getfd(name: "[fanotify]" , fops: &fanotify_fops, priv: group, flags: f_flags); |
1614 | if (fd < 0) |
1615 | goto out_destroy_group; |
1616 | |
1617 | return fd; |
1618 | |
1619 | out_destroy_group: |
1620 | fsnotify_destroy_group(group); |
1621 | return fd; |
1622 | } |
1623 | |
1624 | static int fanotify_test_fsid(struct dentry *dentry, unsigned int flags, |
1625 | struct fan_fsid *fsid) |
1626 | { |
1627 | unsigned int mark_type = flags & FANOTIFY_MARK_TYPE_BITS; |
1628 | __kernel_fsid_t root_fsid; |
1629 | int err; |
1630 | |
1631 | /* |
1632 | * Make sure dentry is not of a filesystem with zero fsid (e.g. fuse). |
1633 | */ |
1634 | err = vfs_get_fsid(dentry, fsid: &fsid->id); |
1635 | if (err) |
1636 | return err; |
1637 | |
1638 | fsid->sb = dentry->d_sb; |
1639 | if (!fsid->id.val[0] && !fsid->id.val[1]) { |
1640 | err = -ENODEV; |
1641 | goto weak; |
1642 | } |
1643 | |
1644 | /* |
1645 | * Make sure dentry is not of a filesystem subvolume (e.g. btrfs) |
1646 | * which uses a different fsid than sb root. |
1647 | */ |
1648 | err = vfs_get_fsid(dentry: dentry->d_sb->s_root, fsid: &root_fsid); |
1649 | if (err) |
1650 | return err; |
1651 | |
1652 | if (!fanotify_fsid_equal(fsid1: &root_fsid, fsid2: &fsid->id)) { |
1653 | err = -EXDEV; |
1654 | goto weak; |
1655 | } |
1656 | |
1657 | fsid->weak = false; |
1658 | return 0; |
1659 | |
1660 | weak: |
1661 | /* Allow weak fsid when marking inodes */ |
1662 | fsid->weak = true; |
1663 | return (mark_type == FAN_MARK_INODE) ? 0 : err; |
1664 | } |
1665 | |
1666 | /* Check if filesystem can encode a unique fid */ |
1667 | static int fanotify_test_fid(struct dentry *dentry, unsigned int flags) |
1668 | { |
1669 | unsigned int mark_type = flags & FANOTIFY_MARK_TYPE_BITS; |
1670 | const struct export_operations *nop = dentry->d_sb->s_export_op; |
1671 | |
1672 | /* |
1673 | * We need to make sure that the filesystem supports encoding of |
1674 | * file handles so user can use name_to_handle_at() to compare fids |
1675 | * reported with events to the file handle of watched objects. |
1676 | */ |
1677 | if (!exportfs_can_encode_fid(nop)) |
1678 | return -EOPNOTSUPP; |
1679 | |
1680 | /* |
1681 | * For sb/mount mark, we also need to make sure that the filesystem |
1682 | * supports decoding file handles, so user has a way to map back the |
1683 | * reported fids to filesystem objects. |
1684 | */ |
1685 | if (mark_type != FAN_MARK_INODE && !exportfs_can_decode_fh(nop)) |
1686 | return -EOPNOTSUPP; |
1687 | |
1688 | return 0; |
1689 | } |
1690 | |
1691 | static int fanotify_events_supported(struct fsnotify_group *group, |
1692 | const struct path *path, __u64 mask, |
1693 | unsigned int flags) |
1694 | { |
1695 | unsigned int mark_type = flags & FANOTIFY_MARK_TYPE_BITS; |
1696 | /* Strict validation of events in non-dir inode mask with v5.17+ APIs */ |
1697 | bool strict_dir_events = FAN_GROUP_FLAG(group, FAN_REPORT_TARGET_FID) || |
1698 | (mask & FAN_RENAME) || |
1699 | (flags & FAN_MARK_IGNORE); |
1700 | |
1701 | /* |
1702 | * Some filesystems such as 'proc' acquire unusual locks when opening |
1703 | * files. For them fanotify permission events have high chances of |
1704 | * deadlocking the system - open done when reporting fanotify event |
1705 | * blocks on this "unusual" lock while another process holding the lock |
1706 | * waits for fanotify permission event to be answered. Just disallow |
1707 | * permission events for such filesystems. |
1708 | */ |
1709 | if (mask & FANOTIFY_PERM_EVENTS && |
1710 | path->mnt->mnt_sb->s_type->fs_flags & FS_DISALLOW_NOTIFY_PERM) |
1711 | return -EINVAL; |
1712 | |
1713 | /* |
1714 | * mount and sb marks are not allowed on kernel internal pseudo fs, |
1715 | * like pipe_mnt, because that would subscribe to events on all the |
1716 | * anonynous pipes in the system. |
1717 | * |
1718 | * SB_NOUSER covers all of the internal pseudo fs whose objects are not |
1719 | * exposed to user's mount namespace, but there are other SB_KERNMOUNT |
1720 | * fs, like nsfs, debugfs, for which the value of allowing sb and mount |
1721 | * mark is questionable. For now we leave them alone. |
1722 | */ |
1723 | if (mark_type != FAN_MARK_INODE && |
1724 | path->mnt->mnt_sb->s_flags & SB_NOUSER) |
1725 | return -EINVAL; |
1726 | |
1727 | /* |
1728 | * We shouldn't have allowed setting dirent events and the directory |
1729 | * flags FAN_ONDIR and FAN_EVENT_ON_CHILD in mask of non-dir inode, |
1730 | * but because we always allowed it, error only when using new APIs. |
1731 | */ |
1732 | if (strict_dir_events && mark_type == FAN_MARK_INODE && |
1733 | !d_is_dir(dentry: path->dentry) && (mask & FANOTIFY_DIRONLY_EVENT_BITS)) |
1734 | return -ENOTDIR; |
1735 | |
1736 | return 0; |
1737 | } |
1738 | |
1739 | static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask, |
1740 | int dfd, const char __user *pathname) |
1741 | { |
1742 | struct inode *inode = NULL; |
1743 | struct vfsmount *mnt = NULL; |
1744 | struct fsnotify_group *group; |
1745 | struct fd f; |
1746 | struct path path; |
1747 | struct fan_fsid __fsid, *fsid = NULL; |
1748 | u32 valid_mask = FANOTIFY_EVENTS | FANOTIFY_EVENT_FLAGS; |
1749 | unsigned int mark_type = flags & FANOTIFY_MARK_TYPE_BITS; |
1750 | unsigned int mark_cmd = flags & FANOTIFY_MARK_CMD_BITS; |
1751 | unsigned int ignore = flags & FANOTIFY_MARK_IGNORE_BITS; |
1752 | unsigned int obj_type, fid_mode; |
1753 | u32 umask = 0; |
1754 | int ret; |
1755 | |
1756 | pr_debug("%s: fanotify_fd=%d flags=%x dfd=%d pathname=%p mask=%llx\n" , |
1757 | __func__, fanotify_fd, flags, dfd, pathname, mask); |
1758 | |
1759 | /* we only use the lower 32 bits as of right now. */ |
1760 | if (upper_32_bits(mask)) |
1761 | return -EINVAL; |
1762 | |
1763 | if (flags & ~FANOTIFY_MARK_FLAGS) |
1764 | return -EINVAL; |
1765 | |
1766 | switch (mark_type) { |
1767 | case FAN_MARK_INODE: |
1768 | obj_type = FSNOTIFY_OBJ_TYPE_INODE; |
1769 | break; |
1770 | case FAN_MARK_MOUNT: |
1771 | obj_type = FSNOTIFY_OBJ_TYPE_VFSMOUNT; |
1772 | break; |
1773 | case FAN_MARK_FILESYSTEM: |
1774 | obj_type = FSNOTIFY_OBJ_TYPE_SB; |
1775 | break; |
1776 | default: |
1777 | return -EINVAL; |
1778 | } |
1779 | |
1780 | switch (mark_cmd) { |
1781 | case FAN_MARK_ADD: |
1782 | case FAN_MARK_REMOVE: |
1783 | if (!mask) |
1784 | return -EINVAL; |
1785 | break; |
1786 | case FAN_MARK_FLUSH: |
1787 | if (flags & ~(FANOTIFY_MARK_TYPE_BITS | FAN_MARK_FLUSH)) |
1788 | return -EINVAL; |
1789 | break; |
1790 | default: |
1791 | return -EINVAL; |
1792 | } |
1793 | |
1794 | if (IS_ENABLED(CONFIG_FANOTIFY_ACCESS_PERMISSIONS)) |
1795 | valid_mask |= FANOTIFY_PERM_EVENTS; |
1796 | |
1797 | if (mask & ~valid_mask) |
1798 | return -EINVAL; |
1799 | |
1800 | |
1801 | /* We don't allow FAN_MARK_IGNORE & FAN_MARK_IGNORED_MASK together */ |
1802 | if (ignore == (FAN_MARK_IGNORE | FAN_MARK_IGNORED_MASK)) |
1803 | return -EINVAL; |
1804 | |
1805 | /* |
1806 | * Event flags (FAN_ONDIR, FAN_EVENT_ON_CHILD) have no effect with |
1807 | * FAN_MARK_IGNORED_MASK. |
1808 | */ |
1809 | if (ignore == FAN_MARK_IGNORED_MASK) { |
1810 | mask &= ~FANOTIFY_EVENT_FLAGS; |
1811 | umask = FANOTIFY_EVENT_FLAGS; |
1812 | } |
1813 | |
1814 | f = fdget(fd: fanotify_fd); |
1815 | if (unlikely(!f.file)) |
1816 | return -EBADF; |
1817 | |
1818 | /* verify that this is indeed an fanotify instance */ |
1819 | ret = -EINVAL; |
1820 | if (unlikely(f.file->f_op != &fanotify_fops)) |
1821 | goto fput_and_out; |
1822 | group = f.file->private_data; |
1823 | |
1824 | /* |
1825 | * An unprivileged user is not allowed to setup mount nor filesystem |
1826 | * marks. This also includes setting up such marks by a group that |
1827 | * was initialized by an unprivileged user. |
1828 | */ |
1829 | ret = -EPERM; |
1830 | if ((!capable(CAP_SYS_ADMIN) || |
1831 | FAN_GROUP_FLAG(group, FANOTIFY_UNPRIV)) && |
1832 | mark_type != FAN_MARK_INODE) |
1833 | goto fput_and_out; |
1834 | |
1835 | /* |
1836 | * group->priority == FS_PRIO_0 == FAN_CLASS_NOTIF. These are not |
1837 | * allowed to set permissions events. |
1838 | */ |
1839 | ret = -EINVAL; |
1840 | if (mask & FANOTIFY_PERM_EVENTS && |
1841 | group->priority == FS_PRIO_0) |
1842 | goto fput_and_out; |
1843 | |
1844 | if (mask & FAN_FS_ERROR && |
1845 | mark_type != FAN_MARK_FILESYSTEM) |
1846 | goto fput_and_out; |
1847 | |
1848 | /* |
1849 | * Evictable is only relevant for inode marks, because only inode object |
1850 | * can be evicted on memory pressure. |
1851 | */ |
1852 | if (flags & FAN_MARK_EVICTABLE && |
1853 | mark_type != FAN_MARK_INODE) |
1854 | goto fput_and_out; |
1855 | |
1856 | /* |
1857 | * Events that do not carry enough information to report |
1858 | * event->fd require a group that supports reporting fid. Those |
1859 | * events are not supported on a mount mark, because they do not |
1860 | * carry enough information (i.e. path) to be filtered by mount |
1861 | * point. |
1862 | */ |
1863 | fid_mode = FAN_GROUP_FLAG(group, FANOTIFY_FID_BITS); |
1864 | if (mask & ~(FANOTIFY_FD_EVENTS|FANOTIFY_EVENT_FLAGS) && |
1865 | (!fid_mode || mark_type == FAN_MARK_MOUNT)) |
1866 | goto fput_and_out; |
1867 | |
1868 | /* |
1869 | * FAN_RENAME uses special info type records to report the old and |
1870 | * new parent+name. Reporting only old and new parent id is less |
1871 | * useful and was not implemented. |
1872 | */ |
1873 | if (mask & FAN_RENAME && !(fid_mode & FAN_REPORT_NAME)) |
1874 | goto fput_and_out; |
1875 | |
1876 | if (mark_cmd == FAN_MARK_FLUSH) { |
1877 | ret = 0; |
1878 | if (mark_type == FAN_MARK_MOUNT) |
1879 | fsnotify_clear_vfsmount_marks_by_group(group); |
1880 | else if (mark_type == FAN_MARK_FILESYSTEM) |
1881 | fsnotify_clear_sb_marks_by_group(group); |
1882 | else |
1883 | fsnotify_clear_inode_marks_by_group(group); |
1884 | goto fput_and_out; |
1885 | } |
1886 | |
1887 | ret = fanotify_find_path(dfd, filename: pathname, path: &path, flags, |
1888 | mask: (mask & ALL_FSNOTIFY_EVENTS), obj_type); |
1889 | if (ret) |
1890 | goto fput_and_out; |
1891 | |
1892 | if (mark_cmd == FAN_MARK_ADD) { |
1893 | ret = fanotify_events_supported(group, path: &path, mask, flags); |
1894 | if (ret) |
1895 | goto path_put_and_out; |
1896 | } |
1897 | |
1898 | if (fid_mode) { |
1899 | ret = fanotify_test_fsid(dentry: path.dentry, flags, fsid: &__fsid); |
1900 | if (ret) |
1901 | goto path_put_and_out; |
1902 | |
1903 | ret = fanotify_test_fid(dentry: path.dentry, flags); |
1904 | if (ret) |
1905 | goto path_put_and_out; |
1906 | |
1907 | fsid = &__fsid; |
1908 | } |
1909 | |
1910 | /* inode held in place by reference to path; group by fget on fd */ |
1911 | if (mark_type == FAN_MARK_INODE) |
1912 | inode = path.dentry->d_inode; |
1913 | else |
1914 | mnt = path.mnt; |
1915 | |
1916 | ret = mnt ? -EINVAL : -EISDIR; |
1917 | /* FAN_MARK_IGNORE requires SURV_MODIFY for sb/mount/dir marks */ |
1918 | if (mark_cmd == FAN_MARK_ADD && ignore == FAN_MARK_IGNORE && |
1919 | (mnt || S_ISDIR(inode->i_mode)) && |
1920 | !(flags & FAN_MARK_IGNORED_SURV_MODIFY)) |
1921 | goto path_put_and_out; |
1922 | |
1923 | /* Mask out FAN_EVENT_ON_CHILD flag for sb/mount/non-dir marks */ |
1924 | if (mnt || !S_ISDIR(inode->i_mode)) { |
1925 | mask &= ~FAN_EVENT_ON_CHILD; |
1926 | umask = FAN_EVENT_ON_CHILD; |
1927 | /* |
1928 | * If group needs to report parent fid, register for getting |
1929 | * events with parent/name info for non-directory. |
1930 | */ |
1931 | if ((fid_mode & FAN_REPORT_DIR_FID) && |
1932 | (flags & FAN_MARK_ADD) && !ignore) |
1933 | mask |= FAN_EVENT_ON_CHILD; |
1934 | } |
1935 | |
1936 | /* create/update an inode mark */ |
1937 | switch (mark_cmd) { |
1938 | case FAN_MARK_ADD: |
1939 | if (mark_type == FAN_MARK_MOUNT) |
1940 | ret = fanotify_add_vfsmount_mark(group, mnt, mask, |
1941 | flags, fsid); |
1942 | else if (mark_type == FAN_MARK_FILESYSTEM) |
1943 | ret = fanotify_add_sb_mark(group, sb: mnt->mnt_sb, mask, |
1944 | flags, fsid); |
1945 | else |
1946 | ret = fanotify_add_inode_mark(group, inode, mask, |
1947 | flags, fsid); |
1948 | break; |
1949 | case FAN_MARK_REMOVE: |
1950 | if (mark_type == FAN_MARK_MOUNT) |
1951 | ret = fanotify_remove_vfsmount_mark(group, mnt, mask, |
1952 | flags, umask); |
1953 | else if (mark_type == FAN_MARK_FILESYSTEM) |
1954 | ret = fanotify_remove_sb_mark(group, sb: mnt->mnt_sb, mask, |
1955 | flags, umask); |
1956 | else |
1957 | ret = fanotify_remove_inode_mark(group, inode, mask, |
1958 | flags, umask); |
1959 | break; |
1960 | default: |
1961 | ret = -EINVAL; |
1962 | } |
1963 | |
1964 | path_put_and_out: |
1965 | path_put(&path); |
1966 | fput_and_out: |
1967 | fdput(fd: f); |
1968 | return ret; |
1969 | } |
1970 | |
1971 | #ifndef CONFIG_ARCH_SPLIT_ARG64 |
1972 | SYSCALL_DEFINE5(fanotify_mark, int, fanotify_fd, unsigned int, flags, |
1973 | __u64, mask, int, dfd, |
1974 | const char __user *, pathname) |
1975 | { |
1976 | return do_fanotify_mark(fanotify_fd, flags, mask, dfd, pathname); |
1977 | } |
1978 | #endif |
1979 | |
/*
 * fanotify_mark(2) entry point built when CONFIG_ARCH_SPLIT_ARG64 or
 * CONFIG_COMPAT is set.  The event mask is declared with SC_ARG64() and
 * converted back to a __u64 with SC_VAL64() (presumably reassembling it from
 * two 32-bit halves where the ABI requires that -- confirm against the
 * SC_ARG64/SC_VAL64 definitions) before being handed to do_fanotify_mark().
 */
#if defined(CONFIG_ARCH_SPLIT_ARG64) || defined(CONFIG_COMPAT)
SYSCALL32_DEFINE6(fanotify_mark,
		  int, fanotify_fd,
		  unsigned int, flags,
		  SC_ARG64(mask),
		  int, dfd,
		  const char __user *, pathname)
{
	__u64 event_mask = SC_VAL64(__u64, mask);

	return do_fanotify_mark(fanotify_fd, flags, event_mask, dfd, pathname);
}
#endif
1990 | |
1991 | /* |
1992 | * fanotify_user_setup - Our initialization function. Note that we cannot return |
1993 | * error because we have compiled-in VFS hooks. So an (unlikely) failure here |
1994 | * must result in panic(). |
1995 | */ |
1996 | static int __init fanotify_user_setup(void) |
1997 | { |
1998 | struct sysinfo si; |
1999 | int max_marks; |
2000 | |
2001 | si_meminfo(val: &si); |
2002 | /* |
2003 | * Allow up to 1% of addressable memory to be accounted for per user |
2004 | * marks limited to the range [8192, 1048576]. mount and sb marks are |
2005 | * a lot cheaper than inode marks, but there is no reason for a user |
2006 | * to have many of those, so calculate by the cost of inode marks. |
2007 | */ |
2008 | max_marks = (((si.totalram - si.totalhigh) / 100) << PAGE_SHIFT) / |
2009 | INODE_MARK_COST; |
2010 | max_marks = clamp(max_marks, FANOTIFY_OLD_DEFAULT_MAX_MARKS, |
2011 | FANOTIFY_DEFAULT_MAX_USER_MARKS); |
2012 | |
2013 | BUILD_BUG_ON(FANOTIFY_INIT_FLAGS & FANOTIFY_INTERNAL_GROUP_FLAGS); |
2014 | BUILD_BUG_ON(HWEIGHT32(FANOTIFY_INIT_FLAGS) != 12); |
2015 | BUILD_BUG_ON(HWEIGHT32(FANOTIFY_MARK_FLAGS) != 11); |
2016 | |
2017 | fanotify_mark_cache = KMEM_CACHE(fanotify_mark, |
2018 | SLAB_PANIC|SLAB_ACCOUNT); |
2019 | fanotify_fid_event_cachep = KMEM_CACHE(fanotify_fid_event, |
2020 | SLAB_PANIC); |
2021 | fanotify_path_event_cachep = KMEM_CACHE(fanotify_path_event, |
2022 | SLAB_PANIC); |
2023 | if (IS_ENABLED(CONFIG_FANOTIFY_ACCESS_PERMISSIONS)) { |
2024 | fanotify_perm_event_cachep = |
2025 | KMEM_CACHE(fanotify_perm_event, SLAB_PANIC); |
2026 | } |
2027 | |
2028 | fanotify_max_queued_events = FANOTIFY_DEFAULT_MAX_EVENTS; |
2029 | init_user_ns.ucount_max[UCOUNT_FANOTIFY_GROUPS] = |
2030 | FANOTIFY_DEFAULT_MAX_GROUPS; |
2031 | init_user_ns.ucount_max[UCOUNT_FANOTIFY_MARKS] = max_marks; |
2032 | fanotify_sysctls_init(); |
2033 | |
2034 | return 0; |
2035 | } |
2036 | device_initcall(fanotify_user_setup); |
2037 | |