1 | // SPDX-License-Identifier: GPL-2.0 |
2 | #include <linux/fanotify.h> |
3 | #include <linux/fdtable.h> |
4 | #include <linux/fsnotify_backend.h> |
5 | #include <linux/init.h> |
6 | #include <linux/jiffies.h> |
7 | #include <linux/kernel.h> /* UINT_MAX */ |
8 | #include <linux/mount.h> |
9 | #include <linux/sched.h> |
10 | #include <linux/sched/user.h> |
11 | #include <linux/sched/signal.h> |
12 | #include <linux/types.h> |
13 | #include <linux/wait.h> |
14 | #include <linux/audit.h> |
15 | #include <linux/sched/mm.h> |
16 | #include <linux/statfs.h> |
17 | #include <linux/stringhash.h> |
18 | |
19 | #include "fanotify.h" |
20 | |
21 | static bool fanotify_path_equal(const struct path *p1, const struct path *p2) |
22 | { |
23 | return p1->mnt == p2->mnt && p1->dentry == p2->dentry; |
24 | } |
25 | |
26 | static unsigned int fanotify_hash_path(const struct path *path) |
27 | { |
28 | return hash_ptr(ptr: path->dentry, FANOTIFY_EVENT_HASH_BITS) ^ |
29 | hash_ptr(ptr: path->mnt, FANOTIFY_EVENT_HASH_BITS); |
30 | } |
31 | |
32 | static inline bool fanotify_fsid_equal(__kernel_fsid_t *fsid1, |
33 | __kernel_fsid_t *fsid2) |
34 | { |
35 | return fsid1->val[0] == fsid2->val[0] && fsid1->val[1] == fsid2->val[1]; |
36 | } |
37 | |
38 | static unsigned int fanotify_hash_fsid(__kernel_fsid_t *fsid) |
39 | { |
40 | return hash_32(val: fsid->val[0], FANOTIFY_EVENT_HASH_BITS) ^ |
41 | hash_32(val: fsid->val[1], FANOTIFY_EVENT_HASH_BITS); |
42 | } |
43 | |
44 | static bool fanotify_fh_equal(struct fanotify_fh *fh1, |
45 | struct fanotify_fh *fh2) |
46 | { |
47 | if (fh1->type != fh2->type || fh1->len != fh2->len) |
48 | return false; |
49 | |
50 | return !fh1->len || |
51 | !memcmp(p: fanotify_fh_buf(fh: fh1), q: fanotify_fh_buf(fh: fh2), size: fh1->len); |
52 | } |
53 | |
54 | static unsigned int fanotify_hash_fh(struct fanotify_fh *fh) |
55 | { |
56 | long salt = (long)fh->type | (long)fh->len << 8; |
57 | |
58 | /* |
59 | * full_name_hash() works long by long, so it handles fh buf optimally. |
60 | */ |
61 | return full_name_hash(salt: (void *)salt, fanotify_fh_buf(fh), fh->len); |
62 | } |
63 | |
64 | static bool fanotify_fid_event_equal(struct fanotify_fid_event *ffe1, |
65 | struct fanotify_fid_event *ffe2) |
66 | { |
67 | /* Do not merge fid events without object fh */ |
68 | if (!ffe1->object_fh.len) |
69 | return false; |
70 | |
71 | return fanotify_fsid_equal(fsid1: &ffe1->fsid, fsid2: &ffe2->fsid) && |
72 | fanotify_fh_equal(fh1: &ffe1->object_fh, fh2: &ffe2->object_fh); |
73 | } |
74 | |
75 | static bool fanotify_info_equal(struct fanotify_info *info1, |
76 | struct fanotify_info *info2) |
77 | { |
78 | if (info1->dir_fh_totlen != info2->dir_fh_totlen || |
79 | info1->dir2_fh_totlen != info2->dir2_fh_totlen || |
80 | info1->file_fh_totlen != info2->file_fh_totlen || |
81 | info1->name_len != info2->name_len || |
82 | info1->name2_len != info2->name2_len) |
83 | return false; |
84 | |
85 | if (info1->dir_fh_totlen && |
86 | !fanotify_fh_equal(fh1: fanotify_info_dir_fh(info: info1), |
87 | fh2: fanotify_info_dir_fh(info: info2))) |
88 | return false; |
89 | |
90 | if (info1->dir2_fh_totlen && |
91 | !fanotify_fh_equal(fh1: fanotify_info_dir2_fh(info: info1), |
92 | fh2: fanotify_info_dir2_fh(info: info2))) |
93 | return false; |
94 | |
95 | if (info1->file_fh_totlen && |
96 | !fanotify_fh_equal(fh1: fanotify_info_file_fh(info: info1), |
97 | fh2: fanotify_info_file_fh(info: info2))) |
98 | return false; |
99 | |
100 | if (info1->name_len && |
101 | memcmp(p: fanotify_info_name(info: info1), q: fanotify_info_name(info: info2), |
102 | size: info1->name_len)) |
103 | return false; |
104 | |
105 | return !info1->name2_len || |
106 | !memcmp(p: fanotify_info_name2(info: info1), q: fanotify_info_name2(info: info2), |
107 | size: info1->name2_len); |
108 | } |
109 | |
110 | static bool fanotify_name_event_equal(struct fanotify_name_event *fne1, |
111 | struct fanotify_name_event *fne2) |
112 | { |
113 | struct fanotify_info *info1 = &fne1->info; |
114 | struct fanotify_info *info2 = &fne2->info; |
115 | |
116 | /* Do not merge name events without dir fh */ |
117 | if (!info1->dir_fh_totlen) |
118 | return false; |
119 | |
120 | if (!fanotify_fsid_equal(fsid1: &fne1->fsid, fsid2: &fne2->fsid)) |
121 | return false; |
122 | |
123 | return fanotify_info_equal(info1, info2); |
124 | } |
125 | |
126 | static bool fanotify_error_event_equal(struct fanotify_error_event *fee1, |
127 | struct fanotify_error_event *fee2) |
128 | { |
129 | /* Error events against the same file system are always merged. */ |
130 | if (!fanotify_fsid_equal(fsid1: &fee1->fsid, fsid2: &fee2->fsid)) |
131 | return false; |
132 | |
133 | return true; |
134 | } |
135 | |
136 | static bool fanotify_should_merge(struct fanotify_event *old, |
137 | struct fanotify_event *new) |
138 | { |
139 | pr_debug("%s: old=%p new=%p\n" , __func__, old, new); |
140 | |
141 | if (old->hash != new->hash || |
142 | old->type != new->type || old->pid != new->pid) |
143 | return false; |
144 | |
145 | /* |
146 | * We want to merge many dirent events in the same dir (i.e. |
147 | * creates/unlinks/renames), but we do not want to merge dirent |
148 | * events referring to subdirs with dirent events referring to |
149 | * non subdirs, otherwise, user won't be able to tell from a |
150 | * mask FAN_CREATE|FAN_DELETE|FAN_ONDIR if it describes mkdir+ |
151 | * unlink pair or rmdir+create pair of events. |
152 | */ |
153 | if ((old->mask & FS_ISDIR) != (new->mask & FS_ISDIR)) |
154 | return false; |
155 | |
156 | /* |
157 | * FAN_RENAME event is reported with special info record types, |
158 | * so we cannot merge it with other events. |
159 | */ |
160 | if ((old->mask & FAN_RENAME) != (new->mask & FAN_RENAME)) |
161 | return false; |
162 | |
163 | switch (old->type) { |
164 | case FANOTIFY_EVENT_TYPE_PATH: |
165 | return fanotify_path_equal(p1: fanotify_event_path(event: old), |
166 | p2: fanotify_event_path(event: new)); |
167 | case FANOTIFY_EVENT_TYPE_FID: |
168 | return fanotify_fid_event_equal(ffe1: FANOTIFY_FE(event: old), |
169 | ffe2: FANOTIFY_FE(event: new)); |
170 | case FANOTIFY_EVENT_TYPE_FID_NAME: |
171 | return fanotify_name_event_equal(fne1: FANOTIFY_NE(event: old), |
172 | fne2: FANOTIFY_NE(event: new)); |
173 | case FANOTIFY_EVENT_TYPE_FS_ERROR: |
174 | return fanotify_error_event_equal(fee1: FANOTIFY_EE(event: old), |
175 | fee2: FANOTIFY_EE(event: new)); |
176 | default: |
177 | WARN_ON_ONCE(1); |
178 | } |
179 | |
180 | return false; |
181 | } |
182 | |
183 | /* Limit event merges to limit CPU overhead per event */ |
184 | #define FANOTIFY_MAX_MERGE_EVENTS 128 |
185 | |
186 | /* and the list better be locked by something too! */ |
187 | static int fanotify_merge(struct fsnotify_group *group, |
188 | struct fsnotify_event *event) |
189 | { |
190 | struct fanotify_event *old, *new = FANOTIFY_E(fse: event); |
191 | unsigned int bucket = fanotify_event_hash_bucket(group, event: new); |
192 | struct hlist_head *hlist = &group->fanotify_data.merge_hash[bucket]; |
193 | int i = 0; |
194 | |
195 | pr_debug("%s: group=%p event=%p bucket=%u\n" , __func__, |
196 | group, event, bucket); |
197 | |
198 | /* |
199 | * Don't merge a permission event with any other event so that we know |
200 | * the event structure we have created in fanotify_handle_event() is the |
201 | * one we should check for permission response. |
202 | */ |
203 | if (fanotify_is_perm_event(mask: new->mask)) |
204 | return 0; |
205 | |
206 | hlist_for_each_entry(old, hlist, merge_list) { |
207 | if (++i > FANOTIFY_MAX_MERGE_EVENTS) |
208 | break; |
209 | if (fanotify_should_merge(old, new)) { |
210 | old->mask |= new->mask; |
211 | |
212 | if (fanotify_is_error_event(mask: old->mask)) |
213 | FANOTIFY_EE(event: old)->err_count++; |
214 | |
215 | return 1; |
216 | } |
217 | } |
218 | |
219 | return 0; |
220 | } |
221 | |
222 | /* |
223 | * Wait for response to permission event. The function also takes care of |
224 | * freeing the permission event (or offloads that in case the wait is canceled |
225 | * by a signal). The function returns 0 in case access got allowed by userspace, |
226 | * -EPERM in case userspace disallowed the access, and -ERESTARTSYS in case |
227 | * the wait got interrupted by a signal. |
228 | */ |
229 | static int fanotify_get_response(struct fsnotify_group *group, |
230 | struct fanotify_perm_event *event, |
231 | struct fsnotify_iter_info *iter_info) |
232 | { |
233 | int ret; |
234 | |
235 | pr_debug("%s: group=%p event=%p\n" , __func__, group, event); |
236 | |
237 | ret = wait_event_killable(group->fanotify_data.access_waitq, |
238 | event->state == FAN_EVENT_ANSWERED); |
239 | /* Signal pending? */ |
240 | if (ret < 0) { |
241 | spin_lock(lock: &group->notification_lock); |
242 | /* Event reported to userspace and no answer yet? */ |
243 | if (event->state == FAN_EVENT_REPORTED) { |
244 | /* Event will get freed once userspace answers to it */ |
245 | event->state = FAN_EVENT_CANCELED; |
246 | spin_unlock(lock: &group->notification_lock); |
247 | return ret; |
248 | } |
249 | /* Event not yet reported? Just remove it. */ |
250 | if (event->state == FAN_EVENT_INIT) { |
251 | fsnotify_remove_queued_event(group, event: &event->fae.fse); |
252 | /* Permission events are not supposed to be hashed */ |
253 | WARN_ON_ONCE(!hlist_unhashed(&event->fae.merge_list)); |
254 | } |
255 | /* |
256 | * Event may be also answered in case signal delivery raced |
257 | * with wakeup. In that case we have nothing to do besides |
258 | * freeing the event and reporting error. |
259 | */ |
260 | spin_unlock(lock: &group->notification_lock); |
261 | goto out; |
262 | } |
263 | |
264 | /* userspace responded, convert to something usable */ |
265 | switch (event->response & FANOTIFY_RESPONSE_ACCESS) { |
266 | case FAN_ALLOW: |
267 | ret = 0; |
268 | break; |
269 | case FAN_DENY: |
270 | default: |
271 | ret = -EPERM; |
272 | } |
273 | |
274 | /* Check if the response should be audited */ |
275 | if (event->response & FAN_AUDIT) |
276 | audit_fanotify(response: event->response & ~FAN_AUDIT, |
277 | friar: &event->audit_rule); |
278 | |
279 | pr_debug("%s: group=%p event=%p about to return ret=%d\n" , __func__, |
280 | group, event, ret); |
281 | out: |
282 | fsnotify_destroy_event(group, event: &event->fae.fse); |
283 | |
284 | return ret; |
285 | } |
286 | |
287 | /* |
288 | * This function returns a mask for an event that only contains the flags |
289 | * that have been specifically requested by the user. Flags that may have |
290 | * been included within the event mask, but have not been explicitly |
291 | * requested by the user, will not be present in the returned mask. |
292 | */ |
293 | static u32 fanotify_group_event_mask(struct fsnotify_group *group, |
294 | struct fsnotify_iter_info *iter_info, |
295 | u32 *match_mask, u32 event_mask, |
296 | const void *data, int data_type, |
297 | struct inode *dir) |
298 | { |
299 | __u32 marks_mask = 0, marks_ignore_mask = 0; |
300 | __u32 test_mask, user_mask = FANOTIFY_OUTGOING_EVENTS | |
301 | FANOTIFY_EVENT_FLAGS; |
302 | const struct path *path = fsnotify_data_path(data, data_type); |
303 | unsigned int fid_mode = FAN_GROUP_FLAG(group, FANOTIFY_FID_BITS); |
304 | struct fsnotify_mark *mark; |
305 | bool ondir = event_mask & FAN_ONDIR; |
306 | int type; |
307 | |
308 | pr_debug("%s: report_mask=%x mask=%x data=%p data_type=%d\n" , |
309 | __func__, iter_info->report_mask, event_mask, data, data_type); |
310 | |
311 | if (!fid_mode) { |
312 | /* Do we have path to open a file descriptor? */ |
313 | if (!path) |
314 | return 0; |
315 | /* Path type events are only relevant for files and dirs */ |
316 | if (!d_is_reg(dentry: path->dentry) && !d_can_lookup(dentry: path->dentry)) |
317 | return 0; |
318 | } else if (!(fid_mode & FAN_REPORT_FID)) { |
319 | /* Do we have a directory inode to report? */ |
320 | if (!dir && !ondir) |
321 | return 0; |
322 | } |
323 | |
324 | fsnotify_foreach_iter_mark_type(iter_info, mark, type) { |
325 | /* |
326 | * Apply ignore mask depending on event flags in ignore mask. |
327 | */ |
328 | marks_ignore_mask |= |
329 | fsnotify_effective_ignore_mask(mark, is_dir: ondir, iter_type: type); |
330 | |
331 | /* |
332 | * Send the event depending on event flags in mark mask. |
333 | */ |
334 | if (!fsnotify_mask_applicable(mask: mark->mask, is_dir: ondir, iter_type: type)) |
335 | continue; |
336 | |
337 | marks_mask |= mark->mask; |
338 | |
339 | /* Record the mark types of this group that matched the event */ |
340 | *match_mask |= 1U << type; |
341 | } |
342 | |
343 | test_mask = event_mask & marks_mask & ~marks_ignore_mask; |
344 | |
345 | /* |
346 | * For dirent modification events (create/delete/move) that do not carry |
347 | * the child entry name information, we report FAN_ONDIR for mkdir/rmdir |
348 | * so user can differentiate them from creat/unlink. |
349 | * |
350 | * For backward compatibility and consistency, do not report FAN_ONDIR |
351 | * to user in legacy fanotify mode (reporting fd) and report FAN_ONDIR |
352 | * to user in fid mode for all event types. |
353 | * |
354 | * We never report FAN_EVENT_ON_CHILD to user, but we do pass it in to |
355 | * fanotify_alloc_event() when group is reporting fid as indication |
356 | * that event happened on child. |
357 | */ |
358 | if (fid_mode) { |
359 | /* Do not report event flags without any event */ |
360 | if (!(test_mask & ~FANOTIFY_EVENT_FLAGS)) |
361 | return 0; |
362 | } else { |
363 | user_mask &= ~FANOTIFY_EVENT_FLAGS; |
364 | } |
365 | |
366 | return test_mask & user_mask; |
367 | } |
368 | |
369 | /* |
370 | * Check size needed to encode fanotify_fh. |
371 | * |
372 | * Return size of encoded fh without fanotify_fh header. |
373 | * Return 0 on failure to encode. |
374 | */ |
375 | static int fanotify_encode_fh_len(struct inode *inode) |
376 | { |
377 | int dwords = 0; |
378 | int fh_len; |
379 | |
380 | if (!inode) |
381 | return 0; |
382 | |
383 | exportfs_encode_fid(inode, NULL, max_len: &dwords); |
384 | fh_len = dwords << 2; |
385 | |
386 | /* |
387 | * struct fanotify_error_event might be preallocated and is |
388 | * limited to MAX_HANDLE_SZ. This should never happen, but |
389 | * safeguard by forcing an invalid file handle. |
390 | */ |
391 | if (WARN_ON_ONCE(fh_len > MAX_HANDLE_SZ)) |
392 | return 0; |
393 | |
394 | return fh_len; |
395 | } |
396 | |
397 | /* |
398 | * Encode fanotify_fh. |
399 | * |
400 | * Return total size of encoded fh including fanotify_fh header. |
401 | * Return 0 on failure to encode. |
402 | */ |
403 | static int fanotify_encode_fh(struct fanotify_fh *fh, struct inode *inode, |
404 | unsigned int fh_len, unsigned int *hash, |
405 | gfp_t gfp) |
406 | { |
407 | int dwords, type = 0; |
408 | char *ext_buf = NULL; |
409 | void *buf = fh->buf; |
410 | int err; |
411 | |
412 | fh->type = FILEID_ROOT; |
413 | fh->len = 0; |
414 | fh->flags = 0; |
415 | |
416 | /* |
417 | * Invalid FHs are used by FAN_FS_ERROR for errors not |
418 | * linked to any inode. The f_handle won't be reported |
419 | * back to userspace. |
420 | */ |
421 | if (!inode) |
422 | goto out; |
423 | |
424 | /* |
425 | * !gpf means preallocated variable size fh, but fh_len could |
426 | * be zero in that case if encoding fh len failed. |
427 | */ |
428 | err = -ENOENT; |
429 | if (fh_len < 4 || WARN_ON_ONCE(fh_len % 4) || fh_len > MAX_HANDLE_SZ) |
430 | goto out_err; |
431 | |
432 | /* No external buffer in a variable size allocated fh */ |
433 | if (gfp && fh_len > FANOTIFY_INLINE_FH_LEN) { |
434 | /* Treat failure to allocate fh as failure to encode fh */ |
435 | err = -ENOMEM; |
436 | ext_buf = kmalloc(size: fh_len, flags: gfp); |
437 | if (!ext_buf) |
438 | goto out_err; |
439 | |
440 | *fanotify_fh_ext_buf_ptr(fh) = ext_buf; |
441 | buf = ext_buf; |
442 | fh->flags |= FANOTIFY_FH_FLAG_EXT_BUF; |
443 | } |
444 | |
445 | dwords = fh_len >> 2; |
446 | type = exportfs_encode_fid(inode, fid: buf, max_len: &dwords); |
447 | err = -EINVAL; |
448 | if (type <= 0 || type == FILEID_INVALID || fh_len != dwords << 2) |
449 | goto out_err; |
450 | |
451 | fh->type = type; |
452 | fh->len = fh_len; |
453 | |
454 | out: |
455 | /* |
456 | * Mix fh into event merge key. Hash might be NULL in case of |
457 | * unhashed FID events (i.e. FAN_FS_ERROR). |
458 | */ |
459 | if (hash) |
460 | *hash ^= fanotify_hash_fh(fh); |
461 | |
462 | return FANOTIFY_FH_HDR_LEN + fh_len; |
463 | |
464 | out_err: |
465 | pr_warn_ratelimited("fanotify: failed to encode fid (type=%d, len=%d, err=%i)\n" , |
466 | type, fh_len, err); |
467 | kfree(objp: ext_buf); |
468 | *fanotify_fh_ext_buf_ptr(fh) = NULL; |
469 | /* Report the event without a file identifier on encode error */ |
470 | fh->type = FILEID_INVALID; |
471 | fh->len = 0; |
472 | return 0; |
473 | } |
474 | |
475 | /* |
476 | * FAN_REPORT_FID is ambiguous in that it reports the fid of the child for |
477 | * some events and the fid of the parent for create/delete/move events. |
478 | * |
479 | * With the FAN_REPORT_TARGET_FID flag, the fid of the child is reported |
480 | * also in create/delete/move events in addition to the fid of the parent |
481 | * and the name of the child. |
482 | */ |
483 | static inline bool fanotify_report_child_fid(unsigned int fid_mode, u32 mask) |
484 | { |
485 | if (mask & ALL_FSNOTIFY_DIRENT_EVENTS) |
486 | return (fid_mode & FAN_REPORT_TARGET_FID); |
487 | |
488 | return (fid_mode & FAN_REPORT_FID) && !(mask & FAN_ONDIR); |
489 | } |
490 | |
491 | /* |
492 | * The inode to use as identifier when reporting fid depends on the event |
493 | * and the group flags. |
494 | * |
495 | * With the group flag FAN_REPORT_TARGET_FID, always report the child fid. |
496 | * |
497 | * Without the group flag FAN_REPORT_TARGET_FID, report the modified directory |
498 | * fid on dirent events and the child fid otherwise. |
499 | * |
500 | * For example: |
501 | * FS_ATTRIB reports the child fid even if reported on a watched parent. |
502 | * FS_CREATE reports the modified dir fid without FAN_REPORT_TARGET_FID. |
503 | * and reports the created child fid with FAN_REPORT_TARGET_FID. |
504 | */ |
505 | static struct inode *fanotify_fid_inode(u32 event_mask, const void *data, |
506 | int data_type, struct inode *dir, |
507 | unsigned int fid_mode) |
508 | { |
509 | if ((event_mask & ALL_FSNOTIFY_DIRENT_EVENTS) && |
510 | !(fid_mode & FAN_REPORT_TARGET_FID)) |
511 | return dir; |
512 | |
513 | return fsnotify_data_inode(data, data_type); |
514 | } |
515 | |
516 | /* |
517 | * The inode to use as identifier when reporting dir fid depends on the event. |
518 | * Report the modified directory inode on dirent modification events. |
519 | * Report the "victim" inode if "victim" is a directory. |
520 | * Report the parent inode if "victim" is not a directory and event is |
521 | * reported to parent. |
522 | * Otherwise, do not report dir fid. |
523 | */ |
524 | static struct inode *fanotify_dfid_inode(u32 event_mask, const void *data, |
525 | int data_type, struct inode *dir) |
526 | { |
527 | struct inode *inode = fsnotify_data_inode(data, data_type); |
528 | |
529 | if (event_mask & ALL_FSNOTIFY_DIRENT_EVENTS) |
530 | return dir; |
531 | |
532 | if (inode && S_ISDIR(inode->i_mode)) |
533 | return inode; |
534 | |
535 | return dir; |
536 | } |
537 | |
538 | static struct fanotify_event *fanotify_alloc_path_event(const struct path *path, |
539 | unsigned int *hash, |
540 | gfp_t gfp) |
541 | { |
542 | struct fanotify_path_event *pevent; |
543 | |
544 | pevent = kmem_cache_alloc(cachep: fanotify_path_event_cachep, flags: gfp); |
545 | if (!pevent) |
546 | return NULL; |
547 | |
548 | pevent->fae.type = FANOTIFY_EVENT_TYPE_PATH; |
549 | pevent->path = *path; |
550 | *hash ^= fanotify_hash_path(path); |
551 | path_get(path); |
552 | |
553 | return &pevent->fae; |
554 | } |
555 | |
556 | static struct fanotify_event *fanotify_alloc_perm_event(const struct path *path, |
557 | gfp_t gfp) |
558 | { |
559 | struct fanotify_perm_event *pevent; |
560 | |
561 | pevent = kmem_cache_alloc(cachep: fanotify_perm_event_cachep, flags: gfp); |
562 | if (!pevent) |
563 | return NULL; |
564 | |
565 | pevent->fae.type = FANOTIFY_EVENT_TYPE_PATH_PERM; |
566 | pevent->response = 0; |
567 | pevent->hdr.type = FAN_RESPONSE_INFO_NONE; |
568 | pevent->hdr.pad = 0; |
569 | pevent->hdr.len = 0; |
570 | pevent->state = FAN_EVENT_INIT; |
571 | pevent->path = *path; |
572 | path_get(path); |
573 | |
574 | return &pevent->fae; |
575 | } |
576 | |
577 | static struct fanotify_event *fanotify_alloc_fid_event(struct inode *id, |
578 | __kernel_fsid_t *fsid, |
579 | unsigned int *hash, |
580 | gfp_t gfp) |
581 | { |
582 | struct fanotify_fid_event *ffe; |
583 | |
584 | ffe = kmem_cache_alloc(cachep: fanotify_fid_event_cachep, flags: gfp); |
585 | if (!ffe) |
586 | return NULL; |
587 | |
588 | ffe->fae.type = FANOTIFY_EVENT_TYPE_FID; |
589 | ffe->fsid = *fsid; |
590 | *hash ^= fanotify_hash_fsid(fsid); |
591 | fanotify_encode_fh(fh: &ffe->object_fh, inode: id, fh_len: fanotify_encode_fh_len(inode: id), |
592 | hash, gfp); |
593 | |
594 | return &ffe->fae; |
595 | } |
596 | |
597 | static struct fanotify_event *fanotify_alloc_name_event(struct inode *dir, |
598 | __kernel_fsid_t *fsid, |
599 | const struct qstr *name, |
600 | struct inode *child, |
601 | struct dentry *moved, |
602 | unsigned int *hash, |
603 | gfp_t gfp) |
604 | { |
605 | struct fanotify_name_event *fne; |
606 | struct fanotify_info *info; |
607 | struct fanotify_fh *dfh, *ffh; |
608 | struct inode *dir2 = moved ? d_inode(dentry: moved->d_parent) : NULL; |
609 | const struct qstr *name2 = moved ? &moved->d_name : NULL; |
610 | unsigned int dir_fh_len = fanotify_encode_fh_len(inode: dir); |
611 | unsigned int dir2_fh_len = fanotify_encode_fh_len(inode: dir2); |
612 | unsigned int child_fh_len = fanotify_encode_fh_len(inode: child); |
613 | unsigned long name_len = name ? name->len : 0; |
614 | unsigned long name2_len = name2 ? name2->len : 0; |
615 | unsigned int len, size; |
616 | |
617 | /* Reserve terminating null byte even for empty name */ |
618 | size = sizeof(*fne) + name_len + name2_len + 2; |
619 | if (dir_fh_len) |
620 | size += FANOTIFY_FH_HDR_LEN + dir_fh_len; |
621 | if (dir2_fh_len) |
622 | size += FANOTIFY_FH_HDR_LEN + dir2_fh_len; |
623 | if (child_fh_len) |
624 | size += FANOTIFY_FH_HDR_LEN + child_fh_len; |
625 | fne = kmalloc(size, flags: gfp); |
626 | if (!fne) |
627 | return NULL; |
628 | |
629 | fne->fae.type = FANOTIFY_EVENT_TYPE_FID_NAME; |
630 | fne->fsid = *fsid; |
631 | *hash ^= fanotify_hash_fsid(fsid); |
632 | info = &fne->info; |
633 | fanotify_info_init(info); |
634 | if (dir_fh_len) { |
635 | dfh = fanotify_info_dir_fh(info); |
636 | len = fanotify_encode_fh(fh: dfh, inode: dir, fh_len: dir_fh_len, hash, gfp: 0); |
637 | fanotify_info_set_dir_fh(info, totlen: len); |
638 | } |
639 | if (dir2_fh_len) { |
640 | dfh = fanotify_info_dir2_fh(info); |
641 | len = fanotify_encode_fh(fh: dfh, inode: dir2, fh_len: dir2_fh_len, hash, gfp: 0); |
642 | fanotify_info_set_dir2_fh(info, totlen: len); |
643 | } |
644 | if (child_fh_len) { |
645 | ffh = fanotify_info_file_fh(info); |
646 | len = fanotify_encode_fh(fh: ffh, inode: child, fh_len: child_fh_len, hash, gfp: 0); |
647 | fanotify_info_set_file_fh(info, totlen: len); |
648 | } |
649 | if (name_len) { |
650 | fanotify_info_copy_name(info, name); |
651 | *hash ^= full_name_hash(salt: (void *)name_len, name->name, name_len); |
652 | } |
653 | if (name2_len) { |
654 | fanotify_info_copy_name2(info, name: name2); |
655 | *hash ^= full_name_hash(salt: (void *)name2_len, name2->name, |
656 | name2_len); |
657 | } |
658 | |
659 | pr_debug("%s: size=%u dir_fh_len=%u child_fh_len=%u name_len=%u name='%.*s'\n" , |
660 | __func__, size, dir_fh_len, child_fh_len, |
661 | info->name_len, info->name_len, fanotify_info_name(info)); |
662 | |
663 | if (dir2_fh_len) { |
664 | pr_debug("%s: dir2_fh_len=%u name2_len=%u name2='%.*s'\n" , |
665 | __func__, dir2_fh_len, info->name2_len, |
666 | info->name2_len, fanotify_info_name2(info)); |
667 | } |
668 | |
669 | return &fne->fae; |
670 | } |
671 | |
672 | static struct fanotify_event *fanotify_alloc_error_event( |
673 | struct fsnotify_group *group, |
674 | __kernel_fsid_t *fsid, |
675 | const void *data, int data_type, |
676 | unsigned int *hash) |
677 | { |
678 | struct fs_error_report *report = |
679 | fsnotify_data_error_report(data, data_type); |
680 | struct inode *inode; |
681 | struct fanotify_error_event *fee; |
682 | int fh_len; |
683 | |
684 | if (WARN_ON_ONCE(!report)) |
685 | return NULL; |
686 | |
687 | fee = mempool_alloc(pool: &group->fanotify_data.error_events_pool, GFP_NOFS); |
688 | if (!fee) |
689 | return NULL; |
690 | |
691 | fee->fae.type = FANOTIFY_EVENT_TYPE_FS_ERROR; |
692 | fee->error = report->error; |
693 | fee->err_count = 1; |
694 | fee->fsid = *fsid; |
695 | |
696 | inode = report->inode; |
697 | fh_len = fanotify_encode_fh_len(inode); |
698 | |
699 | /* Bad fh_len. Fallback to using an invalid fh. Should never happen. */ |
700 | if (!fh_len && inode) |
701 | inode = NULL; |
702 | |
703 | fanotify_encode_fh(fh: &fee->object_fh, inode, fh_len, NULL, gfp: 0); |
704 | |
705 | *hash ^= fanotify_hash_fsid(fsid); |
706 | |
707 | return &fee->fae; |
708 | } |
709 | |
710 | static struct fanotify_event *fanotify_alloc_event( |
711 | struct fsnotify_group *group, |
712 | u32 mask, const void *data, int data_type, |
713 | struct inode *dir, const struct qstr *file_name, |
714 | __kernel_fsid_t *fsid, u32 match_mask) |
715 | { |
716 | struct fanotify_event *event = NULL; |
717 | gfp_t gfp = GFP_KERNEL_ACCOUNT; |
718 | unsigned int fid_mode = FAN_GROUP_FLAG(group, FANOTIFY_FID_BITS); |
719 | struct inode *id = fanotify_fid_inode(event_mask: mask, data, data_type, dir, |
720 | fid_mode); |
721 | struct inode *dirid = fanotify_dfid_inode(event_mask: mask, data, data_type, dir); |
722 | const struct path *path = fsnotify_data_path(data, data_type); |
723 | struct mem_cgroup *old_memcg; |
724 | struct dentry *moved = NULL; |
725 | struct inode *child = NULL; |
726 | bool name_event = false; |
727 | unsigned int hash = 0; |
728 | bool ondir = mask & FAN_ONDIR; |
729 | struct pid *pid; |
730 | |
731 | if ((fid_mode & FAN_REPORT_DIR_FID) && dirid) { |
732 | /* |
733 | * For certain events and group flags, report the child fid |
734 | * in addition to reporting the parent fid and maybe child name. |
735 | */ |
736 | if (fanotify_report_child_fid(fid_mode, mask) && id != dirid) |
737 | child = id; |
738 | |
739 | id = dirid; |
740 | |
741 | /* |
742 | * We record file name only in a group with FAN_REPORT_NAME |
743 | * and when we have a directory inode to report. |
744 | * |
745 | * For directory entry modification event, we record the fid of |
746 | * the directory and the name of the modified entry. |
747 | * |
748 | * For event on non-directory that is reported to parent, we |
749 | * record the fid of the parent and the name of the child. |
750 | * |
751 | * Even if not reporting name, we need a variable length |
752 | * fanotify_name_event if reporting both parent and child fids. |
753 | */ |
754 | if (!(fid_mode & FAN_REPORT_NAME)) { |
755 | name_event = !!child; |
756 | file_name = NULL; |
757 | } else if ((mask & ALL_FSNOTIFY_DIRENT_EVENTS) || !ondir) { |
758 | name_event = true; |
759 | } |
760 | |
761 | /* |
762 | * In the special case of FAN_RENAME event, use the match_mask |
763 | * to determine if we need to report only the old parent+name, |
764 | * only the new parent+name or both. |
765 | * 'dirid' and 'file_name' are the old parent+name and |
766 | * 'moved' has the new parent+name. |
767 | */ |
768 | if (mask & FAN_RENAME) { |
769 | bool report_old, report_new; |
770 | |
771 | if (WARN_ON_ONCE(!match_mask)) |
772 | return NULL; |
773 | |
774 | /* Report both old and new parent+name if sb watching */ |
775 | report_old = report_new = |
776 | match_mask & (1U << FSNOTIFY_ITER_TYPE_SB); |
777 | report_old |= |
778 | match_mask & (1U << FSNOTIFY_ITER_TYPE_INODE); |
779 | report_new |= |
780 | match_mask & (1U << FSNOTIFY_ITER_TYPE_INODE2); |
781 | |
782 | if (!report_old) { |
783 | /* Do not report old parent+name */ |
784 | dirid = NULL; |
785 | file_name = NULL; |
786 | } |
787 | if (report_new) { |
788 | /* Report new parent+name */ |
789 | moved = fsnotify_data_dentry(data, data_type); |
790 | } |
791 | } |
792 | } |
793 | |
794 | /* |
795 | * For queues with unlimited length lost events are not expected and |
796 | * can possibly have security implications. Avoid losing events when |
797 | * memory is short. For the limited size queues, avoid OOM killer in the |
798 | * target monitoring memcg as it may have security repercussion. |
799 | */ |
800 | if (group->max_events == UINT_MAX) |
801 | gfp |= __GFP_NOFAIL; |
802 | else |
803 | gfp |= __GFP_RETRY_MAYFAIL; |
804 | |
805 | /* Whoever is interested in the event, pays for the allocation. */ |
806 | old_memcg = set_active_memcg(group->memcg); |
807 | |
808 | if (fanotify_is_perm_event(mask)) { |
809 | event = fanotify_alloc_perm_event(path, gfp); |
810 | } else if (fanotify_is_error_event(mask)) { |
811 | event = fanotify_alloc_error_event(group, fsid, data, |
812 | data_type, hash: &hash); |
813 | } else if (name_event && (file_name || moved || child)) { |
814 | event = fanotify_alloc_name_event(dir: dirid, fsid, name: file_name, child, |
815 | moved, hash: &hash, gfp); |
816 | } else if (fid_mode) { |
817 | event = fanotify_alloc_fid_event(id, fsid, hash: &hash, gfp); |
818 | } else { |
819 | event = fanotify_alloc_path_event(path, hash: &hash, gfp); |
820 | } |
821 | |
822 | if (!event) |
823 | goto out; |
824 | |
825 | if (FAN_GROUP_FLAG(group, FAN_REPORT_TID)) |
826 | pid = get_pid(pid: task_pid(current)); |
827 | else |
828 | pid = get_pid(pid: task_tgid(current)); |
829 | |
830 | /* Mix event info, FAN_ONDIR flag and pid into event merge key */ |
831 | hash ^= hash_long((unsigned long)pid | ondir, FANOTIFY_EVENT_HASH_BITS); |
832 | fanotify_init_event(event, hash, mask); |
833 | event->pid = pid; |
834 | |
835 | out: |
836 | set_active_memcg(old_memcg); |
837 | return event; |
838 | } |
839 | |
840 | /* |
841 | * Get cached fsid of the filesystem containing the object from any connector. |
842 | * All connectors are supposed to have the same fsid, but we do not verify that |
843 | * here. |
844 | */ |
845 | static __kernel_fsid_t fanotify_get_fsid(struct fsnotify_iter_info *iter_info) |
846 | { |
847 | struct fsnotify_mark *mark; |
848 | int type; |
849 | __kernel_fsid_t fsid = {}; |
850 | |
851 | fsnotify_foreach_iter_mark_type(iter_info, mark, type) { |
852 | struct fsnotify_mark_connector *conn; |
853 | |
854 | conn = READ_ONCE(mark->connector); |
855 | /* Mark is just getting destroyed or created? */ |
856 | if (!conn) |
857 | continue; |
858 | if (!(conn->flags & FSNOTIFY_CONN_FLAG_HAS_FSID)) |
859 | continue; |
860 | /* Pairs with smp_wmb() in fsnotify_add_mark_list() */ |
861 | smp_rmb(); |
862 | fsid = conn->fsid; |
863 | if (WARN_ON_ONCE(!fsid.val[0] && !fsid.val[1])) |
864 | continue; |
865 | return fsid; |
866 | } |
867 | |
868 | return fsid; |
869 | } |
870 | |
871 | /* |
872 | * Add an event to hash table for faster merge. |
873 | */ |
874 | static void fanotify_insert_event(struct fsnotify_group *group, |
875 | struct fsnotify_event *fsn_event) |
876 | { |
877 | struct fanotify_event *event = FANOTIFY_E(fse: fsn_event); |
878 | unsigned int bucket = fanotify_event_hash_bucket(group, event); |
879 | struct hlist_head *hlist = &group->fanotify_data.merge_hash[bucket]; |
880 | |
881 | assert_spin_locked(&group->notification_lock); |
882 | |
883 | if (!fanotify_is_hashed_event(mask: event->mask)) |
884 | return; |
885 | |
886 | pr_debug("%s: group=%p event=%p bucket=%u\n" , __func__, |
887 | group, event, bucket); |
888 | |
889 | hlist_add_head(n: &event->merge_list, h: hlist); |
890 | } |
891 | |
892 | static int fanotify_handle_event(struct fsnotify_group *group, u32 mask, |
893 | const void *data, int data_type, |
894 | struct inode *dir, |
895 | const struct qstr *file_name, u32 cookie, |
896 | struct fsnotify_iter_info *iter_info) |
897 | { |
898 | int ret = 0; |
899 | struct fanotify_event *event; |
900 | struct fsnotify_event *fsn_event; |
901 | __kernel_fsid_t fsid = {}; |
902 | u32 match_mask = 0; |
903 | |
904 | BUILD_BUG_ON(FAN_ACCESS != FS_ACCESS); |
905 | BUILD_BUG_ON(FAN_MODIFY != FS_MODIFY); |
906 | BUILD_BUG_ON(FAN_ATTRIB != FS_ATTRIB); |
907 | BUILD_BUG_ON(FAN_CLOSE_NOWRITE != FS_CLOSE_NOWRITE); |
908 | BUILD_BUG_ON(FAN_CLOSE_WRITE != FS_CLOSE_WRITE); |
909 | BUILD_BUG_ON(FAN_OPEN != FS_OPEN); |
910 | BUILD_BUG_ON(FAN_MOVED_TO != FS_MOVED_TO); |
911 | BUILD_BUG_ON(FAN_MOVED_FROM != FS_MOVED_FROM); |
912 | BUILD_BUG_ON(FAN_CREATE != FS_CREATE); |
913 | BUILD_BUG_ON(FAN_DELETE != FS_DELETE); |
914 | BUILD_BUG_ON(FAN_DELETE_SELF != FS_DELETE_SELF); |
915 | BUILD_BUG_ON(FAN_MOVE_SELF != FS_MOVE_SELF); |
916 | BUILD_BUG_ON(FAN_EVENT_ON_CHILD != FS_EVENT_ON_CHILD); |
917 | BUILD_BUG_ON(FAN_Q_OVERFLOW != FS_Q_OVERFLOW); |
918 | BUILD_BUG_ON(FAN_OPEN_PERM != FS_OPEN_PERM); |
919 | BUILD_BUG_ON(FAN_ACCESS_PERM != FS_ACCESS_PERM); |
920 | BUILD_BUG_ON(FAN_ONDIR != FS_ISDIR); |
921 | BUILD_BUG_ON(FAN_OPEN_EXEC != FS_OPEN_EXEC); |
922 | BUILD_BUG_ON(FAN_OPEN_EXEC_PERM != FS_OPEN_EXEC_PERM); |
923 | BUILD_BUG_ON(FAN_FS_ERROR != FS_ERROR); |
924 | BUILD_BUG_ON(FAN_RENAME != FS_RENAME); |
925 | |
926 | BUILD_BUG_ON(HWEIGHT32(ALL_FANOTIFY_EVENT_BITS) != 21); |
927 | |
928 | mask = fanotify_group_event_mask(group, iter_info, match_mask: &match_mask, |
929 | event_mask: mask, data, data_type, dir); |
930 | if (!mask) |
931 | return 0; |
932 | |
933 | pr_debug("%s: group=%p mask=%x report_mask=%x\n" , __func__, |
934 | group, mask, match_mask); |
935 | |
936 | if (fanotify_is_perm_event(mask)) { |
937 | /* |
938 | * fsnotify_prepare_user_wait() fails if we race with mark |
939 | * deletion. Just let the operation pass in that case. |
940 | */ |
941 | if (!fsnotify_prepare_user_wait(iter_info)) |
942 | return 0; |
943 | } |
944 | |
945 | if (FAN_GROUP_FLAG(group, FANOTIFY_FID_BITS)) { |
946 | fsid = fanotify_get_fsid(iter_info); |
947 | /* Racing with mark destruction or creation? */ |
948 | if (!fsid.val[0] && !fsid.val[1]) |
949 | return 0; |
950 | } |
951 | |
952 | event = fanotify_alloc_event(group, mask, data, data_type, dir, |
953 | file_name, fsid: &fsid, match_mask); |
954 | ret = -ENOMEM; |
955 | if (unlikely(!event)) { |
956 | /* |
957 | * We don't queue overflow events for permission events as |
958 | * there the access is denied and so no event is in fact lost. |
959 | */ |
960 | if (!fanotify_is_perm_event(mask)) |
961 | fsnotify_queue_overflow(group); |
962 | goto finish; |
963 | } |
964 | |
965 | fsn_event = &event->fse; |
966 | ret = fsnotify_insert_event(group, event: fsn_event, merge: fanotify_merge, |
967 | insert: fanotify_insert_event); |
968 | if (ret) { |
969 | /* Permission events shouldn't be merged */ |
970 | BUG_ON(ret == 1 && mask & FANOTIFY_PERM_EVENTS); |
971 | /* Our event wasn't used in the end. Free it. */ |
972 | fsnotify_destroy_event(group, event: fsn_event); |
973 | |
974 | ret = 0; |
975 | } else if (fanotify_is_perm_event(mask)) { |
976 | ret = fanotify_get_response(group, event: FANOTIFY_PERM(event), |
977 | iter_info); |
978 | } |
979 | finish: |
980 | if (fanotify_is_perm_event(mask)) |
981 | fsnotify_finish_user_wait(iter_info); |
982 | |
983 | return ret; |
984 | } |
985 | |
986 | static void fanotify_free_group_priv(struct fsnotify_group *group) |
987 | { |
988 | kfree(objp: group->fanotify_data.merge_hash); |
989 | if (group->fanotify_data.ucounts) |
990 | dec_ucount(ucounts: group->fanotify_data.ucounts, |
991 | type: UCOUNT_FANOTIFY_GROUPS); |
992 | |
993 | if (mempool_initialized(pool: &group->fanotify_data.error_events_pool)) |
994 | mempool_exit(pool: &group->fanotify_data.error_events_pool); |
995 | } |
996 | |
997 | static void fanotify_free_path_event(struct fanotify_event *event) |
998 | { |
999 | path_put(fanotify_event_path(event)); |
1000 | kmem_cache_free(s: fanotify_path_event_cachep, objp: FANOTIFY_PE(event)); |
1001 | } |
1002 | |
1003 | static void fanotify_free_perm_event(struct fanotify_event *event) |
1004 | { |
1005 | path_put(fanotify_event_path(event)); |
1006 | kmem_cache_free(s: fanotify_perm_event_cachep, objp: FANOTIFY_PERM(event)); |
1007 | } |
1008 | |
1009 | static void fanotify_free_fid_event(struct fanotify_event *event) |
1010 | { |
1011 | struct fanotify_fid_event *ffe = FANOTIFY_FE(event); |
1012 | |
1013 | if (fanotify_fh_has_ext_buf(fh: &ffe->object_fh)) |
1014 | kfree(objp: fanotify_fh_ext_buf(fh: &ffe->object_fh)); |
1015 | kmem_cache_free(s: fanotify_fid_event_cachep, objp: ffe); |
1016 | } |
1017 | |
/* Free a name event; it is released with a single kfree(). */
static void fanotify_free_name_event(struct fanotify_event *event)
{
	struct fanotify_name_event *name_event = FANOTIFY_NE(event);

	kfree(name_event);
}
1022 | |
1023 | static void fanotify_free_error_event(struct fsnotify_group *group, |
1024 | struct fanotify_event *event) |
1025 | { |
1026 | struct fanotify_error_event *fee = FANOTIFY_EE(event); |
1027 | |
1028 | mempool_free(element: fee, pool: &group->fanotify_data.error_events_pool); |
1029 | } |
1030 | |
1031 | static void fanotify_free_event(struct fsnotify_group *group, |
1032 | struct fsnotify_event *fsn_event) |
1033 | { |
1034 | struct fanotify_event *event; |
1035 | |
1036 | event = FANOTIFY_E(fse: fsn_event); |
1037 | put_pid(pid: event->pid); |
1038 | switch (event->type) { |
1039 | case FANOTIFY_EVENT_TYPE_PATH: |
1040 | fanotify_free_path_event(event); |
1041 | break; |
1042 | case FANOTIFY_EVENT_TYPE_PATH_PERM: |
1043 | fanotify_free_perm_event(event); |
1044 | break; |
1045 | case FANOTIFY_EVENT_TYPE_FID: |
1046 | fanotify_free_fid_event(event); |
1047 | break; |
1048 | case FANOTIFY_EVENT_TYPE_FID_NAME: |
1049 | fanotify_free_name_event(event); |
1050 | break; |
1051 | case FANOTIFY_EVENT_TYPE_OVERFLOW: |
1052 | kfree(objp: event); |
1053 | break; |
1054 | case FANOTIFY_EVENT_TYPE_FS_ERROR: |
1055 | fanotify_free_error_event(group, event); |
1056 | break; |
1057 | default: |
1058 | WARN_ON_ONCE(1); |
1059 | } |
1060 | } |
1061 | |
1062 | static void fanotify_freeing_mark(struct fsnotify_mark *mark, |
1063 | struct fsnotify_group *group) |
1064 | { |
1065 | if (!FAN_GROUP_FLAG(group, FAN_UNLIMITED_MARKS)) |
1066 | dec_ucount(ucounts: group->fanotify_data.ucounts, type: UCOUNT_FANOTIFY_MARKS); |
1067 | } |
1068 | |
1069 | static void fanotify_free_mark(struct fsnotify_mark *fsn_mark) |
1070 | { |
1071 | kmem_cache_free(s: fanotify_mark_cache, objp: fsn_mark); |
1072 | } |
1073 | |
1074 | const struct fsnotify_ops fanotify_fsnotify_ops = { |
1075 | .handle_event = fanotify_handle_event, |
1076 | .free_group_priv = fanotify_free_group_priv, |
1077 | .free_event = fanotify_free_event, |
1078 | .freeing_mark = fanotify_freeing_mark, |
1079 | .free_mark = fanotify_free_mark, |
1080 | }; |
1081 | |