1 | // SPDX-License-Identifier: GPL-2.0 |
2 | /* |
3 | * linux/fs/readdir.c |
4 | * |
5 | * Copyright (C) 1995 Linus Torvalds |
6 | */ |
7 | |
8 | #include <linux/stddef.h> |
9 | #include <linux/kernel.h> |
10 | #include <linux/export.h> |
11 | #include <linux/time.h> |
12 | #include <linux/mm.h> |
13 | #include <linux/errno.h> |
14 | #include <linux/stat.h> |
15 | #include <linux/file.h> |
16 | #include <linux/fs.h> |
17 | #include <linux/fsnotify.h> |
18 | #include <linux/dirent.h> |
19 | #include <linux/security.h> |
20 | #include <linux/syscalls.h> |
21 | #include <linux/unistd.h> |
22 | #include <linux/compat.h> |
23 | #include <linux/uaccess.h> |
24 | |
25 | #include <asm/unaligned.h> |
26 | |
27 | /* |
28 | * Some filesystems were never converted to '->iterate_shared()' |
29 | * and their directory iterators want the inode lock held for |
30 | * writing. This wrapper allows for converting from the shared |
31 | * semantics to the exclusive inode use. |
32 | */ |
33 | int wrap_directory_iterator(struct file *file, |
34 | struct dir_context *ctx, |
35 | int (*iter)(struct file *, struct dir_context *)) |
36 | { |
37 | struct inode *inode = file_inode(f: file); |
38 | int ret; |
39 | |
40 | /* |
41 | * We'd love to have an 'inode_upgrade_trylock()' operation, |
42 | * see the comment in mmap_upgrade_trylock() in mm/memory.c. |
43 | * |
44 | * But considering this is for "filesystems that never got |
45 | * converted", it really doesn't matter. |
46 | * |
47 | * Also note that since we have to return with the lock held |
48 | * for reading, we can't use the "killable()" locking here, |
49 | * since we do need to get the lock even if we're dying. |
50 | * |
51 | * We could do the write part killably and then get the read |
52 | * lock unconditionally if it mattered, but see above on why |
53 | * this does the very simplistic conversion. |
54 | */ |
55 | up_read(sem: &inode->i_rwsem); |
56 | down_write(sem: &inode->i_rwsem); |
57 | |
58 | /* |
59 | * Since we dropped the inode lock, we should do the |
60 | * DEADDIR test again. See 'iterate_dir()' below. |
61 | * |
62 | * Note that we don't need to re-do the f_pos games, |
63 | * since the file must be locked wrt f_pos anyway. |
64 | */ |
65 | ret = -ENOENT; |
66 | if (!IS_DEADDIR(inode)) |
67 | ret = iter(file, ctx); |
68 | |
69 | downgrade_write(sem: &inode->i_rwsem); |
70 | return ret; |
71 | } |
72 | EXPORT_SYMBOL(wrap_directory_iterator); |
73 | |
/*
 * Copy a directory entry name of _len bytes to the user buffer _dst
 * and NUL-terminate it. The terminator is stored first, then the
 * name bytes.
 *
 * Note the "unsafe_put_user()" semantics: we goto a
 * label for errors.
 */
#define unsafe_copy_dirent_name(_dst, _src, _len, label) do {		\
	char __user *__dn_dst = (_dst);					\
	const char *__dn_src = (_src);					\
	size_t __dn_len = (_len);					\
	unsafe_put_user(0, __dn_dst + __dn_len, label);			\
	unsafe_copy_to_user(__dn_dst, __dn_src, __dn_len, label);	\
} while (0)
85 | |
86 | |
87 | int iterate_dir(struct file *file, struct dir_context *ctx) |
88 | { |
89 | struct inode *inode = file_inode(f: file); |
90 | int res = -ENOTDIR; |
91 | |
92 | if (!file->f_op->iterate_shared) |
93 | goto out; |
94 | |
95 | res = security_file_permission(file, MAY_READ); |
96 | if (res) |
97 | goto out; |
98 | |
99 | res = fsnotify_file_perm(file, MAY_READ); |
100 | if (res) |
101 | goto out; |
102 | |
103 | res = down_read_killable(sem: &inode->i_rwsem); |
104 | if (res) |
105 | goto out; |
106 | |
107 | res = -ENOENT; |
108 | if (!IS_DEADDIR(inode)) { |
109 | ctx->pos = file->f_pos; |
110 | res = file->f_op->iterate_shared(file, ctx); |
111 | file->f_pos = ctx->pos; |
112 | fsnotify_access(file); |
113 | file_accessed(file); |
114 | } |
115 | inode_unlock_shared(inode); |
116 | out: |
117 | return res; |
118 | } |
119 | EXPORT_SYMBOL(iterate_dir); |
120 | |
/*
 * POSIX says that a dirent name cannot contain NUL or a '/'.
 *
 * It's not 100% clear what we should really do in this case.
 * The filesystem is clearly corrupted, but returning a hard
 * error means that you now don't see any of the other names
 * either, so that isn't a perfect alternative.
 *
 * And if you return an error, what error do you use? Several
 * filesystems seem to have decided on EUCLEAN being the error
 * code for EFSCORRUPTED, and that may be the error to use. Or
 * just EIO, which is perhaps more obvious to users.
 *
 * In order to see the other file names in the directory, the
 * caller might want to make this a "soft" error: skip the
 * entry, and return the error at the end instead.
 *
 * Note that this should likely do a "memchr(name, 0, len)"
 * check too, since that would be filesystem corruption as
 * well. However, that case can't actually confuse user space,
 * which has to do a strlen() on the name anyway to find the
 * filename length, and the above "soft error" worry means
 * that it's probably better left alone until we have that
 * issue clarified.
 *
 * Note the PATH_MAX check - it's arbitrary but the real
 * kernel limit on a possible path component, not NAME_MAX,
 * which is the technical standard limit.
 *
 * Returns 0 if the name is acceptable, -EIO otherwise.
 */
static int verify_dirent_name(const char *name, int len)
{
	/* Empty, negative-length and over-long names are all rejected. */
	if (len <= 0 || len >= PATH_MAX)
		return -EIO;
	/* Only the first 'len' bytes are scanned for a separator. */
	if (memchr(name, '/', len))
		return -EIO;
	return 0;
}
158 | |
159 | /* |
160 | * Traditional linux readdir() handling.. |
161 | * |
162 | * "count=1" is a special case, meaning that the buffer is one |
163 | * dirent-structure in size and that the code can't handle more |
164 | * anyway. Thus the special "fillonedir()" function for that |
165 | * case (the low-level handlers don't need to care about this). |
166 | */ |
167 | |
168 | #ifdef __ARCH_WANT_OLD_READDIR |
169 | |
/*
 * Record layout written to user space by fillonedir() for the old
 * readdir() system call. The field order is the user-visible layout.
 */
struct old_linux_dirent {
	unsigned long	d_ino;		/* inode number of the entry */
	unsigned long	d_offset;	/* offset passed in by the iterator */
	unsigned short	d_namlen;	/* name length, terminator not counted */
	char		d_name[];	/* name bytes followed by a NUL */
};
176 | |
177 | struct readdir_callback { |
178 | struct dir_context ctx; |
179 | struct old_linux_dirent __user * dirent; |
180 | int result; |
181 | }; |
182 | |
183 | static bool fillonedir(struct dir_context *ctx, const char *name, int namlen, |
184 | loff_t offset, u64 ino, unsigned int d_type) |
185 | { |
186 | struct readdir_callback *buf = |
187 | container_of(ctx, struct readdir_callback, ctx); |
188 | struct old_linux_dirent __user * dirent; |
189 | unsigned long d_ino; |
190 | |
191 | if (buf->result) |
192 | return false; |
193 | buf->result = verify_dirent_name(name, len: namlen); |
194 | if (buf->result) |
195 | return false; |
196 | d_ino = ino; |
197 | if (sizeof(d_ino) < sizeof(ino) && d_ino != ino) { |
198 | buf->result = -EOVERFLOW; |
199 | return false; |
200 | } |
201 | buf->result++; |
202 | dirent = buf->dirent; |
203 | if (!user_write_access_begin(dirent, |
204 | (unsigned long)(dirent->d_name + namlen + 1) - |
205 | (unsigned long)dirent)) |
206 | goto efault; |
207 | unsafe_put_user(d_ino, &dirent->d_ino, efault_end); |
208 | unsafe_put_user(offset, &dirent->d_offset, efault_end); |
209 | unsafe_put_user(namlen, &dirent->d_namlen, efault_end); |
210 | unsafe_copy_dirent_name(dirent->d_name, name, namlen, efault_end); |
211 | user_write_access_end(); |
212 | return true; |
213 | efault_end: |
214 | user_write_access_end(); |
215 | efault: |
216 | buf->result = -EFAULT; |
217 | return false; |
218 | } |
219 | |
220 | SYSCALL_DEFINE3(old_readdir, unsigned int, fd, |
221 | struct old_linux_dirent __user *, dirent, unsigned int, count) |
222 | { |
223 | int error; |
224 | struct fd f = fdget_pos(fd); |
225 | struct readdir_callback buf = { |
226 | .ctx.actor = fillonedir, |
227 | .dirent = dirent |
228 | }; |
229 | |
230 | if (!f.file) |
231 | return -EBADF; |
232 | |
233 | error = iterate_dir(f.file, &buf.ctx); |
234 | if (buf.result) |
235 | error = buf.result; |
236 | |
237 | fdput_pos(f); |
238 | return error; |
239 | } |
240 | |
241 | #endif /* __ARCH_WANT_OLD_READDIR */ |
242 | |
243 | /* |
244 | * New, all-improved, singing, dancing, iBCS2-compliant getdents() |
245 | * interface. |
246 | */ |
/*
 * Record layout written to user space by filldir() for getdents().
 * filldir() additionally stores d_type in the last byte of each
 * record (at offset d_reclen - 1). The field order is the
 * user-visible layout.
 */
struct linux_dirent {
	unsigned long	d_ino;		/* inode number of the entry */
	unsigned long	d_off;		/* filled in when the next entry (or the end) is known */
	unsigned short	d_reclen;	/* total, aligned size of this record */
	char		d_name[];	/* name bytes followed by a NUL */
};
253 | |
254 | struct getdents_callback { |
255 | struct dir_context ctx; |
256 | struct linux_dirent __user * current_dir; |
257 | int prev_reclen; |
258 | int count; |
259 | int error; |
260 | }; |
261 | |
262 | static bool filldir(struct dir_context *ctx, const char *name, int namlen, |
263 | loff_t offset, u64 ino, unsigned int d_type) |
264 | { |
265 | struct linux_dirent __user *dirent, *prev; |
266 | struct getdents_callback *buf = |
267 | container_of(ctx, struct getdents_callback, ctx); |
268 | unsigned long d_ino; |
269 | int reclen = ALIGN(offsetof(struct linux_dirent, d_name) + namlen + 2, |
270 | sizeof(long)); |
271 | int prev_reclen; |
272 | |
273 | buf->error = verify_dirent_name(name, len: namlen); |
274 | if (unlikely(buf->error)) |
275 | return false; |
276 | buf->error = -EINVAL; /* only used if we fail.. */ |
277 | if (reclen > buf->count) |
278 | return false; |
279 | d_ino = ino; |
280 | if (sizeof(d_ino) < sizeof(ino) && d_ino != ino) { |
281 | buf->error = -EOVERFLOW; |
282 | return false; |
283 | } |
284 | prev_reclen = buf->prev_reclen; |
285 | if (prev_reclen && signal_pending(current)) |
286 | return false; |
287 | dirent = buf->current_dir; |
288 | prev = (void __user *) dirent - prev_reclen; |
289 | if (!user_write_access_begin(prev, reclen + prev_reclen)) |
290 | goto efault; |
291 | |
292 | /* This might be 'dirent->d_off', but if so it will get overwritten */ |
293 | unsafe_put_user(offset, &prev->d_off, efault_end); |
294 | unsafe_put_user(d_ino, &dirent->d_ino, efault_end); |
295 | unsafe_put_user(reclen, &dirent->d_reclen, efault_end); |
296 | unsafe_put_user(d_type, (char __user *) dirent + reclen - 1, efault_end); |
297 | unsafe_copy_dirent_name(dirent->d_name, name, namlen, efault_end); |
298 | user_write_access_end(); |
299 | |
300 | buf->current_dir = (void __user *)dirent + reclen; |
301 | buf->prev_reclen = reclen; |
302 | buf->count -= reclen; |
303 | return true; |
304 | efault_end: |
305 | user_write_access_end(); |
306 | efault: |
307 | buf->error = -EFAULT; |
308 | return false; |
309 | } |
310 | |
311 | SYSCALL_DEFINE3(getdents, unsigned int, fd, |
312 | struct linux_dirent __user *, dirent, unsigned int, count) |
313 | { |
314 | struct fd f; |
315 | struct getdents_callback buf = { |
316 | .ctx.actor = filldir, |
317 | .count = count, |
318 | .current_dir = dirent |
319 | }; |
320 | int error; |
321 | |
322 | f = fdget_pos(fd); |
323 | if (!f.file) |
324 | return -EBADF; |
325 | |
326 | error = iterate_dir(f.file, &buf.ctx); |
327 | if (error >= 0) |
328 | error = buf.error; |
329 | if (buf.prev_reclen) { |
330 | struct linux_dirent __user * lastdirent; |
331 | lastdirent = (void __user *)buf.current_dir - buf.prev_reclen; |
332 | |
333 | if (put_user(buf.ctx.pos, &lastdirent->d_off)) |
334 | error = -EFAULT; |
335 | else |
336 | error = count - buf.count; |
337 | } |
338 | fdput_pos(f); |
339 | return error; |
340 | } |
341 | |
342 | struct getdents_callback64 { |
343 | struct dir_context ctx; |
344 | struct linux_dirent64 __user * current_dir; |
345 | int prev_reclen; |
346 | int count; |
347 | int error; |
348 | }; |
349 | |
350 | static bool filldir64(struct dir_context *ctx, const char *name, int namlen, |
351 | loff_t offset, u64 ino, unsigned int d_type) |
352 | { |
353 | struct linux_dirent64 __user *dirent, *prev; |
354 | struct getdents_callback64 *buf = |
355 | container_of(ctx, struct getdents_callback64, ctx); |
356 | int reclen = ALIGN(offsetof(struct linux_dirent64, d_name) + namlen + 1, |
357 | sizeof(u64)); |
358 | int prev_reclen; |
359 | |
360 | buf->error = verify_dirent_name(name, len: namlen); |
361 | if (unlikely(buf->error)) |
362 | return false; |
363 | buf->error = -EINVAL; /* only used if we fail.. */ |
364 | if (reclen > buf->count) |
365 | return false; |
366 | prev_reclen = buf->prev_reclen; |
367 | if (prev_reclen && signal_pending(current)) |
368 | return false; |
369 | dirent = buf->current_dir; |
370 | prev = (void __user *)dirent - prev_reclen; |
371 | if (!user_write_access_begin(prev, reclen + prev_reclen)) |
372 | goto efault; |
373 | |
374 | /* This might be 'dirent->d_off', but if so it will get overwritten */ |
375 | unsafe_put_user(offset, &prev->d_off, efault_end); |
376 | unsafe_put_user(ino, &dirent->d_ino, efault_end); |
377 | unsafe_put_user(reclen, &dirent->d_reclen, efault_end); |
378 | unsafe_put_user(d_type, &dirent->d_type, efault_end); |
379 | unsafe_copy_dirent_name(dirent->d_name, name, namlen, efault_end); |
380 | user_write_access_end(); |
381 | |
382 | buf->prev_reclen = reclen; |
383 | buf->current_dir = (void __user *)dirent + reclen; |
384 | buf->count -= reclen; |
385 | return true; |
386 | |
387 | efault_end: |
388 | user_write_access_end(); |
389 | efault: |
390 | buf->error = -EFAULT; |
391 | return false; |
392 | } |
393 | |
394 | SYSCALL_DEFINE3(getdents64, unsigned int, fd, |
395 | struct linux_dirent64 __user *, dirent, unsigned int, count) |
396 | { |
397 | struct fd f; |
398 | struct getdents_callback64 buf = { |
399 | .ctx.actor = filldir64, |
400 | .count = count, |
401 | .current_dir = dirent |
402 | }; |
403 | int error; |
404 | |
405 | f = fdget_pos(fd); |
406 | if (!f.file) |
407 | return -EBADF; |
408 | |
409 | error = iterate_dir(f.file, &buf.ctx); |
410 | if (error >= 0) |
411 | error = buf.error; |
412 | if (buf.prev_reclen) { |
413 | struct linux_dirent64 __user * lastdirent; |
414 | typeof(lastdirent->d_off) d_off = buf.ctx.pos; |
415 | |
416 | lastdirent = (void __user *) buf.current_dir - buf.prev_reclen; |
417 | if (put_user(d_off, &lastdirent->d_off)) |
418 | error = -EFAULT; |
419 | else |
420 | error = count - buf.count; |
421 | } |
422 | fdput_pos(f); |
423 | return error; |
424 | } |
425 | |
426 | #ifdef CONFIG_COMPAT |
427 | struct compat_old_linux_dirent { |
428 | compat_ulong_t d_ino; |
429 | compat_ulong_t d_offset; |
430 | unsigned short d_namlen; |
431 | char d_name[]; |
432 | }; |
433 | |
434 | struct compat_readdir_callback { |
435 | struct dir_context ctx; |
436 | struct compat_old_linux_dirent __user *dirent; |
437 | int result; |
438 | }; |
439 | |
440 | static bool compat_fillonedir(struct dir_context *ctx, const char *name, |
441 | int namlen, loff_t offset, u64 ino, |
442 | unsigned int d_type) |
443 | { |
444 | struct compat_readdir_callback *buf = |
445 | container_of(ctx, struct compat_readdir_callback, ctx); |
446 | struct compat_old_linux_dirent __user *dirent; |
447 | compat_ulong_t d_ino; |
448 | |
449 | if (buf->result) |
450 | return false; |
451 | buf->result = verify_dirent_name(name, len: namlen); |
452 | if (buf->result) |
453 | return false; |
454 | d_ino = ino; |
455 | if (sizeof(d_ino) < sizeof(ino) && d_ino != ino) { |
456 | buf->result = -EOVERFLOW; |
457 | return false; |
458 | } |
459 | buf->result++; |
460 | dirent = buf->dirent; |
461 | if (!user_write_access_begin(dirent, |
462 | (unsigned long)(dirent->d_name + namlen + 1) - |
463 | (unsigned long)dirent)) |
464 | goto efault; |
465 | unsafe_put_user(d_ino, &dirent->d_ino, efault_end); |
466 | unsafe_put_user(offset, &dirent->d_offset, efault_end); |
467 | unsafe_put_user(namlen, &dirent->d_namlen, efault_end); |
468 | unsafe_copy_dirent_name(dirent->d_name, name, namlen, efault_end); |
469 | user_write_access_end(); |
470 | return true; |
471 | efault_end: |
472 | user_write_access_end(); |
473 | efault: |
474 | buf->result = -EFAULT; |
475 | return false; |
476 | } |
477 | |
478 | COMPAT_SYSCALL_DEFINE3(old_readdir, unsigned int, fd, |
479 | struct compat_old_linux_dirent __user *, dirent, unsigned int, count) |
480 | { |
481 | int error; |
482 | struct fd f = fdget_pos(fd); |
483 | struct compat_readdir_callback buf = { |
484 | .ctx.actor = compat_fillonedir, |
485 | .dirent = dirent |
486 | }; |
487 | |
488 | if (!f.file) |
489 | return -EBADF; |
490 | |
491 | error = iterate_dir(f.file, &buf.ctx); |
492 | if (buf.result) |
493 | error = buf.result; |
494 | |
495 | fdput_pos(f); |
496 | return error; |
497 | } |
498 | |
499 | struct compat_linux_dirent { |
500 | compat_ulong_t d_ino; |
501 | compat_ulong_t d_off; |
502 | unsigned short d_reclen; |
503 | char d_name[]; |
504 | }; |
505 | |
506 | struct compat_getdents_callback { |
507 | struct dir_context ctx; |
508 | struct compat_linux_dirent __user *current_dir; |
509 | int prev_reclen; |
510 | int count; |
511 | int error; |
512 | }; |
513 | |
514 | static bool compat_filldir(struct dir_context *ctx, const char *name, int namlen, |
515 | loff_t offset, u64 ino, unsigned int d_type) |
516 | { |
517 | struct compat_linux_dirent __user *dirent, *prev; |
518 | struct compat_getdents_callback *buf = |
519 | container_of(ctx, struct compat_getdents_callback, ctx); |
520 | compat_ulong_t d_ino; |
521 | int reclen = ALIGN(offsetof(struct compat_linux_dirent, d_name) + |
522 | namlen + 2, sizeof(compat_long_t)); |
523 | int prev_reclen; |
524 | |
525 | buf->error = verify_dirent_name(name, len: namlen); |
526 | if (unlikely(buf->error)) |
527 | return false; |
528 | buf->error = -EINVAL; /* only used if we fail.. */ |
529 | if (reclen > buf->count) |
530 | return false; |
531 | d_ino = ino; |
532 | if (sizeof(d_ino) < sizeof(ino) && d_ino != ino) { |
533 | buf->error = -EOVERFLOW; |
534 | return false; |
535 | } |
536 | prev_reclen = buf->prev_reclen; |
537 | if (prev_reclen && signal_pending(current)) |
538 | return false; |
539 | dirent = buf->current_dir; |
540 | prev = (void __user *) dirent - prev_reclen; |
541 | if (!user_write_access_begin(prev, reclen + prev_reclen)) |
542 | goto efault; |
543 | |
544 | unsafe_put_user(offset, &prev->d_off, efault_end); |
545 | unsafe_put_user(d_ino, &dirent->d_ino, efault_end); |
546 | unsafe_put_user(reclen, &dirent->d_reclen, efault_end); |
547 | unsafe_put_user(d_type, (char __user *) dirent + reclen - 1, efault_end); |
548 | unsafe_copy_dirent_name(dirent->d_name, name, namlen, efault_end); |
549 | user_write_access_end(); |
550 | |
551 | buf->prev_reclen = reclen; |
552 | buf->current_dir = (void __user *)dirent + reclen; |
553 | buf->count -= reclen; |
554 | return true; |
555 | efault_end: |
556 | user_write_access_end(); |
557 | efault: |
558 | buf->error = -EFAULT; |
559 | return false; |
560 | } |
561 | |
562 | COMPAT_SYSCALL_DEFINE3(getdents, unsigned int, fd, |
563 | struct compat_linux_dirent __user *, dirent, unsigned int, count) |
564 | { |
565 | struct fd f; |
566 | struct compat_getdents_callback buf = { |
567 | .ctx.actor = compat_filldir, |
568 | .current_dir = dirent, |
569 | .count = count |
570 | }; |
571 | int error; |
572 | |
573 | f = fdget_pos(fd); |
574 | if (!f.file) |
575 | return -EBADF; |
576 | |
577 | error = iterate_dir(f.file, &buf.ctx); |
578 | if (error >= 0) |
579 | error = buf.error; |
580 | if (buf.prev_reclen) { |
581 | struct compat_linux_dirent __user * lastdirent; |
582 | lastdirent = (void __user *)buf.current_dir - buf.prev_reclen; |
583 | |
584 | if (put_user(buf.ctx.pos, &lastdirent->d_off)) |
585 | error = -EFAULT; |
586 | else |
587 | error = count - buf.count; |
588 | } |
589 | fdput_pos(f); |
590 | return error; |
591 | } |
592 | #endif |
593 | |