// SPDX-License-Identifier: GPL-2.0
/*
 * devtmpfs - kernel-maintained tmpfs-based /dev
 *
 * Copyright (C) 2009, Kay Sievers <kay.sievers@vrfy.org>
 *
 * During bootup, before any driver core device is registered,
 * devtmpfs, a tmpfs-based filesystem, is created. Every driver-core
 * device which requests a device node will add a node in this
 * filesystem.
 * By default, all devices are named after the name of the device,
 * owned by root and have a default mode of 0600. Subsystems can
 * override the default settings if needed.
 */
15 | |
16 | #define pr_fmt(fmt) "devtmpfs: " fmt |
17 | |
18 | #include <linux/kernel.h> |
19 | #include <linux/syscalls.h> |
20 | #include <linux/mount.h> |
21 | #include <linux/device.h> |
22 | #include <linux/blkdev.h> |
23 | #include <linux/namei.h> |
24 | #include <linux/fs.h> |
25 | #include <linux/shmem_fs.h> |
26 | #include <linux/ramfs.h> |
27 | #include <linux/sched.h> |
28 | #include <linux/slab.h> |
29 | #include <linux/kthread.h> |
30 | #include <linux/init_syscalls.h> |
31 | #include <uapi/linux/mount.h> |
32 | #include "base.h" |
33 | |
/*
 * Mount flags passed to init_mount() for the devtmpfs mounts made in this
 * file. With CONFIG_DEVTMPFS_SAFE the mount is additionally made
 * MS_NOEXEC and MS_NOSUID.
 */
#ifndef CONFIG_DEVTMPFS_SAFE
#define DEVTMPFS_MFLAGS       (MS_SILENT)
#else
#define DEVTMPFS_MFLAGS       (MS_SILENT | MS_NOEXEC | MS_NOSUID)
#endif
39 | |
40 | static struct task_struct *thread; |
41 | |
42 | static int __initdata mount_dev = IS_ENABLED(CONFIG_DEVTMPFS_MOUNT); |
43 | |
44 | static DEFINE_SPINLOCK(req_lock); |
45 | |
46 | static struct req { |
47 | struct req *next; |
48 | struct completion done; |
49 | int err; |
50 | const char *name; |
51 | umode_t mode; /* 0 => delete */ |
52 | kuid_t uid; |
53 | kgid_t gid; |
54 | struct device *dev; |
55 | } *requests; |
56 | |
57 | static int __init mount_param(char *str) |
58 | { |
59 | mount_dev = simple_strtoul(str, NULL, 0); |
60 | return 1; |
61 | } |
62 | __setup("devtmpfs.mount=" , mount_param); |
63 | |
64 | static struct vfsmount *mnt; |
65 | |
66 | static struct dentry *public_dev_mount(struct file_system_type *fs_type, int flags, |
67 | const char *dev_name, void *data) |
68 | { |
69 | struct super_block *s = mnt->mnt_sb; |
70 | int err; |
71 | |
72 | atomic_inc(v: &s->s_active); |
73 | down_write(sem: &s->s_umount); |
74 | err = reconfigure_single(s, flags, data); |
75 | if (err < 0) { |
76 | deactivate_locked_super(sb: s); |
77 | return ERR_PTR(error: err); |
78 | } |
79 | return dget(dentry: s->s_root); |
80 | } |
81 | |
82 | static struct file_system_type internal_fs_type = { |
83 | .name = "devtmpfs" , |
84 | #ifdef CONFIG_TMPFS |
85 | .init_fs_context = shmem_init_fs_context, |
86 | #else |
87 | .init_fs_context = ramfs_init_fs_context, |
88 | #endif |
89 | .kill_sb = kill_litter_super, |
90 | }; |
91 | |
92 | static struct file_system_type dev_fs_type = { |
93 | .name = "devtmpfs" , |
94 | .mount = public_dev_mount, |
95 | }; |
96 | |
97 | static int devtmpfs_submit_req(struct req *req, const char *tmp) |
98 | { |
99 | init_completion(x: &req->done); |
100 | |
101 | spin_lock(lock: &req_lock); |
102 | req->next = requests; |
103 | requests = req; |
104 | spin_unlock(lock: &req_lock); |
105 | |
106 | wake_up_process(tsk: thread); |
107 | wait_for_completion(&req->done); |
108 | |
109 | kfree(objp: tmp); |
110 | |
111 | return req->err; |
112 | } |
113 | |
114 | int devtmpfs_create_node(struct device *dev) |
115 | { |
116 | const char *tmp = NULL; |
117 | struct req req; |
118 | |
119 | if (!thread) |
120 | return 0; |
121 | |
122 | req.mode = 0; |
123 | req.uid = GLOBAL_ROOT_UID; |
124 | req.gid = GLOBAL_ROOT_GID; |
125 | req.name = device_get_devnode(dev, &req.mode, &req.uid, &req.gid, &tmp); |
126 | if (!req.name) |
127 | return -ENOMEM; |
128 | |
129 | if (req.mode == 0) |
130 | req.mode = 0600; |
131 | if (is_blockdev(dev)) |
132 | req.mode |= S_IFBLK; |
133 | else |
134 | req.mode |= S_IFCHR; |
135 | |
136 | req.dev = dev; |
137 | |
138 | return devtmpfs_submit_req(&req, tmp); |
139 | } |
140 | |
141 | int devtmpfs_delete_node(struct device *dev) |
142 | { |
143 | const char *tmp = NULL; |
144 | struct req req; |
145 | |
146 | if (!thread) |
147 | return 0; |
148 | |
149 | req.name = device_get_devnode(dev, NULL, NULL, NULL, tmp: &tmp); |
150 | if (!req.name) |
151 | return -ENOMEM; |
152 | |
153 | req.mode = 0; |
154 | req.dev = dev; |
155 | |
156 | return devtmpfs_submit_req(req: &req, tmp); |
157 | } |
158 | |
159 | static int dev_mkdir(const char *name, umode_t mode) |
160 | { |
161 | struct dentry *dentry; |
162 | struct path path; |
163 | int err; |
164 | |
165 | dentry = kern_path_create(AT_FDCWD, name, &path, LOOKUP_DIRECTORY); |
166 | if (IS_ERR(ptr: dentry)) |
167 | return PTR_ERR(ptr: dentry); |
168 | |
169 | err = vfs_mkdir(&nop_mnt_idmap, d_inode(dentry: path.dentry), dentry, mode); |
170 | if (!err) |
171 | /* mark as kernel-created inode */ |
172 | d_inode(dentry)->i_private = &thread; |
173 | done_path_create(&path, dentry); |
174 | return err; |
175 | } |
176 | |
177 | static int create_path(const char *nodepath) |
178 | { |
179 | char *path; |
180 | char *s; |
181 | int err = 0; |
182 | |
183 | /* parent directories do not exist, create them */ |
184 | path = kstrdup(s: nodepath, GFP_KERNEL); |
185 | if (!path) |
186 | return -ENOMEM; |
187 | |
188 | s = path; |
189 | for (;;) { |
190 | s = strchr(s, '/'); |
191 | if (!s) |
192 | break; |
193 | s[0] = '\0'; |
194 | err = dev_mkdir(name: path, mode: 0755); |
195 | if (err && err != -EEXIST) |
196 | break; |
197 | s[0] = '/'; |
198 | s++; |
199 | } |
200 | kfree(objp: path); |
201 | return err; |
202 | } |
203 | |
204 | static int handle_create(const char *nodename, umode_t mode, kuid_t uid, |
205 | kgid_t gid, struct device *dev) |
206 | { |
207 | struct dentry *dentry; |
208 | struct path path; |
209 | int err; |
210 | |
211 | dentry = kern_path_create(AT_FDCWD, nodename, &path, 0); |
212 | if (dentry == ERR_PTR(error: -ENOENT)) { |
213 | create_path(nodepath: nodename); |
214 | dentry = kern_path_create(AT_FDCWD, nodename, &path, 0); |
215 | } |
216 | if (IS_ERR(ptr: dentry)) |
217 | return PTR_ERR(ptr: dentry); |
218 | |
219 | err = vfs_mknod(&nop_mnt_idmap, d_inode(dentry: path.dentry), dentry, mode, |
220 | dev->devt); |
221 | if (!err) { |
222 | struct iattr newattrs; |
223 | |
224 | newattrs.ia_mode = mode; |
225 | newattrs.ia_uid = uid; |
226 | newattrs.ia_gid = gid; |
227 | newattrs.ia_valid = ATTR_MODE|ATTR_UID|ATTR_GID; |
228 | inode_lock(inode: d_inode(dentry)); |
229 | notify_change(&nop_mnt_idmap, dentry, &newattrs, NULL); |
230 | inode_unlock(inode: d_inode(dentry)); |
231 | |
232 | /* mark as kernel-created inode */ |
233 | d_inode(dentry)->i_private = &thread; |
234 | } |
235 | done_path_create(&path, dentry); |
236 | return err; |
237 | } |
238 | |
239 | static int dev_rmdir(const char *name) |
240 | { |
241 | struct path parent; |
242 | struct dentry *dentry; |
243 | int err; |
244 | |
245 | dentry = kern_path_locked(name, &parent); |
246 | if (IS_ERR(ptr: dentry)) |
247 | return PTR_ERR(ptr: dentry); |
248 | if (d_really_is_positive(dentry)) { |
249 | if (d_inode(dentry)->i_private == &thread) |
250 | err = vfs_rmdir(&nop_mnt_idmap, d_inode(dentry: parent.dentry), |
251 | dentry); |
252 | else |
253 | err = -EPERM; |
254 | } else { |
255 | err = -ENOENT; |
256 | } |
257 | dput(dentry); |
258 | inode_unlock(inode: d_inode(dentry: parent.dentry)); |
259 | path_put(&parent); |
260 | return err; |
261 | } |
262 | |
263 | static int delete_path(const char *nodepath) |
264 | { |
265 | char *path; |
266 | int err = 0; |
267 | |
268 | path = kstrdup(s: nodepath, GFP_KERNEL); |
269 | if (!path) |
270 | return -ENOMEM; |
271 | |
272 | for (;;) { |
273 | char *base; |
274 | |
275 | base = strrchr(path, '/'); |
276 | if (!base) |
277 | break; |
278 | base[0] = '\0'; |
279 | err = dev_rmdir(name: path); |
280 | if (err) |
281 | break; |
282 | } |
283 | |
284 | kfree(objp: path); |
285 | return err; |
286 | } |
287 | |
288 | static int dev_mynode(struct device *dev, struct inode *inode, struct kstat *stat) |
289 | { |
290 | /* did we create it */ |
291 | if (inode->i_private != &thread) |
292 | return 0; |
293 | |
294 | /* does the dev_t match */ |
295 | if (is_blockdev(dev)) { |
296 | if (!S_ISBLK(stat->mode)) |
297 | return 0; |
298 | } else { |
299 | if (!S_ISCHR(stat->mode)) |
300 | return 0; |
301 | } |
302 | if (stat->rdev != dev->devt) |
303 | return 0; |
304 | |
305 | /* ours */ |
306 | return 1; |
307 | } |
308 | |
309 | static int handle_remove(const char *nodename, struct device *dev) |
310 | { |
311 | struct path parent; |
312 | struct dentry *dentry; |
313 | int deleted = 0; |
314 | int err; |
315 | |
316 | dentry = kern_path_locked(nodename, &parent); |
317 | if (IS_ERR(ptr: dentry)) |
318 | return PTR_ERR(ptr: dentry); |
319 | |
320 | if (d_really_is_positive(dentry)) { |
321 | struct kstat stat; |
322 | struct path p = {.mnt = parent.mnt, .dentry = dentry}; |
323 | err = vfs_getattr(&p, &stat, STATX_TYPE | STATX_MODE, |
324 | AT_STATX_SYNC_AS_STAT); |
325 | if (!err && dev_mynode(dev, inode: d_inode(dentry), stat: &stat)) { |
326 | struct iattr newattrs; |
327 | /* |
328 | * before unlinking this node, reset permissions |
329 | * of possible references like hardlinks |
330 | */ |
331 | newattrs.ia_uid = GLOBAL_ROOT_UID; |
332 | newattrs.ia_gid = GLOBAL_ROOT_GID; |
333 | newattrs.ia_mode = stat.mode & ~0777; |
334 | newattrs.ia_valid = |
335 | ATTR_UID|ATTR_GID|ATTR_MODE; |
336 | inode_lock(d_inode(dentry)); |
337 | notify_change(&nop_mnt_idmap, dentry, &newattrs, NULL); |
338 | inode_unlock(d_inode(dentry)); |
339 | err = vfs_unlink(&nop_mnt_idmap, d_inode(parent.dentry), |
340 | dentry, NULL); |
341 | if (!err || err == -ENOENT) |
342 | deleted = 1; |
343 | } |
344 | } else { |
345 | err = -ENOENT; |
346 | } |
347 | dput(dentry); |
348 | inode_unlock(d_inode(parent.dentry)); |
349 | |
350 | path_put(&parent); |
351 | if (deleted && strchr(nodename, '/')) |
352 | delete_path(nodename); |
353 | return err; |
354 | } |
355 | |
356 | /* |
357 | * If configured, or requested by the commandline, devtmpfs will be |
358 | * auto-mounted after the kernel mounted the root filesystem. |
359 | */ |
360 | int __init devtmpfs_mount(void) |
361 | { |
362 | int err; |
363 | |
364 | if (!mount_dev) |
365 | return 0; |
366 | |
367 | if (!thread) |
368 | return 0; |
369 | |
370 | err = init_mount(dev_name: "devtmpfs" , dir_name: "dev" , type_page: "devtmpfs" , DEVTMPFS_MFLAGS, NULL); |
371 | if (err) |
372 | pr_info("error mounting %d\n" , err); |
373 | else |
374 | pr_info("mounted\n" ); |
375 | return err; |
376 | } |
377 | |
378 | static __initdata DECLARE_COMPLETION(setup_done); |
379 | |
380 | static int handle(const char *name, umode_t mode, kuid_t uid, kgid_t gid, |
381 | struct device *dev) |
382 | { |
383 | if (mode) |
384 | return handle_create(nodename: name, mode, uid, gid, dev); |
385 | else |
386 | return handle_remove(nodename: name, dev); |
387 | } |
388 | |
389 | static void __noreturn devtmpfs_work_loop(void) |
390 | { |
391 | while (1) { |
392 | spin_lock(lock: &req_lock); |
393 | while (requests) { |
394 | struct req *req = requests; |
395 | requests = NULL; |
396 | spin_unlock(lock: &req_lock); |
397 | while (req) { |
398 | struct req *next = req->next; |
399 | req->err = handle(name: req->name, mode: req->mode, |
400 | uid: req->uid, gid: req->gid, dev: req->dev); |
401 | complete(&req->done); |
402 | req = next; |
403 | } |
404 | spin_lock(lock: &req_lock); |
405 | } |
406 | __set_current_state(TASK_INTERRUPTIBLE); |
407 | spin_unlock(lock: &req_lock); |
408 | schedule(); |
409 | } |
410 | } |
411 | |
412 | static noinline int __init devtmpfs_setup(void *p) |
413 | { |
414 | int err; |
415 | |
416 | err = ksys_unshare(CLONE_NEWNS); |
417 | if (err) |
418 | goto out; |
419 | err = init_mount(dev_name: "devtmpfs" , dir_name: "/" , type_page: "devtmpfs" , DEVTMPFS_MFLAGS, NULL); |
420 | if (err) |
421 | goto out; |
422 | init_chdir(filename: "/.." ); /* will traverse into overmounted root */ |
423 | init_chroot(filename: "." ); |
424 | out: |
425 | *(int *)p = err; |
426 | return err; |
427 | } |
428 | |
429 | /* |
430 | * The __ref is because devtmpfs_setup needs to be __init for the routines it |
431 | * calls. That call is done while devtmpfs_init, which is marked __init, |
432 | * synchronously waits for it to complete. |
433 | */ |
434 | static int __ref devtmpfsd(void *p) |
435 | { |
436 | int err = devtmpfs_setup(p); |
437 | |
438 | complete(&setup_done); |
439 | if (err) |
440 | return err; |
441 | devtmpfs_work_loop(); |
442 | return 0; |
443 | } |
444 | |
445 | /* |
446 | * Create devtmpfs instance, driver-core devices will add their device |
447 | * nodes here. |
448 | */ |
449 | int __init devtmpfs_init(void) |
450 | { |
451 | char opts[] = "mode=0755" ; |
452 | int err; |
453 | |
454 | mnt = vfs_kern_mount(type: &internal_fs_type, flags: 0, name: "devtmpfs" , data: opts); |
455 | if (IS_ERR(ptr: mnt)) { |
456 | pr_err("unable to create devtmpfs %ld\n" , PTR_ERR(mnt)); |
457 | return PTR_ERR(ptr: mnt); |
458 | } |
459 | err = register_filesystem(&dev_fs_type); |
460 | if (err) { |
461 | pr_err("unable to register devtmpfs type %d\n" , err); |
462 | return err; |
463 | } |
464 | |
465 | thread = kthread_run(devtmpfsd, &err, "kdevtmpfs" ); |
466 | if (!IS_ERR(ptr: thread)) { |
467 | wait_for_completion(&setup_done); |
468 | } else { |
469 | err = PTR_ERR(ptr: thread); |
470 | thread = NULL; |
471 | } |
472 | |
473 | if (err) { |
474 | pr_err("unable to create devtmpfs %d\n" , err); |
475 | unregister_filesystem(&dev_fs_type); |
476 | thread = NULL; |
477 | return err; |
478 | } |
479 | |
480 | pr_info("initialized\n" ); |
481 | return 0; |
482 | } |
483 | |