1 | // SPDX-License-Identifier: GPL-2.0-only |
2 | /* |
3 | * binfmt_misc.c |
4 | * |
5 | * Copyright (C) 1997 Richard Günther |
6 | * |
7 | * binfmt_misc detects binaries via a magic or filename extension and invokes |
8 | * a specified wrapper. See Documentation/admin-guide/binfmt-misc.rst for more details. |
9 | */ |
10 | |
11 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt |
12 | |
13 | #include <linux/kernel.h> |
14 | #include <linux/module.h> |
15 | #include <linux/init.h> |
16 | #include <linux/sched/mm.h> |
17 | #include <linux/magic.h> |
18 | #include <linux/binfmts.h> |
19 | #include <linux/slab.h> |
20 | #include <linux/ctype.h> |
21 | #include <linux/string_helpers.h> |
22 | #include <linux/file.h> |
23 | #include <linux/pagemap.h> |
24 | #include <linux/namei.h> |
25 | #include <linux/mount.h> |
26 | #include <linux/fs_context.h> |
27 | #include <linux/syscalls.h> |
28 | #include <linux/fs.h> |
29 | #include <linux/uaccess.h> |
30 | |
31 | #include "internal.h" |
32 | |
/* USE_DEBUG mirrors DEBUG as a 0/1 constant so it can be tested in plain if(). */
#ifndef DEBUG
# define USE_DEBUG 0
#else
# define USE_DEBUG 1
#endif

enum {
	/* Set this to zero to save 400 bytes of kernel memory. */
	VERBOSE_STATUS = 1
};

/* Bit numbers inside Node::flags, used with test_bit()/set_bit()/clear_bit(). */
enum {
	Enabled,
	Magic
};

/* Bit masks inside Node::flags, set by the F, C, O and P register flags. */
#define MISC_FMT_OPEN_FILE (1UL << 28)
#define MISC_FMT_CREDENTIALS (1UL << 29)
#define MISC_FMT_OPEN_BINARY (1UL << 30)
#define MISC_FMT_PRESERVE_ARGV0 (1UL << 31)
48 | |
49 | typedef struct { |
50 | struct list_head list; |
51 | unsigned long flags; /* type, status, etc. */ |
52 | int offset; /* offset of magic */ |
53 | int size; /* size of magic/mask */ |
54 | char *magic; /* magic or filename extension */ |
55 | char *mask; /* mask, NULL for exact match */ |
56 | const char *interpreter; /* filename of interpreter */ |
57 | char *name; |
58 | struct dentry *dentry; |
59 | struct file *interp_file; |
60 | refcount_t users; /* sync removal with load_misc_binary() */ |
61 | } Node; |
62 | |
63 | static struct file_system_type bm_fs_type; |
64 | |
65 | /* |
66 | * Max length of the register string. Determined by: |
67 | * - 7 delimiters |
68 | * - name: ~50 bytes |
69 | * - type: 1 byte |
70 | * - offset: 3 bytes (has to be smaller than BINPRM_BUF_SIZE) |
71 | * - magic: 128 bytes (512 in escaped form) |
72 | * - mask: 128 bytes (512 in escaped form) |
73 | * - interp: ~50 bytes |
74 | * - flags: 5 bytes |
75 | * Round that up a bit, and then back off to hold the internal data |
76 | * (like struct Node). |
77 | */ |
78 | #define MAX_REGISTER_LENGTH 1920 |
79 | |
80 | /** |
81 | * search_binfmt_handler - search for a binary handler for @bprm |
82 | * @misc: handle to binfmt_misc instance |
83 | * @bprm: binary for which we are looking for a handler |
84 | * |
85 | * Search for a binary type handler for @bprm in the list of registered binary |
86 | * type handlers. |
87 | * |
88 | * Return: binary type list entry on success, NULL on failure |
89 | */ |
90 | static Node *search_binfmt_handler(struct binfmt_misc *misc, |
91 | struct linux_binprm *bprm) |
92 | { |
93 | char *p = strrchr(bprm->interp, '.'); |
94 | Node *e; |
95 | |
96 | /* Walk all the registered handlers. */ |
97 | list_for_each_entry(e, &misc->entries, list) { |
98 | char *s; |
99 | int j; |
100 | |
101 | /* Make sure this one is currently enabled. */ |
102 | if (!test_bit(Enabled, &e->flags)) |
103 | continue; |
104 | |
105 | /* Do matching based on extension if applicable. */ |
106 | if (!test_bit(Magic, &e->flags)) { |
107 | if (p && !strcmp(e->magic, p + 1)) |
108 | return e; |
109 | continue; |
110 | } |
111 | |
112 | /* Do matching based on magic & mask. */ |
113 | s = bprm->buf + e->offset; |
114 | if (e->mask) { |
115 | for (j = 0; j < e->size; j++) |
116 | if ((*s++ ^ e->magic[j]) & e->mask[j]) |
117 | break; |
118 | } else { |
119 | for (j = 0; j < e->size; j++) |
120 | if ((*s++ ^ e->magic[j])) |
121 | break; |
122 | } |
123 | if (j == e->size) |
124 | return e; |
125 | } |
126 | |
127 | return NULL; |
128 | } |
129 | |
130 | /** |
131 | * get_binfmt_handler - try to find a binary type handler |
132 | * @misc: handle to binfmt_misc instance |
133 | * @bprm: binary for which we are looking for a handler |
134 | * |
135 | * Try to find a binfmt handler for the binary type. If one is found take a |
136 | * reference to protect against removal via bm_{entry,status}_write(). |
137 | * |
138 | * Return: binary type list entry on success, NULL on failure |
139 | */ |
140 | static Node *get_binfmt_handler(struct binfmt_misc *misc, |
141 | struct linux_binprm *bprm) |
142 | { |
143 | Node *e; |
144 | |
145 | read_lock(&misc->entries_lock); |
146 | e = search_binfmt_handler(misc, bprm); |
147 | if (e) |
148 | refcount_inc(r: &e->users); |
149 | read_unlock(&misc->entries_lock); |
150 | return e; |
151 | } |
152 | |
153 | /** |
154 | * put_binfmt_handler - put binary handler node |
155 | * @e: node to put |
156 | * |
157 | * Free node syncing with load_misc_binary() and defer final free to |
158 | * load_misc_binary() in case it is using the binary type handler we were |
159 | * requested to remove. |
160 | */ |
161 | static void put_binfmt_handler(Node *e) |
162 | { |
163 | if (refcount_dec_and_test(r: &e->users)) { |
164 | if (e->flags & MISC_FMT_OPEN_FILE) |
165 | filp_close(e->interp_file, NULL); |
166 | kfree(objp: e); |
167 | } |
168 | } |
169 | |
170 | /** |
171 | * load_binfmt_misc - load the binfmt_misc of the caller's user namespace |
172 | * |
173 | * To be called in load_misc_binary() to load the relevant struct binfmt_misc. |
174 | * If a user namespace doesn't have its own binfmt_misc mount it can make use |
175 | * of its ancestor's binfmt_misc handlers. This mimicks the behavior of |
176 | * pre-namespaced binfmt_misc where all registered binfmt_misc handlers where |
177 | * available to all user and user namespaces on the system. |
178 | * |
179 | * Return: the binfmt_misc instance of the caller's user namespace |
180 | */ |
181 | static struct binfmt_misc *load_binfmt_misc(void) |
182 | { |
183 | const struct user_namespace *user_ns; |
184 | struct binfmt_misc *misc; |
185 | |
186 | user_ns = current_user_ns(); |
187 | while (user_ns) { |
188 | /* Pairs with smp_store_release() in bm_fill_super(). */ |
189 | misc = smp_load_acquire(&user_ns->binfmt_misc); |
190 | if (misc) |
191 | return misc; |
192 | |
193 | user_ns = user_ns->parent; |
194 | } |
195 | |
196 | return &init_binfmt_misc; |
197 | } |
198 | |
199 | /* |
200 | * the loader itself |
201 | */ |
202 | static int load_misc_binary(struct linux_binprm *bprm) |
203 | { |
204 | Node *fmt; |
205 | struct file *interp_file = NULL; |
206 | int retval = -ENOEXEC; |
207 | struct binfmt_misc *misc; |
208 | |
209 | misc = load_binfmt_misc(); |
210 | if (!misc->enabled) |
211 | return retval; |
212 | |
213 | fmt = get_binfmt_handler(misc, bprm); |
214 | if (!fmt) |
215 | return retval; |
216 | |
217 | /* Need to be able to load the file after exec */ |
218 | retval = -ENOENT; |
219 | if (bprm->interp_flags & BINPRM_FLAGS_PATH_INACCESSIBLE) |
220 | goto ret; |
221 | |
222 | if (fmt->flags & MISC_FMT_PRESERVE_ARGV0) { |
223 | bprm->interp_flags |= BINPRM_FLAGS_PRESERVE_ARGV0; |
224 | } else { |
225 | retval = remove_arg_zero(bprm); |
226 | if (retval) |
227 | goto ret; |
228 | } |
229 | |
230 | if (fmt->flags & MISC_FMT_OPEN_BINARY) |
231 | bprm->have_execfd = 1; |
232 | |
233 | /* make argv[1] be the path to the binary */ |
234 | retval = copy_string_kernel(arg: bprm->interp, bprm); |
235 | if (retval < 0) |
236 | goto ret; |
237 | bprm->argc++; |
238 | |
239 | /* add the interp as argv[0] */ |
240 | retval = copy_string_kernel(arg: fmt->interpreter, bprm); |
241 | if (retval < 0) |
242 | goto ret; |
243 | bprm->argc++; |
244 | |
245 | /* Update interp in case binfmt_script needs it. */ |
246 | retval = bprm_change_interp(interp: fmt->interpreter, bprm); |
247 | if (retval < 0) |
248 | goto ret; |
249 | |
250 | if (fmt->flags & MISC_FMT_OPEN_FILE) { |
251 | interp_file = file_clone_open(file: fmt->interp_file); |
252 | if (!IS_ERR(ptr: interp_file)) |
253 | deny_write_access(file: interp_file); |
254 | } else { |
255 | interp_file = open_exec(fmt->interpreter); |
256 | } |
257 | retval = PTR_ERR(ptr: interp_file); |
258 | if (IS_ERR(ptr: interp_file)) |
259 | goto ret; |
260 | |
261 | bprm->interpreter = interp_file; |
262 | if (fmt->flags & MISC_FMT_CREDENTIALS) |
263 | bprm->execfd_creds = 1; |
264 | |
265 | retval = 0; |
266 | ret: |
267 | |
268 | /* |
269 | * If we actually put the node here all concurrent calls to |
270 | * load_misc_binary() will have finished. We also know |
271 | * that for the refcount to be zero someone must have concurently |
272 | * removed the binary type handler from the list and it's our job to |
273 | * free it. |
274 | */ |
275 | put_binfmt_handler(e: fmt); |
276 | |
277 | return retval; |
278 | } |
279 | |
280 | /* Command parsers */ |
281 | |
282 | /* |
283 | * parses and copies one argument enclosed in del from *sp to *dp, |
284 | * recognising the \x special. |
285 | * returns pointer to the copied argument or NULL in case of an |
286 | * error (and sets err) or null argument length. |
287 | */ |
288 | static char *scanarg(char *s, char del) |
289 | { |
290 | char c; |
291 | |
292 | while ((c = *s++) != del) { |
293 | if (c == '\\' && *s == 'x') { |
294 | s++; |
295 | if (!isxdigit(*s++)) |
296 | return NULL; |
297 | if (!isxdigit(*s++)) |
298 | return NULL; |
299 | } |
300 | } |
301 | s[-1] ='\0'; |
302 | return s; |
303 | } |
304 | |
305 | static char *check_special_flags(char *sfs, Node *e) |
306 | { |
307 | char *p = sfs; |
308 | int cont = 1; |
309 | |
310 | /* special flags */ |
311 | while (cont) { |
312 | switch (*p) { |
313 | case 'P': |
314 | pr_debug("register: flag: P (preserve argv0)\n" ); |
315 | p++; |
316 | e->flags |= MISC_FMT_PRESERVE_ARGV0; |
317 | break; |
318 | case 'O': |
319 | pr_debug("register: flag: O (open binary)\n" ); |
320 | p++; |
321 | e->flags |= MISC_FMT_OPEN_BINARY; |
322 | break; |
323 | case 'C': |
324 | pr_debug("register: flag: C (preserve creds)\n" ); |
325 | p++; |
326 | /* this flags also implies the |
327 | open-binary flag */ |
328 | e->flags |= (MISC_FMT_CREDENTIALS | |
329 | MISC_FMT_OPEN_BINARY); |
330 | break; |
331 | case 'F': |
332 | pr_debug("register: flag: F: open interpreter file now\n" ); |
333 | p++; |
334 | e->flags |= MISC_FMT_OPEN_FILE; |
335 | break; |
336 | default: |
337 | cont = 0; |
338 | } |
339 | } |
340 | |
341 | return p; |
342 | } |
343 | |
344 | /* |
345 | * This registers a new binary format, it recognises the syntax |
346 | * ':name:type:offset:magic:mask:interpreter:flags' |
347 | * where the ':' is the IFS, that can be chosen with the first char |
348 | */ |
349 | static Node *create_entry(const char __user *buffer, size_t count) |
350 | { |
351 | Node *e; |
352 | int memsize, err; |
353 | char *buf, *p; |
354 | char del; |
355 | |
356 | pr_debug("register: received %zu bytes\n" , count); |
357 | |
358 | /* some sanity checks */ |
359 | err = -EINVAL; |
360 | if ((count < 11) || (count > MAX_REGISTER_LENGTH)) |
361 | goto out; |
362 | |
363 | err = -ENOMEM; |
364 | memsize = sizeof(Node) + count + 8; |
365 | e = kmalloc(size: memsize, GFP_KERNEL_ACCOUNT); |
366 | if (!e) |
367 | goto out; |
368 | |
369 | p = buf = (char *)e + sizeof(Node); |
370 | |
371 | memset(e, 0, sizeof(Node)); |
372 | if (copy_from_user(to: buf, from: buffer, n: count)) |
373 | goto efault; |
374 | |
375 | del = *p++; /* delimeter */ |
376 | |
377 | pr_debug("register: delim: %#x {%c}\n" , del, del); |
378 | |
379 | /* Pad the buffer with the delim to simplify parsing below. */ |
380 | memset(buf + count, del, 8); |
381 | |
382 | /* Parse the 'name' field. */ |
383 | e->name = p; |
384 | p = strchr(p, del); |
385 | if (!p) |
386 | goto einval; |
387 | *p++ = '\0'; |
388 | if (!e->name[0] || |
389 | !strcmp(e->name, "." ) || |
390 | !strcmp(e->name, ".." ) || |
391 | strchr(e->name, '/')) |
392 | goto einval; |
393 | |
394 | pr_debug("register: name: {%s}\n" , e->name); |
395 | |
396 | /* Parse the 'type' field. */ |
397 | switch (*p++) { |
398 | case 'E': |
399 | pr_debug("register: type: E (extension)\n" ); |
400 | e->flags = 1 << Enabled; |
401 | break; |
402 | case 'M': |
403 | pr_debug("register: type: M (magic)\n" ); |
404 | e->flags = (1 << Enabled) | (1 << Magic); |
405 | break; |
406 | default: |
407 | goto einval; |
408 | } |
409 | if (*p++ != del) |
410 | goto einval; |
411 | |
412 | if (test_bit(Magic, &e->flags)) { |
413 | /* Handle the 'M' (magic) format. */ |
414 | char *s; |
415 | |
416 | /* Parse the 'offset' field. */ |
417 | s = strchr(p, del); |
418 | if (!s) |
419 | goto einval; |
420 | *s = '\0'; |
421 | if (p != s) { |
422 | int r = kstrtoint(s: p, base: 10, res: &e->offset); |
423 | if (r != 0 || e->offset < 0) |
424 | goto einval; |
425 | } |
426 | p = s; |
427 | if (*p++) |
428 | goto einval; |
429 | pr_debug("register: offset: %#x\n" , e->offset); |
430 | |
431 | /* Parse the 'magic' field. */ |
432 | e->magic = p; |
433 | p = scanarg(s: p, del); |
434 | if (!p) |
435 | goto einval; |
436 | if (!e->magic[0]) |
437 | goto einval; |
438 | if (USE_DEBUG) |
439 | print_hex_dump_bytes( |
440 | KBUILD_MODNAME ": register: magic[raw]: " , |
441 | DUMP_PREFIX_NONE, e->magic, p - e->magic); |
442 | |
443 | /* Parse the 'mask' field. */ |
444 | e->mask = p; |
445 | p = scanarg(s: p, del); |
446 | if (!p) |
447 | goto einval; |
448 | if (!e->mask[0]) { |
449 | e->mask = NULL; |
450 | pr_debug("register: mask[raw]: none\n" ); |
451 | } else if (USE_DEBUG) |
452 | print_hex_dump_bytes( |
453 | KBUILD_MODNAME ": register: mask[raw]: " , |
454 | DUMP_PREFIX_NONE, e->mask, p - e->mask); |
455 | |
456 | /* |
457 | * Decode the magic & mask fields. |
458 | * Note: while we might have accepted embedded NUL bytes from |
459 | * above, the unescape helpers here will stop at the first one |
460 | * it encounters. |
461 | */ |
462 | e->size = string_unescape_inplace(buf: e->magic, UNESCAPE_HEX); |
463 | if (e->mask && |
464 | string_unescape_inplace(buf: e->mask, UNESCAPE_HEX) != e->size) |
465 | goto einval; |
466 | if (e->size > BINPRM_BUF_SIZE || |
467 | BINPRM_BUF_SIZE - e->size < e->offset) |
468 | goto einval; |
469 | pr_debug("register: magic/mask length: %i\n" , e->size); |
470 | if (USE_DEBUG) { |
471 | print_hex_dump_bytes( |
472 | KBUILD_MODNAME ": register: magic[decoded]: " , |
473 | DUMP_PREFIX_NONE, e->magic, e->size); |
474 | |
475 | if (e->mask) { |
476 | int i; |
477 | char *masked = kmalloc(size: e->size, GFP_KERNEL_ACCOUNT); |
478 | |
479 | print_hex_dump_bytes( |
480 | KBUILD_MODNAME ": register: mask[decoded]: " , |
481 | DUMP_PREFIX_NONE, e->mask, e->size); |
482 | |
483 | if (masked) { |
484 | for (i = 0; i < e->size; ++i) |
485 | masked[i] = e->magic[i] & e->mask[i]; |
486 | print_hex_dump_bytes( |
487 | KBUILD_MODNAME ": register: magic[masked]: " , |
488 | DUMP_PREFIX_NONE, masked, e->size); |
489 | |
490 | kfree(objp: masked); |
491 | } |
492 | } |
493 | } |
494 | } else { |
495 | /* Handle the 'E' (extension) format. */ |
496 | |
497 | /* Skip the 'offset' field. */ |
498 | p = strchr(p, del); |
499 | if (!p) |
500 | goto einval; |
501 | *p++ = '\0'; |
502 | |
503 | /* Parse the 'magic' field. */ |
504 | e->magic = p; |
505 | p = strchr(p, del); |
506 | if (!p) |
507 | goto einval; |
508 | *p++ = '\0'; |
509 | if (!e->magic[0] || strchr(e->magic, '/')) |
510 | goto einval; |
511 | pr_debug("register: extension: {%s}\n" , e->magic); |
512 | |
513 | /* Skip the 'mask' field. */ |
514 | p = strchr(p, del); |
515 | if (!p) |
516 | goto einval; |
517 | *p++ = '\0'; |
518 | } |
519 | |
520 | /* Parse the 'interpreter' field. */ |
521 | e->interpreter = p; |
522 | p = strchr(p, del); |
523 | if (!p) |
524 | goto einval; |
525 | *p++ = '\0'; |
526 | if (!e->interpreter[0]) |
527 | goto einval; |
528 | pr_debug("register: interpreter: {%s}\n" , e->interpreter); |
529 | |
530 | /* Parse the 'flags' field. */ |
531 | p = check_special_flags(sfs: p, e); |
532 | if (*p == '\n') |
533 | p++; |
534 | if (p != buf + count) |
535 | goto einval; |
536 | |
537 | return e; |
538 | |
539 | out: |
540 | return ERR_PTR(error: err); |
541 | |
542 | efault: |
543 | kfree(objp: e); |
544 | return ERR_PTR(error: -EFAULT); |
545 | einval: |
546 | kfree(objp: e); |
547 | return ERR_PTR(error: -EINVAL); |
548 | } |
549 | |
550 | /* |
551 | * Set status of entry/binfmt_misc: |
552 | * '1' enables, '0' disables and '-1' clears entry/binfmt_misc |
553 | */ |
554 | static int parse_command(const char __user *buffer, size_t count) |
555 | { |
556 | char s[4]; |
557 | |
558 | if (count > 3) |
559 | return -EINVAL; |
560 | if (copy_from_user(to: s, from: buffer, n: count)) |
561 | return -EFAULT; |
562 | if (!count) |
563 | return 0; |
564 | if (s[count - 1] == '\n') |
565 | count--; |
566 | if (count == 1 && s[0] == '0') |
567 | return 1; |
568 | if (count == 1 && s[0] == '1') |
569 | return 2; |
570 | if (count == 2 && s[0] == '-' && s[1] == '1') |
571 | return 3; |
572 | return -EINVAL; |
573 | } |
574 | |
575 | /* generic stuff */ |
576 | |
577 | static void entry_status(Node *e, char *page) |
578 | { |
579 | char *dp = page; |
580 | const char *status = "disabled" ; |
581 | |
582 | if (test_bit(Enabled, &e->flags)) |
583 | status = "enabled" ; |
584 | |
585 | if (!VERBOSE_STATUS) { |
586 | sprintf(buf: page, fmt: "%s\n" , status); |
587 | return; |
588 | } |
589 | |
590 | dp += sprintf(buf: dp, fmt: "%s\ninterpreter %s\n" , status, e->interpreter); |
591 | |
592 | /* print the special flags */ |
593 | dp += sprintf(buf: dp, fmt: "flags: " ); |
594 | if (e->flags & MISC_FMT_PRESERVE_ARGV0) |
595 | *dp++ = 'P'; |
596 | if (e->flags & MISC_FMT_OPEN_BINARY) |
597 | *dp++ = 'O'; |
598 | if (e->flags & MISC_FMT_CREDENTIALS) |
599 | *dp++ = 'C'; |
600 | if (e->flags & MISC_FMT_OPEN_FILE) |
601 | *dp++ = 'F'; |
602 | *dp++ = '\n'; |
603 | |
604 | if (!test_bit(Magic, &e->flags)) { |
605 | sprintf(buf: dp, fmt: "extension .%s\n" , e->magic); |
606 | } else { |
607 | dp += sprintf(buf: dp, fmt: "offset %i\nmagic " , e->offset); |
608 | dp = bin2hex(dst: dp, src: e->magic, count: e->size); |
609 | if (e->mask) { |
610 | dp += sprintf(buf: dp, fmt: "\nmask " ); |
611 | dp = bin2hex(dst: dp, src: e->mask, count: e->size); |
612 | } |
613 | *dp++ = '\n'; |
614 | *dp = '\0'; |
615 | } |
616 | } |
617 | |
618 | static struct inode *bm_get_inode(struct super_block *sb, int mode) |
619 | { |
620 | struct inode *inode = new_inode(sb); |
621 | |
622 | if (inode) { |
623 | inode->i_ino = get_next_ino(); |
624 | inode->i_mode = mode; |
625 | simple_inode_init_ts(inode); |
626 | } |
627 | return inode; |
628 | } |
629 | |
630 | /** |
631 | * i_binfmt_misc - retrieve struct binfmt_misc from a binfmt_misc inode |
632 | * @inode: inode of the relevant binfmt_misc instance |
633 | * |
634 | * This helper retrieves struct binfmt_misc from a binfmt_misc inode. This can |
635 | * be done without any memory barriers because we are guaranteed that |
636 | * user_ns->binfmt_misc is fully initialized. It was fully initialized when the |
637 | * binfmt_misc mount was first created. |
638 | * |
639 | * Return: struct binfmt_misc of the relevant binfmt_misc instance |
640 | */ |
641 | static struct binfmt_misc *i_binfmt_misc(struct inode *inode) |
642 | { |
643 | return inode->i_sb->s_user_ns->binfmt_misc; |
644 | } |
645 | |
646 | /** |
647 | * bm_evict_inode - cleanup data associated with @inode |
648 | * @inode: inode to which the data is attached |
649 | * |
650 | * Cleanup the binary type handler data associated with @inode if a binary type |
651 | * entry is removed or the filesystem is unmounted and the super block is |
652 | * shutdown. |
653 | * |
654 | * If the ->evict call was not caused by a super block shutdown but by a write |
655 | * to remove the entry or all entries via bm_{entry,status}_write() the entry |
656 | * will have already been removed from the list. We keep the list_empty() check |
657 | * to make that explicit. |
658 | */ |
659 | static void bm_evict_inode(struct inode *inode) |
660 | { |
661 | Node *e = inode->i_private; |
662 | |
663 | clear_inode(inode); |
664 | |
665 | if (e) { |
666 | struct binfmt_misc *misc; |
667 | |
668 | misc = i_binfmt_misc(inode); |
669 | write_lock(&misc->entries_lock); |
670 | if (!list_empty(head: &e->list)) |
671 | list_del_init(entry: &e->list); |
672 | write_unlock(&misc->entries_lock); |
673 | put_binfmt_handler(e); |
674 | } |
675 | } |
676 | |
677 | /** |
678 | * unlink_binfmt_dentry - remove the dentry for the binary type handler |
679 | * @dentry: dentry associated with the binary type handler |
680 | * |
681 | * Do the actual filesystem work to remove a dentry for a registered binary |
682 | * type handler. Since binfmt_misc only allows simple files to be created |
683 | * directly under the root dentry of the filesystem we ensure that we are |
684 | * indeed passed a dentry directly beneath the root dentry, that the inode |
685 | * associated with the root dentry is locked, and that it is a regular file we |
686 | * are asked to remove. |
687 | */ |
688 | static void unlink_binfmt_dentry(struct dentry *dentry) |
689 | { |
690 | struct dentry *parent = dentry->d_parent; |
691 | struct inode *inode, *parent_inode; |
692 | |
693 | /* All entries are immediate descendants of the root dentry. */ |
694 | if (WARN_ON_ONCE(dentry->d_sb->s_root != parent)) |
695 | return; |
696 | |
697 | /* We only expect to be called on regular files. */ |
698 | inode = d_inode(dentry); |
699 | if (WARN_ON_ONCE(!S_ISREG(inode->i_mode))) |
700 | return; |
701 | |
702 | /* The parent inode must be locked. */ |
703 | parent_inode = d_inode(dentry: parent); |
704 | if (WARN_ON_ONCE(!inode_is_locked(parent_inode))) |
705 | return; |
706 | |
707 | if (simple_positive(dentry)) { |
708 | dget(dentry); |
709 | simple_unlink(parent_inode, dentry); |
710 | d_delete(dentry); |
711 | dput(dentry); |
712 | } |
713 | } |
714 | |
715 | /** |
716 | * remove_binfmt_handler - remove a binary type handler |
717 | * @misc: handle to binfmt_misc instance |
718 | * @e: binary type handler to remove |
719 | * |
720 | * Remove a binary type handler from the list of binary type handlers and |
721 | * remove its associated dentry. This is called from |
722 | * binfmt_{entry,status}_write(). In the future, we might want to think about |
723 | * adding a proper ->unlink() method to binfmt_misc instead of forcing caller's |
724 | * to use writes to files in order to delete binary type handlers. But it has |
725 | * worked for so long that it's not a pressing issue. |
726 | */ |
727 | static void remove_binfmt_handler(struct binfmt_misc *misc, Node *e) |
728 | { |
729 | write_lock(&misc->entries_lock); |
730 | list_del_init(entry: &e->list); |
731 | write_unlock(&misc->entries_lock); |
732 | unlink_binfmt_dentry(dentry: e->dentry); |
733 | } |
734 | |
735 | /* /<entry> */ |
736 | |
737 | static ssize_t |
738 | bm_entry_read(struct file *file, char __user *buf, size_t nbytes, loff_t *ppos) |
739 | { |
740 | Node *e = file_inode(f: file)->i_private; |
741 | ssize_t res; |
742 | char *page; |
743 | |
744 | page = (char *) __get_free_page(GFP_KERNEL); |
745 | if (!page) |
746 | return -ENOMEM; |
747 | |
748 | entry_status(e, page); |
749 | |
750 | res = simple_read_from_buffer(to: buf, count: nbytes, ppos, from: page, strlen(page)); |
751 | |
752 | free_page((unsigned long) page); |
753 | return res; |
754 | } |
755 | |
756 | static ssize_t bm_entry_write(struct file *file, const char __user *buffer, |
757 | size_t count, loff_t *ppos) |
758 | { |
759 | struct inode *inode = file_inode(f: file); |
760 | Node *e = inode->i_private; |
761 | int res = parse_command(buffer, count); |
762 | |
763 | switch (res) { |
764 | case 1: |
765 | /* Disable this handler. */ |
766 | clear_bit(nr: Enabled, addr: &e->flags); |
767 | break; |
768 | case 2: |
769 | /* Enable this handler. */ |
770 | set_bit(nr: Enabled, addr: &e->flags); |
771 | break; |
772 | case 3: |
773 | /* Delete this handler. */ |
774 | inode = d_inode(dentry: inode->i_sb->s_root); |
775 | inode_lock(inode); |
776 | |
777 | /* |
778 | * In order to add new element or remove elements from the list |
779 | * via bm_{entry,register,status}_write() inode_lock() on the |
780 | * root inode must be held. |
781 | * The lock is exclusive ensuring that the list can't be |
782 | * modified. Only load_misc_binary() can access but does so |
783 | * read-only. So we only need to take the write lock when we |
784 | * actually remove the entry from the list. |
785 | */ |
786 | if (!list_empty(head: &e->list)) |
787 | remove_binfmt_handler(misc: i_binfmt_misc(inode), e); |
788 | |
789 | inode_unlock(inode); |
790 | break; |
791 | default: |
792 | return res; |
793 | } |
794 | |
795 | return count; |
796 | } |
797 | |
798 | static const struct file_operations bm_entry_operations = { |
799 | .read = bm_entry_read, |
800 | .write = bm_entry_write, |
801 | .llseek = default_llseek, |
802 | }; |
803 | |
804 | /* /register */ |
805 | |
806 | static ssize_t bm_register_write(struct file *file, const char __user *buffer, |
807 | size_t count, loff_t *ppos) |
808 | { |
809 | Node *e; |
810 | struct inode *inode; |
811 | struct super_block *sb = file_inode(f: file)->i_sb; |
812 | struct dentry *root = sb->s_root, *dentry; |
813 | struct binfmt_misc *misc; |
814 | int err = 0; |
815 | struct file *f = NULL; |
816 | |
817 | e = create_entry(buffer, count); |
818 | |
819 | if (IS_ERR(ptr: e)) |
820 | return PTR_ERR(ptr: e); |
821 | |
822 | if (e->flags & MISC_FMT_OPEN_FILE) { |
823 | const struct cred *old_cred; |
824 | |
825 | /* |
826 | * Now that we support unprivileged binfmt_misc mounts make |
827 | * sure we use the credentials that the register @file was |
828 | * opened with to also open the interpreter. Before that this |
829 | * didn't matter much as only a privileged process could open |
830 | * the register file. |
831 | */ |
832 | old_cred = override_creds(file->f_cred); |
833 | f = open_exec(e->interpreter); |
834 | revert_creds(old_cred); |
835 | if (IS_ERR(ptr: f)) { |
836 | pr_notice("register: failed to install interpreter file %s\n" , |
837 | e->interpreter); |
838 | kfree(objp: e); |
839 | return PTR_ERR(ptr: f); |
840 | } |
841 | e->interp_file = f; |
842 | } |
843 | |
844 | inode_lock(inode: d_inode(dentry: root)); |
845 | dentry = lookup_one_len(e->name, root, strlen(e->name)); |
846 | err = PTR_ERR(ptr: dentry); |
847 | if (IS_ERR(ptr: dentry)) |
848 | goto out; |
849 | |
850 | err = -EEXIST; |
851 | if (d_really_is_positive(dentry)) |
852 | goto out2; |
853 | |
854 | inode = bm_get_inode(sb, S_IFREG | 0644); |
855 | |
856 | err = -ENOMEM; |
857 | if (!inode) |
858 | goto out2; |
859 | |
860 | refcount_set(r: &e->users, n: 1); |
861 | e->dentry = dget(dentry); |
862 | inode->i_private = e; |
863 | inode->i_fop = &bm_entry_operations; |
864 | |
865 | d_instantiate(dentry, inode); |
866 | misc = i_binfmt_misc(inode); |
867 | write_lock(&misc->entries_lock); |
868 | list_add(new: &e->list, head: &misc->entries); |
869 | write_unlock(&misc->entries_lock); |
870 | |
871 | err = 0; |
872 | out2: |
873 | dput(dentry); |
874 | out: |
875 | inode_unlock(inode: d_inode(dentry: root)); |
876 | |
877 | if (err) { |
878 | if (f) |
879 | filp_close(f, NULL); |
880 | kfree(objp: e); |
881 | return err; |
882 | } |
883 | return count; |
884 | } |
885 | |
886 | static const struct file_operations bm_register_operations = { |
887 | .write = bm_register_write, |
888 | .llseek = noop_llseek, |
889 | }; |
890 | |
891 | /* /status */ |
892 | |
893 | static ssize_t |
894 | bm_status_read(struct file *file, char __user *buf, size_t nbytes, loff_t *ppos) |
895 | { |
896 | struct binfmt_misc *misc; |
897 | char *s; |
898 | |
899 | misc = i_binfmt_misc(inode: file_inode(f: file)); |
900 | s = misc->enabled ? "enabled\n" : "disabled\n" ; |
901 | return simple_read_from_buffer(to: buf, count: nbytes, ppos, from: s, strlen(s)); |
902 | } |
903 | |
904 | static ssize_t bm_status_write(struct file *file, const char __user *buffer, |
905 | size_t count, loff_t *ppos) |
906 | { |
907 | struct binfmt_misc *misc; |
908 | int res = parse_command(buffer, count); |
909 | Node *e, *next; |
910 | struct inode *inode; |
911 | |
912 | misc = i_binfmt_misc(inode: file_inode(f: file)); |
913 | switch (res) { |
914 | case 1: |
915 | /* Disable all handlers. */ |
916 | misc->enabled = false; |
917 | break; |
918 | case 2: |
919 | /* Enable all handlers. */ |
920 | misc->enabled = true; |
921 | break; |
922 | case 3: |
923 | /* Delete all handlers. */ |
924 | inode = d_inode(dentry: file_inode(f: file)->i_sb->s_root); |
925 | inode_lock(inode); |
926 | |
927 | /* |
928 | * In order to add new element or remove elements from the list |
929 | * via bm_{entry,register,status}_write() inode_lock() on the |
930 | * root inode must be held. |
931 | * The lock is exclusive ensuring that the list can't be |
932 | * modified. Only load_misc_binary() can access but does so |
933 | * read-only. So we only need to take the write lock when we |
934 | * actually remove the entry from the list. |
935 | */ |
936 | list_for_each_entry_safe(e, next, &misc->entries, list) |
937 | remove_binfmt_handler(misc, e); |
938 | |
939 | inode_unlock(inode); |
940 | break; |
941 | default: |
942 | return res; |
943 | } |
944 | |
945 | return count; |
946 | } |
947 | |
948 | static const struct file_operations bm_status_operations = { |
949 | .read = bm_status_read, |
950 | .write = bm_status_write, |
951 | .llseek = default_llseek, |
952 | }; |
953 | |
954 | /* Superblock handling */ |
955 | |
956 | static void bm_put_super(struct super_block *sb) |
957 | { |
958 | struct user_namespace *user_ns = sb->s_fs_info; |
959 | |
960 | sb->s_fs_info = NULL; |
961 | put_user_ns(ns: user_ns); |
962 | } |
963 | |
964 | static const struct super_operations s_ops = { |
965 | .statfs = simple_statfs, |
966 | .evict_inode = bm_evict_inode, |
967 | .put_super = bm_put_super, |
968 | }; |
969 | |
970 | static int bm_fill_super(struct super_block *sb, struct fs_context *fc) |
971 | { |
972 | int err; |
973 | struct user_namespace *user_ns = sb->s_user_ns; |
974 | struct binfmt_misc *misc; |
975 | static const struct tree_descr bm_files[] = { |
976 | [2] = {.name: "status" , .ops: &bm_status_operations, S_IWUSR|S_IRUGO}, |
977 | [3] = {"register" , &bm_register_operations, S_IWUSR}, |
978 | /* last one */ {"" } |
979 | }; |
980 | |
981 | if (WARN_ON(user_ns != current_user_ns())) |
982 | return -EINVAL; |
983 | |
984 | /* |
985 | * Lazily allocate a new binfmt_misc instance for this namespace, i.e. |
986 | * do it here during the first mount of binfmt_misc. We don't need to |
987 | * waste memory for every user namespace allocation. It's likely much |
988 | * more common to not mount a separate binfmt_misc instance than it is |
989 | * to mount one. |
990 | * |
991 | * While multiple superblocks can exist they are keyed by userns in |
992 | * s_fs_info for binfmt_misc. Hence, the vfs guarantees that |
993 | * bm_fill_super() is called exactly once whenever a binfmt_misc |
994 | * superblock for a userns is created. This in turn lets us conclude |
995 | * that when a binfmt_misc superblock is created for the first time for |
996 | * a userns there's no one racing us. Therefore we don't need any |
997 | * barriers when we dereference binfmt_misc. |
998 | */ |
999 | misc = user_ns->binfmt_misc; |
1000 | if (!misc) { |
1001 | /* |
1002 | * If it turns out that most user namespaces actually want to |
1003 | * register their own binary type handler and therefore all |
1004 | * create their own separate binfm_misc mounts we should |
1005 | * consider turning this into a kmem cache. |
1006 | */ |
1007 | misc = kzalloc(size: sizeof(struct binfmt_misc), GFP_KERNEL); |
1008 | if (!misc) |
1009 | return -ENOMEM; |
1010 | |
1011 | INIT_LIST_HEAD(list: &misc->entries); |
1012 | rwlock_init(&misc->entries_lock); |
1013 | |
1014 | /* Pairs with smp_load_acquire() in load_binfmt_misc(). */ |
1015 | smp_store_release(&user_ns->binfmt_misc, misc); |
1016 | } |
1017 | |
1018 | /* |
1019 | * When the binfmt_misc superblock for this userns is shutdown |
1020 | * ->enabled might have been set to false and we don't reinitialize |
1021 | * ->enabled again in put_super() as someone might already be mounting |
1022 | * binfmt_misc again. It also would be pointless since by the time |
1023 | * ->put_super() is called we know that the binary type list for this |
1024 | * bintfmt_misc mount is empty making load_misc_binary() return |
1025 | * -ENOEXEC independent of whether ->enabled is true. Instead, if |
1026 | * someone mounts binfmt_misc for the first time or again we simply |
1027 | * reset ->enabled to true. |
1028 | */ |
1029 | misc->enabled = true; |
1030 | |
1031 | err = simple_fill_super(sb, BINFMTFS_MAGIC, bm_files); |
1032 | if (!err) |
1033 | sb->s_op = &s_ops; |
1034 | return err; |
1035 | } |
1036 | |
1037 | static void bm_free(struct fs_context *fc) |
1038 | { |
1039 | if (fc->s_fs_info) |
1040 | put_user_ns(ns: fc->s_fs_info); |
1041 | } |
1042 | |
1043 | static int bm_get_tree(struct fs_context *fc) |
1044 | { |
1045 | return get_tree_keyed(fc, fill_super: bm_fill_super, key: get_user_ns(ns: fc->user_ns)); |
1046 | } |
1047 | |
1048 | static const struct fs_context_operations bm_context_ops = { |
1049 | .free = bm_free, |
1050 | .get_tree = bm_get_tree, |
1051 | }; |
1052 | |
1053 | static int bm_init_fs_context(struct fs_context *fc) |
1054 | { |
1055 | fc->ops = &bm_context_ops; |
1056 | return 0; |
1057 | } |
1058 | |
1059 | static struct linux_binfmt misc_format = { |
1060 | .module = THIS_MODULE, |
1061 | .load_binary = load_misc_binary, |
1062 | }; |
1063 | |
1064 | static struct file_system_type bm_fs_type = { |
1065 | .owner = THIS_MODULE, |
1066 | .name = "binfmt_misc" , |
1067 | .init_fs_context = bm_init_fs_context, |
1068 | .fs_flags = FS_USERNS_MOUNT, |
1069 | .kill_sb = kill_litter_super, |
1070 | }; |
1071 | MODULE_ALIAS_FS("binfmt_misc" ); |
1072 | |
1073 | static int __init init_misc_binfmt(void) |
1074 | { |
1075 | int err = register_filesystem(&bm_fs_type); |
1076 | if (!err) |
1077 | insert_binfmt(fmt: &misc_format); |
1078 | return err; |
1079 | } |
1080 | |
1081 | static void __exit exit_misc_binfmt(void) |
1082 | { |
1083 | unregister_binfmt(&misc_format); |
1084 | unregister_filesystem(&bm_fs_type); |
1085 | } |
1086 | |
1087 | core_initcall(init_misc_binfmt); |
1088 | module_exit(exit_misc_binfmt); |
1089 | MODULE_LICENSE("GPL" ); |
1090 | |