1 | // SPDX-License-Identifier: GPL-2.0-only |
2 | /* |
3 | * Landlock LSM - Filesystem management and hooks |
4 | * |
5 | * Copyright © 2016-2020 Mickaël Salaün <mic@digikod.net> |
6 | * Copyright © 2018-2020 ANSSI |
7 | * Copyright © 2021-2022 Microsoft Corporation |
8 | */ |
9 | |
10 | #include <kunit/test.h> |
11 | #include <linux/atomic.h> |
12 | #include <linux/bitops.h> |
13 | #include <linux/bits.h> |
14 | #include <linux/compiler_types.h> |
15 | #include <linux/dcache.h> |
16 | #include <linux/err.h> |
17 | #include <linux/fs.h> |
18 | #include <linux/init.h> |
19 | #include <linux/kernel.h> |
20 | #include <linux/limits.h> |
21 | #include <linux/list.h> |
22 | #include <linux/lsm_hooks.h> |
23 | #include <linux/mount.h> |
24 | #include <linux/namei.h> |
25 | #include <linux/path.h> |
26 | #include <linux/rcupdate.h> |
27 | #include <linux/spinlock.h> |
28 | #include <linux/stat.h> |
29 | #include <linux/types.h> |
30 | #include <linux/wait_bit.h> |
31 | #include <linux/workqueue.h> |
32 | #include <uapi/linux/landlock.h> |
33 | |
34 | #include "common.h" |
35 | #include "cred.h" |
36 | #include "fs.h" |
37 | #include "limits.h" |
38 | #include "object.h" |
39 | #include "ruleset.h" |
40 | #include "setup.h" |
41 | |
42 | /* Underlying object management */ |
43 | |
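/*
 * Release handler for the underlying inode of a Landlock object.  Called with
 * @object->lock held, it detaches the inode (if any) and puts the inode
 * reference that was taken by get_inode_object().
 */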
44 | static void release_inode(struct landlock_object *const object) |
45 | __releases(object->lock) |
46 | { |
47 | struct inode *const inode = object->underobj; |
48 | struct super_block *sb; |
49 | |
50 | if (!inode) { |
		spin_unlock(&object->lock);
52 | return; |
53 | } |
54 | |
55 | /* |
56 | * Protects against concurrent use by hook_sb_delete() of the reference |
57 | * to the underlying inode. |
58 | */ |
59 | object->underobj = NULL; |
60 | /* |
61 | * Makes sure that if the filesystem is concurrently unmounted, |
62 | * hook_sb_delete() will wait for us to finish iput(). |
63 | */ |
64 | sb = inode->i_sb; |
	atomic_long_inc(&landlock_superblock(sb)->inode_refs);
	spin_unlock(&object->lock);
67 | /* |
68 | * Because object->underobj was not NULL, hook_sb_delete() and |
69 | * get_inode_object() guarantee that it is safe to reset |
70 | * landlock_inode(inode)->object while it is not NULL. It is therefore |
71 | * not necessary to lock inode->i_lock. |
72 | */ |
73 | rcu_assign_pointer(landlock_inode(inode)->object, NULL); |
74 | /* |
75 | * Now, new rules can safely be tied to @inode with get_inode_object(). |
76 | */ |
77 | |
78 | iput(inode); |
	if (atomic_long_dec_and_test(&landlock_superblock(sb)->inode_refs))
		wake_up_var(&landlock_superblock(sb)->inode_refs);
81 | } |
82 | |
83 | static const struct landlock_object_underops landlock_fs_underops = { |
84 | .release = release_inode |
85 | }; |
86 | |
87 | /* Ruleset management */ |
88 | |
89 | static struct landlock_object *get_inode_object(struct inode *const inode) |
90 | { |
91 | struct landlock_object *object, *new_object; |
92 | struct landlock_inode_security *inode_sec = landlock_inode(inode); |
93 | |
94 | rcu_read_lock(); |
95 | retry: |
96 | object = rcu_dereference(inode_sec->object); |
97 | if (object) { |
98 | if (likely(refcount_inc_not_zero(&object->usage))) { |
99 | rcu_read_unlock(); |
100 | return object; |
101 | } |
102 | /* |
103 | * We are racing with release_inode(), the object is going |
104 | * away. Wait for release_inode(), then retry. |
105 | */ |
		spin_lock(&object->lock);
		spin_unlock(&object->lock);
108 | goto retry; |
109 | } |
110 | rcu_read_unlock(); |
111 | |
112 | /* |
113 | * If there is no object tied to @inode, then create a new one (without |
114 | * holding any locks). |
115 | */ |
	new_object = landlock_create_object(&landlock_fs_underops, inode);
	if (IS_ERR(new_object))
118 | return new_object; |
119 | |
120 | /* |
121 | * Protects against concurrent calls to get_inode_object() or |
122 | * hook_sb_delete(). |
123 | */ |
	spin_lock(&inode->i_lock);
125 | if (unlikely(rcu_access_pointer(inode_sec->object))) { |
126 | /* Someone else just created the object, bail out and retry. */ |
		spin_unlock(&inode->i_lock);
		kfree(new_object);
129 | |
130 | rcu_read_lock(); |
131 | goto retry; |
132 | } |
133 | |
134 | /* |
135 | * @inode will be released by hook_sb_delete() on its superblock |
136 | * shutdown, or by release_inode() when no more ruleset references the |
137 | * related object. |
138 | */ |
139 | ihold(inode); |
140 | rcu_assign_pointer(inode_sec->object, new_object); |
	spin_unlock(&inode->i_lock);
142 | return new_object; |
143 | } |
144 | |
145 | /* All access rights that can be tied to files. */ |
146 | /* clang-format off */ |
147 | #define ACCESS_FILE ( \ |
148 | LANDLOCK_ACCESS_FS_EXECUTE | \ |
149 | LANDLOCK_ACCESS_FS_WRITE_FILE | \ |
150 | LANDLOCK_ACCESS_FS_READ_FILE | \ |
151 | LANDLOCK_ACCESS_FS_TRUNCATE) |
152 | /* clang-format on */ |
153 | |
154 | /* |
155 | * @path: Should have been checked by get_path_from_fd(). |
156 | */ |
157 | int landlock_append_fs_rule(struct landlock_ruleset *const ruleset, |
158 | const struct path *const path, |
159 | access_mask_t access_rights) |
160 | { |
161 | int err; |
162 | struct landlock_id id = { |
163 | .type = LANDLOCK_KEY_INODE, |
164 | }; |
165 | |
166 | /* Files only get access rights that make sense. */ |
	if (!d_is_dir(path->dentry) &&
168 | (access_rights | ACCESS_FILE) != ACCESS_FILE) |
169 | return -EINVAL; |
170 | if (WARN_ON_ONCE(ruleset->num_layers != 1)) |
171 | return -EINVAL; |
172 | |
173 | /* Transforms relative access rights to absolute ones. */ |
174 | access_rights |= LANDLOCK_MASK_ACCESS_FS & |
			 ~landlock_get_fs_access_mask(ruleset, 0);
	id.key.object = get_inode_object(d_backing_inode(path->dentry));
	if (IS_ERR(id.key.object))
		return PTR_ERR(id.key.object);
179 | mutex_lock(&ruleset->lock); |
	err = landlock_insert_rule(ruleset, id, access_rights);
	mutex_unlock(&ruleset->lock);
182 | /* |
183 | * No need to check for an error because landlock_insert_rule() |
184 | * increments the refcount for the new object if needed. |
185 | */ |
	landlock_put_object(id.key.object);
187 | return err; |
188 | } |
189 | |
190 | /* Access-control management */ |
191 | |
192 | /* |
193 | * The lifetime of the returned rule is tied to @domain. |
194 | * |
195 | * Returns NULL if no rule is found or if @dentry is negative. |
196 | */ |
197 | static const struct landlock_rule * |
198 | find_rule(const struct landlock_ruleset *const domain, |
199 | const struct dentry *const dentry) |
200 | { |
201 | const struct landlock_rule *rule; |
202 | const struct inode *inode; |
203 | struct landlock_id id = { |
204 | .type = LANDLOCK_KEY_INODE, |
205 | }; |
206 | |
	/* Ignores nonexistent leaves. */
208 | if (d_is_negative(dentry)) |
209 | return NULL; |
210 | |
	inode = d_backing_inode(dentry);
212 | rcu_read_lock(); |
213 | id.key.object = rcu_dereference(landlock_inode(inode)->object); |
	rule = landlock_find_rule(domain, id);
215 | rcu_read_unlock(); |
216 | return rule; |
217 | } |
218 | |
219 | /* |
220 | * Allows access to pseudo filesystems that will never be mountable (e.g. |
221 | * sockfs, pipefs), but can still be reachable through |
222 | * /proc/<pid>/fd/<file-descriptor> |
223 | */ |
224 | static bool is_nouser_or_private(const struct dentry *dentry) |
225 | { |
226 | return (dentry->d_sb->s_flags & SB_NOUSER) || |
227 | (d_is_positive(dentry) && |
228 | unlikely(IS_PRIVATE(d_backing_inode(dentry)))); |
229 | } |
230 | |
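/*
 * Returns the union of the filesystem access rights handled by all the layers
 * of @domain.
 */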
231 | static access_mask_t |
232 | get_raw_handled_fs_accesses(const struct landlock_ruleset *const domain) |
233 | { |
234 | access_mask_t access_dom = 0; |
235 | size_t layer_level; |
236 | |
237 | for (layer_level = 0; layer_level < domain->num_layers; layer_level++) |
238 | access_dom |= |
			landlock_get_raw_fs_access_mask(domain, layer_level);
240 | return access_dom; |
241 | } |
242 | |
243 | static access_mask_t |
244 | get_handled_fs_accesses(const struct landlock_ruleset *const domain) |
245 | { |
	/* Handles all access rights that are initially denied by default. */
247 | return get_raw_handled_fs_accesses(domain) | |
248 | LANDLOCK_ACCESS_FS_INITIALLY_DENIED; |
249 | } |
250 | |
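/*
 * Returns @domain if it handles at least one filesystem access right, or NULL
 * otherwise (i.e. when no filesystem check is needed).
 */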
251 | static const struct landlock_ruleset * |
252 | get_fs_domain(const struct landlock_ruleset *const domain) |
253 | { |
254 | if (!domain || !get_raw_handled_fs_accesses(domain)) |
255 | return NULL; |
256 | |
257 | return domain; |
258 | } |
259 | |
260 | static const struct landlock_ruleset *get_current_fs_domain(void) |
261 | { |
	return get_fs_domain(landlock_get_current_domain());
263 | } |
264 | |
265 | /* |
266 | * Check that a destination file hierarchy has more restrictions than a source |
267 | * file hierarchy. This is only used for link and rename actions. |
268 | * |
269 | * @layer_masks_child2: Optional child masks. |
270 | */ |
271 | static bool no_more_access( |
272 | const layer_mask_t (*const layer_masks_parent1)[LANDLOCK_NUM_ACCESS_FS], |
273 | const layer_mask_t (*const layer_masks_child1)[LANDLOCK_NUM_ACCESS_FS], |
274 | const bool child1_is_directory, |
275 | const layer_mask_t (*const layer_masks_parent2)[LANDLOCK_NUM_ACCESS_FS], |
276 | const layer_mask_t (*const layer_masks_child2)[LANDLOCK_NUM_ACCESS_FS], |
277 | const bool child2_is_directory) |
278 | { |
279 | unsigned long access_bit; |
280 | |
281 | for (access_bit = 0; access_bit < ARRAY_SIZE(*layer_masks_parent2); |
282 | access_bit++) { |
283 | /* Ignores accesses that only make sense for directories. */ |
284 | const bool is_file_access = |
285 | !!(BIT_ULL(access_bit) & ACCESS_FILE); |
286 | |
287 | if (child1_is_directory || is_file_access) { |
288 | /* |
289 | * Checks if the destination restrictions are a |
290 | * superset of the source ones (i.e. inherited access |
291 | * rights without child exceptions): |
292 | * restrictions(parent2) >= restrictions(child1) |
293 | */ |
294 | if ((((*layer_masks_parent1)[access_bit] & |
295 | (*layer_masks_child1)[access_bit]) | |
296 | (*layer_masks_parent2)[access_bit]) != |
297 | (*layer_masks_parent2)[access_bit]) |
298 | return false; |
299 | } |
300 | |
301 | if (!layer_masks_child2) |
302 | continue; |
303 | if (child2_is_directory || is_file_access) { |
304 | /* |
305 | * Checks inverted restrictions for RENAME_EXCHANGE: |
306 | * restrictions(parent1) >= restrictions(child2) |
307 | */ |
308 | if ((((*layer_masks_parent2)[access_bit] & |
309 | (*layer_masks_child2)[access_bit]) | |
310 | (*layer_masks_parent1)[access_bit]) != |
311 | (*layer_masks_parent1)[access_bit]) |
312 | return false; |
313 | } |
314 | } |
315 | return true; |
316 | } |
317 | |
318 | #define NMA_TRUE(...) KUNIT_EXPECT_TRUE(test, no_more_access(__VA_ARGS__)) |
319 | #define NMA_FALSE(...) KUNIT_EXPECT_FALSE(test, no_more_access(__VA_ARGS__)) |
320 | |
321 | #ifdef CONFIG_SECURITY_LANDLOCK_KUNIT_TEST |
322 | |
323 | static void test_no_more_access(struct kunit *const test) |
324 | { |
325 | const layer_mask_t rx0[LANDLOCK_NUM_ACCESS_FS] = { |
326 | [BIT_INDEX(LANDLOCK_ACCESS_FS_EXECUTE)] = BIT_ULL(0), |
327 | [BIT_INDEX(LANDLOCK_ACCESS_FS_READ_FILE)] = BIT_ULL(0), |
328 | }; |
329 | const layer_mask_t mx0[LANDLOCK_NUM_ACCESS_FS] = { |
330 | [BIT_INDEX(LANDLOCK_ACCESS_FS_EXECUTE)] = BIT_ULL(0), |
331 | [BIT_INDEX(LANDLOCK_ACCESS_FS_MAKE_REG)] = BIT_ULL(0), |
332 | }; |
333 | const layer_mask_t x0[LANDLOCK_NUM_ACCESS_FS] = { |
334 | [BIT_INDEX(LANDLOCK_ACCESS_FS_EXECUTE)] = BIT_ULL(0), |
335 | }; |
336 | const layer_mask_t x1[LANDLOCK_NUM_ACCESS_FS] = { |
337 | [BIT_INDEX(LANDLOCK_ACCESS_FS_EXECUTE)] = BIT_ULL(1), |
338 | }; |
339 | const layer_mask_t x01[LANDLOCK_NUM_ACCESS_FS] = { |
340 | [BIT_INDEX(LANDLOCK_ACCESS_FS_EXECUTE)] = BIT_ULL(0) | |
341 | BIT_ULL(1), |
342 | }; |
343 | const layer_mask_t allows_all[LANDLOCK_NUM_ACCESS_FS] = {}; |
344 | |
345 | /* Checks without restriction. */ |
346 | NMA_TRUE(&x0, &allows_all, false, &allows_all, NULL, false); |
347 | NMA_TRUE(&allows_all, &x0, false, &allows_all, NULL, false); |
348 | NMA_FALSE(&x0, &x0, false, &allows_all, NULL, false); |
349 | |
350 | /* |
351 | * Checks that we can only refer a file if no more access could be |
352 | * inherited. |
353 | */ |
354 | NMA_TRUE(&x0, &x0, false, &rx0, NULL, false); |
355 | NMA_TRUE(&rx0, &rx0, false, &rx0, NULL, false); |
356 | NMA_FALSE(&rx0, &rx0, false, &x0, NULL, false); |
357 | NMA_FALSE(&rx0, &rx0, false, &x1, NULL, false); |
358 | |
359 | /* Checks allowed referring with different nested domains. */ |
360 | NMA_TRUE(&x0, &x1, false, &x0, NULL, false); |
361 | NMA_TRUE(&x1, &x0, false, &x0, NULL, false); |
362 | NMA_TRUE(&x0, &x01, false, &x0, NULL, false); |
363 | NMA_TRUE(&x0, &x01, false, &rx0, NULL, false); |
364 | NMA_TRUE(&x01, &x0, false, &x0, NULL, false); |
365 | NMA_TRUE(&x01, &x0, false, &rx0, NULL, false); |
366 | NMA_FALSE(&x01, &x01, false, &x0, NULL, false); |
367 | |
368 | /* Checks that file access rights are also enforced for a directory. */ |
369 | NMA_FALSE(&rx0, &rx0, true, &x0, NULL, false); |
370 | |
371 | /* Checks that directory access rights don't impact file referring... */ |
372 | NMA_TRUE(&mx0, &mx0, false, &x0, NULL, false); |
373 | /* ...but only directory referring. */ |
374 | NMA_FALSE(&mx0, &mx0, true, &x0, NULL, false); |
375 | |
376 | /* Checks directory exchange. */ |
377 | NMA_TRUE(&mx0, &mx0, true, &mx0, &mx0, true); |
378 | NMA_TRUE(&mx0, &mx0, true, &mx0, &x0, true); |
379 | NMA_FALSE(&mx0, &mx0, true, &x0, &mx0, true); |
380 | NMA_FALSE(&mx0, &mx0, true, &x0, &x0, true); |
381 | NMA_FALSE(&mx0, &mx0, true, &x1, &x1, true); |
382 | |
383 | /* Checks file exchange with directory access rights... */ |
384 | NMA_TRUE(&mx0, &mx0, false, &mx0, &mx0, false); |
385 | NMA_TRUE(&mx0, &mx0, false, &mx0, &x0, false); |
386 | NMA_TRUE(&mx0, &mx0, false, &x0, &mx0, false); |
387 | NMA_TRUE(&mx0, &mx0, false, &x0, &x0, false); |
388 | /* ...and with file access rights. */ |
389 | NMA_TRUE(&rx0, &rx0, false, &rx0, &rx0, false); |
390 | NMA_TRUE(&rx0, &rx0, false, &rx0, &x0, false); |
391 | NMA_FALSE(&rx0, &rx0, false, &x0, &rx0, false); |
392 | NMA_FALSE(&rx0, &rx0, false, &x0, &x0, false); |
393 | NMA_FALSE(&rx0, &rx0, false, &x1, &x1, false); |
394 | |
395 | /* |
396 | * Allowing the following requests should not be a security risk |
397 | * because domain 0 denies execute access, and domain 1 is always |
398 | * nested with domain 0. However, adding an exception for this case |
399 | * would mean to check all nested domains to make sure none can get |
400 | * more privileges (e.g. processes only sandboxed by domain 0). |
401 | * Moreover, this behavior (i.e. composition of N domains) could then |
402 | * be inconsistent compared to domain 1's ruleset alone (e.g. it might |
403 | * be denied to link/rename with domain 1's ruleset, whereas it would |
404 | * be allowed if nested on top of domain 0). Another drawback would be |
	 * to create a covert channel that could enable sandboxed processes to
406 | * infer most of the filesystem restrictions from their domain. To |
407 | * make it simple, efficient, safe, and more consistent, this case is |
408 | * always denied. |
409 | */ |
410 | NMA_FALSE(&x1, &x1, false, &x0, NULL, false); |
411 | NMA_FALSE(&x1, &x1, false, &rx0, NULL, false); |
412 | NMA_FALSE(&x1, &x1, true, &x0, NULL, false); |
413 | NMA_FALSE(&x1, &x1, true, &rx0, NULL, false); |
414 | |
415 | /* Checks the same case of exclusive domains with a file... */ |
416 | NMA_TRUE(&x1, &x1, false, &x01, NULL, false); |
417 | NMA_FALSE(&x1, &x1, false, &x01, &x0, false); |
418 | NMA_FALSE(&x1, &x1, false, &x01, &x01, false); |
419 | NMA_FALSE(&x1, &x1, false, &x0, &x0, false); |
420 | /* ...and with a directory. */ |
421 | NMA_FALSE(&x1, &x1, false, &x0, &x0, true); |
422 | NMA_FALSE(&x1, &x1, true, &x0, &x0, false); |
423 | NMA_FALSE(&x1, &x1, true, &x0, &x0, true); |
424 | } |
425 | |
426 | #endif /* CONFIG_SECURITY_LANDLOCK_KUNIT_TEST */ |
427 | |
428 | #undef NMA_TRUE |
429 | #undef NMA_FALSE |
430 | |
431 | /* |
432 | * Removes @layer_masks accesses that are not requested. |
433 | * |
434 | * Returns true if the request is allowed, false otherwise. |
435 | */ |
436 | static bool |
437 | scope_to_request(const access_mask_t access_request, |
438 | layer_mask_t (*const layer_masks)[LANDLOCK_NUM_ACCESS_FS]) |
439 | { |
440 | const unsigned long access_req = access_request; |
441 | unsigned long access_bit; |
442 | |
443 | if (WARN_ON_ONCE(!layer_masks)) |
444 | return true; |
445 | |
446 | for_each_clear_bit(access_bit, &access_req, ARRAY_SIZE(*layer_masks)) |
447 | (*layer_masks)[access_bit] = 0; |
	return !memchr_inv(layer_masks, 0, sizeof(*layer_masks));
449 | } |
450 | |
451 | #ifdef CONFIG_SECURITY_LANDLOCK_KUNIT_TEST |
452 | |
453 | static void test_scope_to_request_with_exec_none(struct kunit *const test) |
454 | { |
455 | /* Allows everything. */ |
456 | layer_mask_t layer_masks[LANDLOCK_NUM_ACCESS_FS] = {}; |
457 | |
458 | /* Checks and scopes with execute. */ |
459 | KUNIT_EXPECT_TRUE(test, scope_to_request(LANDLOCK_ACCESS_FS_EXECUTE, |
460 | &layer_masks)); |
461 | KUNIT_EXPECT_EQ(test, 0, |
462 | layer_masks[BIT_INDEX(LANDLOCK_ACCESS_FS_EXECUTE)]); |
463 | KUNIT_EXPECT_EQ(test, 0, |
464 | layer_masks[BIT_INDEX(LANDLOCK_ACCESS_FS_WRITE_FILE)]); |
465 | } |
466 | |
467 | static void test_scope_to_request_with_exec_some(struct kunit *const test) |
468 | { |
469 | /* Denies execute and write. */ |
470 | layer_mask_t layer_masks[LANDLOCK_NUM_ACCESS_FS] = { |
471 | [BIT_INDEX(LANDLOCK_ACCESS_FS_EXECUTE)] = BIT_ULL(0), |
472 | [BIT_INDEX(LANDLOCK_ACCESS_FS_WRITE_FILE)] = BIT_ULL(1), |
473 | }; |
474 | |
475 | /* Checks and scopes with execute. */ |
476 | KUNIT_EXPECT_FALSE(test, scope_to_request(LANDLOCK_ACCESS_FS_EXECUTE, |
477 | &layer_masks)); |
478 | KUNIT_EXPECT_EQ(test, BIT_ULL(0), |
479 | layer_masks[BIT_INDEX(LANDLOCK_ACCESS_FS_EXECUTE)]); |
480 | KUNIT_EXPECT_EQ(test, 0, |
481 | layer_masks[BIT_INDEX(LANDLOCK_ACCESS_FS_WRITE_FILE)]); |
482 | } |
483 | |
484 | static void test_scope_to_request_without_access(struct kunit *const test) |
485 | { |
486 | /* Denies execute and write. */ |
487 | layer_mask_t layer_masks[LANDLOCK_NUM_ACCESS_FS] = { |
488 | [BIT_INDEX(LANDLOCK_ACCESS_FS_EXECUTE)] = BIT_ULL(0), |
489 | [BIT_INDEX(LANDLOCK_ACCESS_FS_WRITE_FILE)] = BIT_ULL(1), |
490 | }; |
491 | |
492 | /* Checks and scopes without access request. */ |
493 | KUNIT_EXPECT_TRUE(test, scope_to_request(0, &layer_masks)); |
494 | KUNIT_EXPECT_EQ(test, 0, |
495 | layer_masks[BIT_INDEX(LANDLOCK_ACCESS_FS_EXECUTE)]); |
496 | KUNIT_EXPECT_EQ(test, 0, |
497 | layer_masks[BIT_INDEX(LANDLOCK_ACCESS_FS_WRITE_FILE)]); |
498 | } |
499 | |
500 | #endif /* CONFIG_SECURITY_LANDLOCK_KUNIT_TEST */ |
501 | |
502 | /* |
503 | * Returns true if there is at least one access right different than |
504 | * LANDLOCK_ACCESS_FS_REFER. |
505 | */ |
506 | static bool |
507 | is_eacces(const layer_mask_t (*const layer_masks)[LANDLOCK_NUM_ACCESS_FS], |
508 | const access_mask_t access_request) |
509 | { |
510 | unsigned long access_bit; |
511 | /* LANDLOCK_ACCESS_FS_REFER alone must return -EXDEV. */ |
512 | const unsigned long access_check = access_request & |
513 | ~LANDLOCK_ACCESS_FS_REFER; |
514 | |
515 | if (!layer_masks) |
516 | return false; |
517 | |
518 | for_each_set_bit(access_bit, &access_check, ARRAY_SIZE(*layer_masks)) { |
519 | if ((*layer_masks)[access_bit]) |
520 | return true; |
521 | } |
522 | return false; |
523 | } |
524 | |
525 | #define IE_TRUE(...) KUNIT_EXPECT_TRUE(test, is_eacces(__VA_ARGS__)) |
526 | #define IE_FALSE(...) KUNIT_EXPECT_FALSE(test, is_eacces(__VA_ARGS__)) |
527 | |
528 | #ifdef CONFIG_SECURITY_LANDLOCK_KUNIT_TEST |
529 | |
530 | static void test_is_eacces_with_none(struct kunit *const test) |
531 | { |
532 | const layer_mask_t layer_masks[LANDLOCK_NUM_ACCESS_FS] = {}; |
533 | |
534 | IE_FALSE(&layer_masks, 0); |
535 | IE_FALSE(&layer_masks, LANDLOCK_ACCESS_FS_REFER); |
536 | IE_FALSE(&layer_masks, LANDLOCK_ACCESS_FS_EXECUTE); |
537 | IE_FALSE(&layer_masks, LANDLOCK_ACCESS_FS_WRITE_FILE); |
538 | } |
539 | |
540 | static void test_is_eacces_with_refer(struct kunit *const test) |
541 | { |
542 | const layer_mask_t layer_masks[LANDLOCK_NUM_ACCESS_FS] = { |
543 | [BIT_INDEX(LANDLOCK_ACCESS_FS_REFER)] = BIT_ULL(0), |
544 | }; |
545 | |
546 | IE_FALSE(&layer_masks, 0); |
547 | IE_FALSE(&layer_masks, LANDLOCK_ACCESS_FS_REFER); |
548 | IE_FALSE(&layer_masks, LANDLOCK_ACCESS_FS_EXECUTE); |
549 | IE_FALSE(&layer_masks, LANDLOCK_ACCESS_FS_WRITE_FILE); |
550 | } |
551 | |
552 | static void test_is_eacces_with_write(struct kunit *const test) |
553 | { |
554 | const layer_mask_t layer_masks[LANDLOCK_NUM_ACCESS_FS] = { |
555 | [BIT_INDEX(LANDLOCK_ACCESS_FS_WRITE_FILE)] = BIT_ULL(0), |
556 | }; |
557 | |
558 | IE_FALSE(&layer_masks, 0); |
559 | IE_FALSE(&layer_masks, LANDLOCK_ACCESS_FS_REFER); |
560 | IE_FALSE(&layer_masks, LANDLOCK_ACCESS_FS_EXECUTE); |
561 | |
562 | IE_TRUE(&layer_masks, LANDLOCK_ACCESS_FS_WRITE_FILE); |
563 | } |
564 | |
565 | #endif /* CONFIG_SECURITY_LANDLOCK_KUNIT_TEST */ |
566 | |
567 | #undef IE_TRUE |
568 | #undef IE_FALSE |
569 | |
570 | /** |
571 | * is_access_to_paths_allowed - Check accesses for requests with a common path |
572 | * |
573 | * @domain: Domain to check against. |
574 | * @path: File hierarchy to walk through. |
575 | * @access_request_parent1: Accesses to check, once @layer_masks_parent1 is |
576 | * equal to @layer_masks_parent2 (if any). This is tied to the unique |
577 | * requested path for most actions, or the source in case of a refer action |
578 | * (i.e. rename or link), or the source and destination in case of |
579 | * RENAME_EXCHANGE. |
580 | * @layer_masks_parent1: Pointer to a matrix of layer masks per access |
581 | * masks, identifying the layers that forbid a specific access. Bits from |
582 | * this matrix can be unset according to the @path walk. An empty matrix |
583 | * means that @domain allows all possible Landlock accesses (i.e. not only |
584 | * those identified by @access_request_parent1). This matrix can |
585 | * initially refer to domain layer masks and, when the accesses for the |
586 | * destination and source are the same, to requested layer masks. |
587 | * @dentry_child1: Dentry to the initial child of the parent1 path. This |
588 | * pointer must be NULL for non-refer actions (i.e. not link nor rename). |
589 | * @access_request_parent2: Similar to @access_request_parent1 but for a |
590 | * request involving a source and a destination. This refers to the |
591 | * destination, except in case of RENAME_EXCHANGE where it also refers to |
592 | * the source. Must be set to 0 when using a simple path request. |
593 | * @layer_masks_parent2: Similar to @layer_masks_parent1 but for a refer |
594 | * action. This must be NULL otherwise. |
595 | * @dentry_child2: Dentry to the initial child of the parent2 path. This |
596 | * pointer is only set for RENAME_EXCHANGE actions and must be NULL |
597 | * otherwise. |
598 | * |
599 | * This helper first checks that the destination has a superset of restrictions |
600 | * compared to the source (if any) for a common path. Because of |
601 | * RENAME_EXCHANGE actions, source and destinations may be swapped. It then |
602 | * checks that the collected accesses and the remaining ones are enough to |
603 | * allow the request. |
604 | * |
605 | * Returns: |
606 | * - true if the access request is granted; |
607 | * - false otherwise. |
608 | */ |
609 | static bool is_access_to_paths_allowed( |
610 | const struct landlock_ruleset *const domain, |
611 | const struct path *const path, |
612 | const access_mask_t access_request_parent1, |
613 | layer_mask_t (*const layer_masks_parent1)[LANDLOCK_NUM_ACCESS_FS], |
614 | const struct dentry *const dentry_child1, |
615 | const access_mask_t access_request_parent2, |
616 | layer_mask_t (*const layer_masks_parent2)[LANDLOCK_NUM_ACCESS_FS], |
617 | const struct dentry *const dentry_child2) |
618 | { |
619 | bool allowed_parent1 = false, allowed_parent2 = false, is_dom_check, |
620 | child1_is_directory = true, child2_is_directory = true; |
621 | struct path walker_path; |
622 | access_mask_t access_masked_parent1, access_masked_parent2; |
623 | layer_mask_t _layer_masks_child1[LANDLOCK_NUM_ACCESS_FS], |
624 | _layer_masks_child2[LANDLOCK_NUM_ACCESS_FS]; |
625 | layer_mask_t(*layer_masks_child1)[LANDLOCK_NUM_ACCESS_FS] = NULL, |
626 | (*layer_masks_child2)[LANDLOCK_NUM_ACCESS_FS] = NULL; |
627 | |
628 | if (!access_request_parent1 && !access_request_parent2) |
629 | return true; |
630 | if (WARN_ON_ONCE(!domain || !path)) |
631 | return true; |
	if (is_nouser_or_private(path->dentry))
633 | return true; |
634 | if (WARN_ON_ONCE(domain->num_layers < 1 || !layer_masks_parent1)) |
635 | return false; |
636 | |
637 | if (unlikely(layer_masks_parent2)) { |
638 | if (WARN_ON_ONCE(!dentry_child1)) |
639 | return false; |
640 | /* |
641 | * For a double request, first check for potential privilege |
642 | * escalation by looking at domain handled accesses (which are |
643 | * a superset of the meaningful requested accesses). |
644 | */ |
645 | access_masked_parent1 = access_masked_parent2 = |
646 | get_handled_fs_accesses(domain); |
647 | is_dom_check = true; |
648 | } else { |
649 | if (WARN_ON_ONCE(dentry_child1 || dentry_child2)) |
650 | return false; |
651 | /* For a simple request, only check for requested accesses. */ |
652 | access_masked_parent1 = access_request_parent1; |
653 | access_masked_parent2 = access_request_parent2; |
654 | is_dom_check = false; |
655 | } |
656 | |
657 | if (unlikely(dentry_child1)) { |
		landlock_unmask_layers(
			find_rule(domain, dentry_child1),
			landlock_init_layer_masks(
				domain, LANDLOCK_MASK_ACCESS_FS,
				&_layer_masks_child1, LANDLOCK_KEY_INODE),
			&_layer_masks_child1, ARRAY_SIZE(_layer_masks_child1));
		layer_masks_child1 = &_layer_masks_child1;
		child1_is_directory = d_is_dir(dentry_child1);
666 | } |
667 | if (unlikely(dentry_child2)) { |
		landlock_unmask_layers(
			find_rule(domain, dentry_child2),
			landlock_init_layer_masks(
				domain, LANDLOCK_MASK_ACCESS_FS,
				&_layer_masks_child2, LANDLOCK_KEY_INODE),
			&_layer_masks_child2, ARRAY_SIZE(_layer_masks_child2));
		layer_masks_child2 = &_layer_masks_child2;
		child2_is_directory = d_is_dir(dentry_child2);
676 | } |
677 | |
678 | walker_path = *path; |
679 | path_get(&walker_path); |
680 | /* |
681 | * We need to walk through all the hierarchy to not miss any relevant |
682 | * restriction. |
683 | */ |
684 | while (true) { |
685 | struct dentry *parent_dentry; |
686 | const struct landlock_rule *rule; |
687 | |
		/*
		 * If at least all accesses allowed on the destination are
		 * already allowed on the source, respectively if there are at
		 * least as many restrictions on the destination as on the
		 * source, then we can safely refer files from the source to
		 * the destination without risking a privilege escalation.
		 * This also applies in the case of RENAME_EXCHANGE, which
		 * implies checks in both directions.  This is crucial for
		 * standalone multilayered security policies.  Furthermore,
		 * this helps policy writers avoid shooting themselves in the
		 * foot.
		 */
700 | if (unlikely(is_dom_check && |
701 | no_more_access( |
702 | layer_masks_parent1, layer_masks_child1, |
703 | child1_is_directory, layer_masks_parent2, |
704 | layer_masks_child2, |
705 | child2_is_directory))) { |
			allowed_parent1 = scope_to_request(
				access_request_parent1, layer_masks_parent1);
			allowed_parent2 = scope_to_request(
				access_request_parent2, layer_masks_parent2);
710 | |
711 | /* Stops when all accesses are granted. */ |
712 | if (allowed_parent1 && allowed_parent2) |
713 | break; |
714 | |
715 | /* |
716 | * Now, downgrades the remaining checks from domain |
717 | * handled accesses to requested accesses. |
718 | */ |
719 | is_dom_check = false; |
720 | access_masked_parent1 = access_request_parent1; |
721 | access_masked_parent2 = access_request_parent2; |
722 | } |
723 | |
		rule = find_rule(domain, walker_path.dentry);
		allowed_parent1 = landlock_unmask_layers(
			rule, access_masked_parent1, layer_masks_parent1,
			ARRAY_SIZE(*layer_masks_parent1));
		allowed_parent2 = landlock_unmask_layers(
			rule, access_masked_parent2, layer_masks_parent2,
			ARRAY_SIZE(*layer_masks_parent2));
731 | |
732 | /* Stops when a rule from each layer grants access. */ |
733 | if (allowed_parent1 && allowed_parent2) |
734 | break; |
735 | jump_up: |
736 | if (walker_path.dentry == walker_path.mnt->mnt_root) { |
737 | if (follow_up(&walker_path)) { |
738 | /* Ignores hidden mount points. */ |
739 | goto jump_up; |
740 | } else { |
741 | /* |
742 | * Stops at the real root. Denies access |
743 | * because not all layers have granted access. |
744 | */ |
745 | break; |
746 | } |
747 | } |
748 | if (unlikely(IS_ROOT(walker_path.dentry))) { |
749 | /* |
750 | * Stops at disconnected root directories. Only allows |
751 | * access to internal filesystems (e.g. nsfs, which is |
752 | * reachable through /proc/<pid>/ns/<namespace>). |
753 | */ |
754 | allowed_parent1 = allowed_parent2 = |
755 | !!(walker_path.mnt->mnt_flags & MNT_INTERNAL); |
756 | break; |
757 | } |
		parent_dentry = dget_parent(walker_path.dentry);
759 | dput(walker_path.dentry); |
760 | walker_path.dentry = parent_dentry; |
761 | } |
762 | path_put(&walker_path); |
763 | |
764 | return allowed_parent1 && allowed_parent2; |
765 | } |
766 | |
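/*
 * Checks @access_request for the single @path against @domain.
 *
 * Returns 0 if all the requested accesses are allowed, -EACCES otherwise.
 */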
767 | static int check_access_path(const struct landlock_ruleset *const domain, |
768 | const struct path *const path, |
769 | access_mask_t access_request) |
770 | { |
771 | layer_mask_t layer_masks[LANDLOCK_NUM_ACCESS_FS] = {}; |
772 | |
773 | access_request = landlock_init_layer_masks( |
		domain, access_request, &layer_masks, LANDLOCK_KEY_INODE);
	if (is_access_to_paths_allowed(domain, path, access_request,
				       &layer_masks, NULL, 0, NULL, NULL))
777 | return 0; |
778 | return -EACCES; |
779 | } |
780 | |
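/* Same as check_access_path() but checks against the current task's domain. */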
781 | static int current_check_access_path(const struct path *const path, |
782 | const access_mask_t access_request) |
783 | { |
784 | const struct landlock_ruleset *const dom = get_current_fs_domain(); |
785 | |
786 | if (!dom) |
787 | return 0; |
	return check_access_path(dom, path, access_request);
789 | } |
790 | |
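/*
 * Translates the file type encoded in @mode into the matching
 * LANDLOCK_ACCESS_FS_MAKE_* access right.
 */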
791 | static access_mask_t get_mode_access(const umode_t mode) |
792 | { |
793 | switch (mode & S_IFMT) { |
794 | case S_IFLNK: |
795 | return LANDLOCK_ACCESS_FS_MAKE_SYM; |
796 | case 0: |
797 | /* A zero mode translates to S_IFREG. */ |
798 | case S_IFREG: |
799 | return LANDLOCK_ACCESS_FS_MAKE_REG; |
800 | case S_IFDIR: |
801 | return LANDLOCK_ACCESS_FS_MAKE_DIR; |
802 | case S_IFCHR: |
803 | return LANDLOCK_ACCESS_FS_MAKE_CHAR; |
804 | case S_IFBLK: |
805 | return LANDLOCK_ACCESS_FS_MAKE_BLOCK; |
806 | case S_IFIFO: |
807 | return LANDLOCK_ACCESS_FS_MAKE_FIFO; |
808 | case S_IFSOCK: |
809 | return LANDLOCK_ACCESS_FS_MAKE_SOCK; |
810 | default: |
811 | WARN_ON_ONCE(1); |
812 | return 0; |
813 | } |
814 | } |
815 | |
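/*
 * Returns the removal access right matching @dentry's type, or 0 if @dentry is
 * negative.
 */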
816 | static access_mask_t maybe_remove(const struct dentry *const dentry) |
817 | { |
818 | if (d_is_negative(dentry)) |
819 | return 0; |
820 | return d_is_dir(dentry) ? LANDLOCK_ACCESS_FS_REMOVE_DIR : |
821 | LANDLOCK_ACCESS_FS_REMOVE_FILE; |
822 | } |
823 | |
824 | /** |
825 | * collect_domain_accesses - Walk through a file path and collect accesses |
826 | * |
827 | * @domain: Domain to check against. |
828 | * @mnt_root: Last directory to check. |
829 | * @dir: Directory to start the walk from. |
830 | * @layer_masks_dom: Where to store the collected accesses. |
831 | * |
832 | * This helper is useful to begin a path walk from the @dir directory to a |
833 | * @mnt_root directory used as a mount point. This mount point is the common |
 * ancestor between the source and the destination of a renamed or linked
835 | * file. While walking from @dir to @mnt_root, we record all the domain's |
836 | * allowed accesses in @layer_masks_dom. |
837 | * |
838 | * This is similar to is_access_to_paths_allowed() but much simpler because it |
839 | * only handles walking on the same mount point and only checks one set of |
840 | * accesses. |
841 | * |
842 | * Returns: |
843 | * - true if all the domain access rights are allowed for @dir; |
844 | * - false if the walk reached @mnt_root. |
845 | */ |
846 | static bool collect_domain_accesses( |
847 | const struct landlock_ruleset *const domain, |
848 | const struct dentry *const mnt_root, struct dentry *dir, |
849 | layer_mask_t (*const layer_masks_dom)[LANDLOCK_NUM_ACCESS_FS]) |
850 | { |
851 | unsigned long access_dom; |
852 | bool ret = false; |
853 | |
854 | if (WARN_ON_ONCE(!domain || !mnt_root || !dir || !layer_masks_dom)) |
855 | return true; |
	if (is_nouser_or_private(dir))
857 | return true; |
858 | |
	access_dom = landlock_init_layer_masks(domain, LANDLOCK_MASK_ACCESS_FS,
					       layer_masks_dom,
					       LANDLOCK_KEY_INODE);
862 | |
	dget(dir);
864 | while (true) { |
865 | struct dentry *parent_dentry; |
866 | |
867 | /* Gets all layers allowing all domain accesses. */ |
		if (landlock_unmask_layers(find_rule(domain, dir), access_dom,
					   layer_masks_dom,
					   ARRAY_SIZE(*layer_masks_dom))) {
871 | /* |
872 | * Stops when all handled accesses are allowed by at |
873 | * least one rule in each layer. |
874 | */ |
875 | ret = true; |
876 | break; |
877 | } |
878 | |
879 | /* We should not reach a root other than @mnt_root. */ |
880 | if (dir == mnt_root || WARN_ON_ONCE(IS_ROOT(dir))) |
881 | break; |
882 | |
		parent_dentry = dget_parent(dir);
884 | dput(dir); |
885 | dir = parent_dentry; |
886 | } |
887 | dput(dir); |
888 | return ret; |
889 | } |
890 | |
891 | /** |
892 | * current_check_refer_path - Check if a rename or link action is allowed |
893 | * |
894 | * @old_dentry: File or directory requested to be moved or linked. |
895 | * @new_dir: Destination parent directory. |
896 | * @new_dentry: Destination file or directory. |
 * @removable: Set to true if it is a rename operation.
 * @exchange: Set to true if it is a rename operation with RENAME_EXCHANGE.
899 | * |
900 | * Because of its unprivileged constraints, Landlock relies on file hierarchies |
901 | * (and not only inodes) to tie access rights to files. Being able to link or |
902 | * rename a file hierarchy brings some challenges. Indeed, moving or linking a |
903 | * file (i.e. creating a new reference to an inode) can have an impact on the |
904 | * actions allowed for a set of files if it would change its parent directory |
905 | * (i.e. reparenting). |
906 | * |
907 | * To avoid trivial access right bypasses, Landlock first checks if the file or |
908 | * directory requested to be moved would gain new access rights inherited from |
909 | * its new hierarchy. Before returning any error, Landlock then checks that |
910 | * the parent source hierarchy and the destination hierarchy would allow the |
911 | * link or rename action. If it is not the case, an error with EACCES is |
912 | * returned to inform user space that there is no way to remove or create the |
913 | * requested source file type. If it should be allowed but the new inherited |
914 | * access rights would be greater than the source access rights, then the |
915 | * kernel returns an error with EXDEV. Prioritizing EACCES over EXDEV enables |
916 | * user space to abort the whole operation if there is no way to do it, or to |
917 | * manually copy the source to the destination if this remains allowed, e.g. |
918 | * because file creation is allowed on the destination directory but not direct |
919 | * linking. |
920 | * |
921 | * To achieve this goal, the kernel needs to compare two file hierarchies: the |
922 | * one identifying the source file or directory (including itself), and the |
923 | * destination one. This can be seen as a multilayer partial ordering problem. |
924 | * The kernel walks through these paths and collects in a matrix the access |
925 | * rights that are denied per layer. These matrices are then compared to see |
926 | * if the destination one has more (or the same) restrictions as the source |
927 | * one. If this is the case, the requested action will not return EXDEV, which |
928 | * doesn't mean the action is allowed. The parent hierarchy of the source |
929 | * (i.e. parent directory), and the destination hierarchy must also be checked |
930 | * to verify that they explicitly allow such action (i.e. referencing, |
931 | * creation and potentially removal rights). The kernel implementation is then |
932 | * required to rely on potentially four matrices of access rights: one for the |
933 | * source file or directory (i.e. the child), a potentially other one for the |
934 | * other source/destination (in case of RENAME_EXCHANGE), one for the source |
935 | * parent hierarchy and a last one for the destination hierarchy. These |
936 | * ephemeral matrices take some space on the stack, which limits the number of |
937 | * layers to a deemed reasonable number: 16. |
938 | * |
939 | * Returns: |
940 | * - 0 if access is allowed; |
941 | * - -EXDEV if @old_dentry would inherit new access rights from @new_dir; |
942 | * - -EACCES if file removal or creation is denied. |
943 | */ |
944 | static int current_check_refer_path(struct dentry *const old_dentry, |
945 | const struct path *const new_dir, |
946 | struct dentry *const new_dentry, |
947 | const bool removable, const bool exchange) |
948 | { |
949 | const struct landlock_ruleset *const dom = get_current_fs_domain(); |
950 | bool allow_parent1, allow_parent2; |
951 | access_mask_t access_request_parent1, access_request_parent2; |
952 | struct path mnt_dir; |
953 | layer_mask_t layer_masks_parent1[LANDLOCK_NUM_ACCESS_FS] = {}, |
954 | layer_masks_parent2[LANDLOCK_NUM_ACCESS_FS] = {}; |
955 | |
956 | if (!dom) |
957 | return 0; |
958 | if (WARN_ON_ONCE(dom->num_layers < 1)) |
959 | return -EACCES; |
960 | if (unlikely(d_is_negative(old_dentry))) |
961 | return -ENOENT; |
962 | if (exchange) { |
963 | if (unlikely(d_is_negative(new_dentry))) |
964 | return -ENOENT; |
965 | access_request_parent1 = |
			get_mode_access(d_backing_inode(new_dentry)->i_mode);
967 | } else { |
968 | access_request_parent1 = 0; |
969 | } |
970 | access_request_parent2 = |
		get_mode_access(d_backing_inode(old_dentry)->i_mode);
	if (removable) {
		access_request_parent1 |= maybe_remove(old_dentry);
		access_request_parent2 |= maybe_remove(new_dentry);
975 | } |
976 | |
977 | /* The mount points are the same for old and new paths, cf. EXDEV. */ |
978 | if (old_dentry->d_parent == new_dir->dentry) { |
979 | /* |
980 | * The LANDLOCK_ACCESS_FS_REFER access right is not required |
981 | * for same-directory referer (i.e. no reparenting). |
982 | */ |
		access_request_parent1 = landlock_init_layer_masks(
			dom, access_request_parent1 | access_request_parent2,
			&layer_masks_parent1, LANDLOCK_KEY_INODE);
		if (is_access_to_paths_allowed(
			    dom, new_dir, access_request_parent1,
			    &layer_masks_parent1, NULL, 0, NULL, NULL))
989 | return 0; |
990 | return -EACCES; |
991 | } |
992 | |
993 | access_request_parent1 |= LANDLOCK_ACCESS_FS_REFER; |
994 | access_request_parent2 |= LANDLOCK_ACCESS_FS_REFER; |
995 | |
996 | /* Saves the common mount point. */ |
997 | mnt_dir.mnt = new_dir->mnt; |
998 | mnt_dir.dentry = new_dir->mnt->mnt_root; |
999 | |
1000 | /* new_dir->dentry is equal to new_dentry->d_parent */ |
	allow_parent1 = collect_domain_accesses(dom, mnt_dir.dentry,
						old_dentry->d_parent,
						&layer_masks_parent1);
	allow_parent2 = collect_domain_accesses(
		dom, mnt_dir.dentry, new_dir->dentry, &layer_masks_parent2);
1006 | |
1007 | if (allow_parent1 && allow_parent2) |
1008 | return 0; |
1009 | |
1010 | /* |
1011 | * To be able to compare source and destination domain access rights, |
1012 | * take into account the @old_dentry access rights aggregated with its |
1013 | * parent access rights. This will be useful to compare with the |
1014 | * destination parent access rights. |
1015 | */ |
	if (is_access_to_paths_allowed(
		    dom, &mnt_dir, access_request_parent1, &layer_masks_parent1,
		    old_dentry, access_request_parent2, &layer_masks_parent2,
		    exchange ? new_dentry : NULL))
1020 | return 0; |
1021 | |
1022 | /* |
1023 | * This prioritizes EACCES over EXDEV for all actions, including |
1024 | * renames with RENAME_EXCHANGE. |
1025 | */ |
1026 | if (likely(is_eacces(&layer_masks_parent1, access_request_parent1) || |
1027 | is_eacces(&layer_masks_parent2, access_request_parent2))) |
1028 | return -EACCES; |
1029 | |
1030 | /* |
1031 | * Gracefully forbids reparenting if the destination directory |
1032 | * hierarchy is not a superset of restrictions of the source directory |
1033 | * hierarchy, or if LANDLOCK_ACCESS_FS_REFER is not allowed by the |
1034 | * source or the destination. |
1035 | */ |
1036 | return -EXDEV; |
1037 | } |
1038 | |
1039 | /* Inode hooks */ |
1040 | |
1041 | static void hook_inode_free_security(struct inode *const inode) |
1042 | { |
1043 | /* |
1044 | * All inodes must already have been untied from their object by |
1045 | * release_inode() or hook_sb_delete(). |
1046 | */ |
1047 | WARN_ON_ONCE(landlock_inode(inode)->object); |
1048 | } |
1049 | |
1050 | /* Super-block hooks */ |
1051 | |
1052 | /* |
1053 | * Release the inodes used in a security policy. |
1054 | * |
1055 | * Cf. fsnotify_unmount_inodes() and invalidate_inodes() |
1056 | */ |
1057 | static void hook_sb_delete(struct super_block *const sb) |
1058 | { |
1059 | struct inode *inode, *prev_inode = NULL; |
1060 | |
1061 | if (!landlock_initialized) |
1062 | return; |
1063 | |
	spin_lock(&sb->s_inode_list_lock);
1065 | list_for_each_entry(inode, &sb->s_inodes, i_sb_list) { |
1066 | struct landlock_object *object; |
1067 | |
1068 | /* Only handles referenced inodes. */ |
		if (!atomic_read(&inode->i_count))
1070 | continue; |
1071 | |
1072 | /* |
1073 | * Protects against concurrent modification of inode (e.g. |
1074 | * from get_inode_object()). |
1075 | */ |
		spin_lock(&inode->i_lock);
1077 | /* |
1078 | * Checks I_FREEING and I_WILL_FREE to protect against a race |
1079 | * condition when release_inode() just called iput(), which |
1080 | * could lead to a NULL dereference of inode->security or a |
1081 | * second call to iput() for the same Landlock object. Also |
1082 | * checks I_NEW because such inode cannot be tied to an object. |
1083 | */ |
1084 | if (inode->i_state & (I_FREEING | I_WILL_FREE | I_NEW)) { |
			spin_unlock(&inode->i_lock);
1086 | continue; |
1087 | } |
1088 | |
1089 | rcu_read_lock(); |
1090 | object = rcu_dereference(landlock_inode(inode)->object); |
1091 | if (!object) { |
1092 | rcu_read_unlock(); |
			spin_unlock(&inode->i_lock);
1094 | continue; |
1095 | } |
1096 | /* Keeps a reference to this inode until the next loop walk. */ |
1097 | __iget(inode); |
		spin_unlock(&inode->i_lock);
1099 | |
1100 | /* |
1101 | * If there is no concurrent release_inode() ongoing, then we |
1102 | * are in charge of calling iput() on this inode, otherwise we |
1103 | * will just wait for it to finish. |
1104 | */ |
		spin_lock(&object->lock);
1106 | if (object->underobj == inode) { |
1107 | object->underobj = NULL; |
			spin_unlock(&object->lock);
1109 | rcu_read_unlock(); |
1110 | |
1111 | /* |
1112 | * Because object->underobj was not NULL, |
1113 | * release_inode() and get_inode_object() guarantee |
1114 | * that it is safe to reset |
1115 | * landlock_inode(inode)->object while it is not NULL. |
1116 | * It is therefore not necessary to lock inode->i_lock. |
1117 | */ |
1118 | rcu_assign_pointer(landlock_inode(inode)->object, NULL); |
1119 | /* |
1120 | * At this point, we own the ihold() reference that was |
1121 | * originally set up by get_inode_object() and the |
1122 | * __iget() reference that we just set in this loop |
1123 | * walk. Therefore the following call to iput() will |
			 * not sleep nor drop the inode because there are now
			 * at least two references to it.
1126 | */ |
1127 | iput(inode); |
1128 | } else { |
			spin_unlock(&object->lock);
1130 | rcu_read_unlock(); |
1131 | } |
1132 | |
1133 | if (prev_inode) { |
1134 | /* |
1135 | * At this point, we still own the __iget() reference |
1136 | * that we just set in this loop walk. Therefore we |
1137 | * can drop the list lock and know that the inode won't |
1138 | * disappear from under us until the next loop walk. |
1139 | */ |
			spin_unlock(&sb->s_inode_list_lock);
1141 | /* |
1142 | * We can now actually put the inode reference from the |
1143 | * previous loop walk, which is not needed anymore. |
1144 | */ |
1145 | iput(prev_inode); |
1146 | cond_resched(); |
			spin_lock(&sb->s_inode_list_lock);
1148 | } |
1149 | prev_inode = inode; |
1150 | } |
	spin_unlock(&sb->s_inode_list_lock);
1152 | |
1153 | /* Puts the inode reference from the last loop walk, if any. */ |
1154 | if (prev_inode) |
1155 | iput(prev_inode); |
1156 | /* Waits for pending iput() in release_inode(). */ |
1157 | wait_var_event(&landlock_superblock(sb)->inode_refs, |
1158 | !atomic_long_read(&landlock_superblock(sb)->inode_refs)); |
1159 | } |
1160 | |
1161 | /* |
1162 | * Because a Landlock security policy is defined according to the filesystem |
1163 | * topology (i.e. the mount namespace), changing it may grant access to files |
1164 | * not previously allowed. |
1165 | * |
1166 | * To make it simple, deny any filesystem topology modification by landlocked |
1167 | * processes. Non-landlocked processes may still change the namespace of a |
1168 | * landlocked process, but this kind of threat must be handled by a system-wide |
1169 | * access-control security policy. |
1170 | * |
1171 | * This could be lifted in the future if Landlock can safely handle mount |
1172 | * namespace updates requested by a landlocked process. Indeed, we could |
1173 | * update the current domain (which is currently read-only) by taking into |
1174 | * account the accesses of the source and the destination of a new mount point. |
1175 | * However, it would also require to make all the child domains dynamically |
1176 | * inherit these new constraints. Anyway, for backward compatibility reasons, |
1177 | * a dedicated user space option would be required (e.g. as a ruleset flag). |
1178 | */ |
1179 | static int hook_sb_mount(const char *const dev_name, |
1180 | const struct path *const path, const char *const type, |
1181 | const unsigned long flags, void *const data) |
1182 | { |
1183 | if (!get_current_fs_domain()) |
1184 | return 0; |
1185 | return -EPERM; |
1186 | } |
1187 | |
1188 | static int hook_move_mount(const struct path *const from_path, |
1189 | const struct path *const to_path) |
1190 | { |
1191 | if (!get_current_fs_domain()) |
1192 | return 0; |
1193 | return -EPERM; |
1194 | } |
1195 | |
1196 | /* |
1197 | * Removing a mount point may reveal a previously hidden file hierarchy, which |
 * may then grant access to files that were previously forbidden.
1199 | */ |
1200 | static int hook_sb_umount(struct vfsmount *const mnt, const int flags) |
1201 | { |
1202 | if (!get_current_fs_domain()) |
1203 | return 0; |
1204 | return -EPERM; |
1205 | } |
1206 | |
1207 | static int hook_sb_remount(struct super_block *const sb, void *const mnt_opts) |
1208 | { |
1209 | if (!get_current_fs_domain()) |
1210 | return 0; |
1211 | return -EPERM; |
1212 | } |
1213 | |
1214 | /* |
1215 | * pivot_root(2), like mount(2), changes the current mount namespace. It must |
1216 | * then be forbidden for a landlocked process. |
1217 | * |
1218 | * However, chroot(2) may be allowed because it only changes the relative root |
1219 | * directory of the current process. Moreover, it can be used to restrict the |
1220 | * view of the filesystem. |
1221 | */ |
1222 | static int hook_sb_pivotroot(const struct path *const old_path, |
1223 | const struct path *const new_path) |
1224 | { |
1225 | if (!get_current_fs_domain()) |
1226 | return 0; |
1227 | return -EPERM; |
1228 | } |
1229 | |
1230 | /* Path hooks */ |
1231 | |
1232 | static int hook_path_link(struct dentry *const old_dentry, |
1233 | const struct path *const new_dir, |
1234 | struct dentry *const new_dentry) |
1235 | { |
	return current_check_refer_path(old_dentry, new_dir, new_dentry, false,
					false);
1238 | } |
1239 | |
1240 | static int hook_path_rename(const struct path *const old_dir, |
1241 | struct dentry *const old_dentry, |
1242 | const struct path *const new_dir, |
1243 | struct dentry *const new_dentry, |
1244 | const unsigned int flags) |
1245 | { |
1246 | /* old_dir refers to old_dentry->d_parent and new_dir->mnt */ |
	return current_check_refer_path(old_dentry, new_dir, new_dentry, true,
					!!(flags & RENAME_EXCHANGE));
1249 | } |
1250 | |
1251 | static int hook_path_mkdir(const struct path *const dir, |
1252 | struct dentry *const dentry, const umode_t mode) |
1253 | { |
	return current_check_access_path(dir, LANDLOCK_ACCESS_FS_MAKE_DIR);
1255 | } |
1256 | |
1257 | static int hook_path_mknod(const struct path *const dir, |
1258 | struct dentry *const dentry, const umode_t mode, |
1259 | const unsigned int dev) |
1260 | { |
1261 | const struct landlock_ruleset *const dom = get_current_fs_domain(); |
1262 | |
1263 | if (!dom) |
1264 | return 0; |
	return check_access_path(dom, dir, get_mode_access(mode));
1266 | } |
1267 | |
1268 | static int hook_path_symlink(const struct path *const dir, |
1269 | struct dentry *const dentry, |
1270 | const char *const old_name) |
1271 | { |
	return current_check_access_path(dir, LANDLOCK_ACCESS_FS_MAKE_SYM);
1273 | } |
1274 | |
1275 | static int hook_path_unlink(const struct path *const dir, |
1276 | struct dentry *const dentry) |
1277 | { |
	return current_check_access_path(dir, LANDLOCK_ACCESS_FS_REMOVE_FILE);
1279 | } |
1280 | |
1281 | static int hook_path_rmdir(const struct path *const dir, |
1282 | struct dentry *const dentry) |
1283 | { |
	return current_check_access_path(dir, LANDLOCK_ACCESS_FS_REMOVE_DIR);
1285 | } |
1286 | |
1287 | static int hook_path_truncate(const struct path *const path) |
1288 | { |
1289 | return current_check_access_path(path, LANDLOCK_ACCESS_FS_TRUNCATE); |
1290 | } |
1291 | |
1292 | /* File hooks */ |
1293 | |
1294 | /** |
1295 | * get_required_file_open_access - Get access needed to open a file |
1296 | * |
1297 | * @file: File being opened. |
1298 | * |
1299 | * Returns the access rights that are required for opening the given file, |
1300 | * depending on the file type and open mode. |
1301 | */ |
1302 | static access_mask_t |
1303 | get_required_file_open_access(const struct file *const file) |
1304 | { |
1305 | access_mask_t access = 0; |
1306 | |
1307 | if (file->f_mode & FMODE_READ) { |
1308 | /* A directory can only be opened in read mode. */ |
1309 | if (S_ISDIR(file_inode(file)->i_mode)) |
1310 | return LANDLOCK_ACCESS_FS_READ_DIR; |
1311 | access = LANDLOCK_ACCESS_FS_READ_FILE; |
1312 | } |
1313 | if (file->f_mode & FMODE_WRITE) |
1314 | access |= LANDLOCK_ACCESS_FS_WRITE_FILE; |
1315 | /* __FMODE_EXEC is indeed part of f_flags, not f_mode. */ |
1316 | if (file->f_flags & __FMODE_EXEC) |
1317 | access |= LANDLOCK_ACCESS_FS_EXECUTE; |
1318 | return access; |
1319 | } |
1320 | |
1321 | static int hook_file_alloc_security(struct file *const file) |
1322 | { |
1323 | /* |
1324 | * Grants all access rights, even if most of them are not checked later |
1325 | * on. It is more consistent. |
1326 | * |
1327 | * Notably, file descriptors for regular files can also be acquired |
1328 | * without going through the file_open hook, for example when using |
1329 | * memfd_create(2). |
1330 | */ |
1331 | landlock_file(file)->allowed_access = LANDLOCK_MASK_ACCESS_FS; |
1332 | return 0; |
1333 | } |
1334 | |
1335 | static int hook_file_open(struct file *const file) |
1336 | { |
1337 | layer_mask_t layer_masks[LANDLOCK_NUM_ACCESS_FS] = {}; |
1338 | access_mask_t open_access_request, full_access_request, allowed_access; |
1339 | const access_mask_t optional_access = LANDLOCK_ACCESS_FS_TRUNCATE; |
1340 | const struct landlock_ruleset *const dom = |
		get_fs_domain(landlock_cred(file->f_cred)->domain);
1342 | |
1343 | if (!dom) |
1344 | return 0; |
1345 | |
1346 | /* |
1347 | * Because a file may be opened with O_PATH, get_required_file_open_access() |
1348 | * may return 0. This case will be handled with a future Landlock |
1349 | * evolution. |
1350 | */ |
1351 | open_access_request = get_required_file_open_access(file); |
1352 | |
1353 | /* |
1354 | * We look up more access than what we immediately need for open(), so |
1355 | * that we can later authorize operations on opened files. |
1356 | */ |
1357 | full_access_request = open_access_request | optional_access; |
1358 | |
1359 | if (is_access_to_paths_allowed( |
		    dom, &file->f_path,
		    landlock_init_layer_masks(dom, full_access_request,
					      &layer_masks, LANDLOCK_KEY_INODE),
		    &layer_masks, NULL, 0, NULL, NULL)) {
1364 | allowed_access = full_access_request; |
1365 | } else { |
1366 | unsigned long access_bit; |
1367 | const unsigned long access_req = full_access_request; |
1368 | |
1369 | /* |
1370 | * Calculate the actual allowed access rights from layer_masks. |
1371 | * Add each access right to allowed_access which has not been |
1372 | * vetoed by any layer. |
1373 | */ |
1374 | allowed_access = 0; |
1375 | for_each_set_bit(access_bit, &access_req, |
1376 | ARRAY_SIZE(layer_masks)) { |
1377 | if (!layer_masks[access_bit]) |
1378 | allowed_access |= BIT_ULL(access_bit); |
1379 | } |
1380 | } |
1381 | |
1382 | /* |
1383 | * For operations on already opened files (i.e. ftruncate()), it is the |
1384 | * access rights at the time of open() which decide whether the |
1385 | * operation is permitted. Therefore, we record the relevant subset of |
1386 | * file access rights in the opened struct file. |
1387 | */ |
1388 | landlock_file(file)->allowed_access = allowed_access; |
1389 | |
1390 | if ((open_access_request & allowed_access) == open_access_request) |
1391 | return 0; |
1392 | |
1393 | return -EACCES; |
1394 | } |
1395 | |
1396 | static int hook_file_truncate(struct file *const file) |
1397 | { |
1398 | /* |
1399 | * Allows truncation if the truncate right was available at the time of |
1400 | * opening the file, to get a consistent access check as for read, write |
1401 | * and execute operations. |
1402 | * |
1403 | * Note: For checks done based on the file's Landlock allowed access, we |
1404 | * enforce them independently of whether the current thread is in a |
1405 | * Landlock domain, so that open files passed between independent |
1406 | * processes retain their behaviour. |
1407 | */ |
1408 | if (landlock_file(file)->allowed_access & LANDLOCK_ACCESS_FS_TRUNCATE) |
1409 | return 0; |
1410 | return -EACCES; |
1411 | } |
1412 | |
1413 | static struct security_hook_list landlock_hooks[] __ro_after_init = { |
1414 | LSM_HOOK_INIT(inode_free_security, hook_inode_free_security), |
1415 | |
1416 | LSM_HOOK_INIT(sb_delete, hook_sb_delete), |
1417 | LSM_HOOK_INIT(sb_mount, hook_sb_mount), |
1418 | LSM_HOOK_INIT(move_mount, hook_move_mount), |
1419 | LSM_HOOK_INIT(sb_umount, hook_sb_umount), |
1420 | LSM_HOOK_INIT(sb_remount, hook_sb_remount), |
1421 | LSM_HOOK_INIT(sb_pivotroot, hook_sb_pivotroot), |
1422 | |
1423 | LSM_HOOK_INIT(path_link, hook_path_link), |
1424 | LSM_HOOK_INIT(path_rename, hook_path_rename), |
1425 | LSM_HOOK_INIT(path_mkdir, hook_path_mkdir), |
1426 | LSM_HOOK_INIT(path_mknod, hook_path_mknod), |
1427 | LSM_HOOK_INIT(path_symlink, hook_path_symlink), |
1428 | LSM_HOOK_INIT(path_unlink, hook_path_unlink), |
1429 | LSM_HOOK_INIT(path_rmdir, hook_path_rmdir), |
1430 | LSM_HOOK_INIT(path_truncate, hook_path_truncate), |
1431 | |
1432 | LSM_HOOK_INIT(file_alloc_security, hook_file_alloc_security), |
1433 | LSM_HOOK_INIT(file_open, hook_file_open), |
1434 | LSM_HOOK_INIT(file_truncate, hook_file_truncate), |
1435 | }; |
1436 | |
1437 | __init void landlock_add_fs_hooks(void) |
1438 | { |
	security_add_hooks(landlock_hooks, ARRAY_SIZE(landlock_hooks),
			   &landlock_lsmid);
1441 | } |
1442 | |
1443 | #ifdef CONFIG_SECURITY_LANDLOCK_KUNIT_TEST |
1444 | |
1445 | /* clang-format off */ |
1446 | static struct kunit_case test_cases[] = { |
1447 | KUNIT_CASE(test_no_more_access), |
1448 | KUNIT_CASE(test_scope_to_request_with_exec_none), |
1449 | KUNIT_CASE(test_scope_to_request_with_exec_some), |
1450 | KUNIT_CASE(test_scope_to_request_without_access), |
1451 | KUNIT_CASE(test_is_eacces_with_none), |
1452 | KUNIT_CASE(test_is_eacces_with_refer), |
1453 | KUNIT_CASE(test_is_eacces_with_write), |
1454 | {} |
1455 | }; |
1456 | /* clang-format on */ |
1457 | |
1458 | static struct kunit_suite test_suite = { |
	.name = "landlock_fs",
1460 | .test_cases = test_cases, |
1461 | }; |
1462 | |
1463 | kunit_test_suite(test_suite); |
1464 | |
1465 | #endif /* CONFIG_SECURITY_LANDLOCK_KUNIT_TEST */ |
1466 | |