1 | // SPDX-License-Identifier: GPL-2.0-only |
2 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt |
3 | |
4 | #include <linux/workqueue.h> |
5 | #include <linux/rtnetlink.h> |
6 | #include <linux/cache.h> |
7 | #include <linux/slab.h> |
8 | #include <linux/list.h> |
9 | #include <linux/delay.h> |
10 | #include <linux/sched.h> |
11 | #include <linux/idr.h> |
12 | #include <linux/rculist.h> |
13 | #include <linux/nsproxy.h> |
14 | #include <linux/fs.h> |
15 | #include <linux/proc_ns.h> |
16 | #include <linux/file.h> |
17 | #include <linux/export.h> |
18 | #include <linux/user_namespace.h> |
19 | #include <linux/net_namespace.h> |
20 | #include <linux/sched/task.h> |
21 | #include <linux/uidgid.h> |
22 | #include <linux/cookie.h> |
23 | #include <linux/proc_fs.h> |
24 | |
25 | #include <net/sock.h> |
26 | #include <net/netlink.h> |
27 | #include <net/net_namespace.h> |
28 | #include <net/netns/generic.h> |
29 | |
30 | /* |
31 | * Our network namespace constructor/destructor lists |
32 | */ |
33 | |
34 | static LIST_HEAD(pernet_list); |
35 | static struct list_head *first_device = &pernet_list; |
36 | |
37 | LIST_HEAD(net_namespace_list); |
38 | EXPORT_SYMBOL_GPL(net_namespace_list); |
39 | |
40 | /* Protects net_namespace_list. Nests iside rtnl_lock() */ |
41 | DECLARE_RWSEM(net_rwsem); |
42 | EXPORT_SYMBOL_GPL(net_rwsem); |
43 | |
44 | #ifdef CONFIG_KEYS |
45 | static struct key_tag init_net_key_domain = { .usage = REFCOUNT_INIT(1) }; |
46 | #endif |
47 | |
48 | struct net init_net; |
49 | EXPORT_SYMBOL(init_net); |
50 | |
51 | static bool init_net_initialized; |
52 | /* |
53 | * pernet_ops_rwsem: protects: pernet_list, net_generic_ids, |
54 | * init_net_initialized and first_device pointer. |
55 | * This is internal net namespace object. Please, don't use it |
56 | * outside. |
57 | */ |
58 | DECLARE_RWSEM(pernet_ops_rwsem); |
59 | EXPORT_SYMBOL_GPL(pernet_ops_rwsem); |
60 | |
61 | #define MIN_PERNET_OPS_ID \ |
62 | ((sizeof(struct net_generic) + sizeof(void *) - 1) / sizeof(void *)) |
63 | |
64 | #define INITIAL_NET_GEN_PTRS 13 /* +1 for len +2 for rcu_head */ |
65 | |
66 | static unsigned int max_gen_ptrs = INITIAL_NET_GEN_PTRS; |
67 | |
68 | DEFINE_COOKIE(net_cookie); |
69 | |
70 | static struct net_generic *net_alloc_generic(void) |
71 | { |
72 | struct net_generic *ng; |
73 | unsigned int generic_size = offsetof(struct net_generic, ptr[max_gen_ptrs]); |
74 | |
75 | ng = kzalloc(size: generic_size, GFP_KERNEL); |
76 | if (ng) |
77 | ng->s.len = max_gen_ptrs; |
78 | |
79 | return ng; |
80 | } |
81 | |
82 | static int net_assign_generic(struct net *net, unsigned int id, void *data) |
83 | { |
84 | struct net_generic *ng, *old_ng; |
85 | |
86 | BUG_ON(id < MIN_PERNET_OPS_ID); |
87 | |
88 | old_ng = rcu_dereference_protected(net->gen, |
89 | lockdep_is_held(&pernet_ops_rwsem)); |
90 | if (old_ng->s.len > id) { |
91 | old_ng->ptr[id] = data; |
92 | return 0; |
93 | } |
94 | |
95 | ng = net_alloc_generic(); |
96 | if (!ng) |
97 | return -ENOMEM; |
98 | |
99 | /* |
100 | * Some synchronisation notes: |
101 | * |
102 | * The net_generic explores the net->gen array inside rcu |
103 | * read section. Besides once set the net->gen->ptr[x] |
104 | * pointer never changes (see rules in netns/generic.h). |
105 | * |
106 | * That said, we simply duplicate this array and schedule |
107 | * the old copy for kfree after a grace period. |
108 | */ |
109 | |
110 | memcpy(&ng->ptr[MIN_PERNET_OPS_ID], &old_ng->ptr[MIN_PERNET_OPS_ID], |
111 | (old_ng->s.len - MIN_PERNET_OPS_ID) * sizeof(void *)); |
112 | ng->ptr[id] = data; |
113 | |
114 | rcu_assign_pointer(net->gen, ng); |
115 | kfree_rcu(old_ng, s.rcu); |
116 | return 0; |
117 | } |
118 | |
119 | static int ops_init(const struct pernet_operations *ops, struct net *net) |
120 | { |
121 | struct net_generic *ng; |
122 | int err = -ENOMEM; |
123 | void *data = NULL; |
124 | |
125 | if (ops->id && ops->size) { |
126 | data = kzalloc(size: ops->size, GFP_KERNEL); |
127 | if (!data) |
128 | goto out; |
129 | |
130 | err = net_assign_generic(net, id: *ops->id, data); |
131 | if (err) |
132 | goto cleanup; |
133 | } |
134 | err = 0; |
135 | if (ops->init) |
136 | err = ops->init(net); |
137 | if (!err) |
138 | return 0; |
139 | |
140 | if (ops->id && ops->size) { |
141 | ng = rcu_dereference_protected(net->gen, |
142 | lockdep_is_held(&pernet_ops_rwsem)); |
143 | ng->ptr[*ops->id] = NULL; |
144 | } |
145 | |
146 | cleanup: |
147 | kfree(objp: data); |
148 | |
149 | out: |
150 | return err; |
151 | } |
152 | |
153 | static void ops_pre_exit_list(const struct pernet_operations *ops, |
154 | struct list_head *net_exit_list) |
155 | { |
156 | struct net *net; |
157 | |
158 | if (ops->pre_exit) { |
159 | list_for_each_entry(net, net_exit_list, exit_list) |
160 | ops->pre_exit(net); |
161 | } |
162 | } |
163 | |
164 | static void ops_exit_list(const struct pernet_operations *ops, |
165 | struct list_head *net_exit_list) |
166 | { |
167 | struct net *net; |
168 | if (ops->exit) { |
169 | list_for_each_entry(net, net_exit_list, exit_list) { |
170 | ops->exit(net); |
171 | cond_resched(); |
172 | } |
173 | } |
174 | if (ops->exit_batch) |
175 | ops->exit_batch(net_exit_list); |
176 | } |
177 | |
178 | static void ops_free_list(const struct pernet_operations *ops, |
179 | struct list_head *net_exit_list) |
180 | { |
181 | struct net *net; |
182 | if (ops->size && ops->id) { |
183 | list_for_each_entry(net, net_exit_list, exit_list) |
184 | kfree(objp: net_generic(net, id: *ops->id)); |
185 | } |
186 | } |
187 | |
188 | /* should be called with nsid_lock held */ |
189 | static int alloc_netid(struct net *net, struct net *peer, int reqid) |
190 | { |
191 | int min = 0, max = 0; |
192 | |
193 | if (reqid >= 0) { |
194 | min = reqid; |
195 | max = reqid + 1; |
196 | } |
197 | |
198 | return idr_alloc(&net->netns_ids, ptr: peer, start: min, end: max, GFP_ATOMIC); |
199 | } |
200 | |
/* This function is used by idr_for_each(). If net is equal to peer, the
 * function returns the id so that idr_for_each() stops. Because we cannot
 * return the id 0 (idr_for_each() would not stop), we return the magic
 * value NET_ID_ZERO (-1) for it.
 */
#define NET_ID_ZERO (-1)
static int net_eq_idr(int id, void *net, void *peer)
{
	if (net_eq(net, peer))
		return id ? : NET_ID_ZERO;
	return 0;
}
213 | |
214 | /* Must be called from RCU-critical section or with nsid_lock held */ |
215 | static int __peernet2id(const struct net *net, struct net *peer) |
216 | { |
217 | int id = idr_for_each(&net->netns_ids, fn: net_eq_idr, data: peer); |
218 | |
219 | /* Magic value for id 0. */ |
220 | if (id == NET_ID_ZERO) |
221 | return 0; |
222 | if (id > 0) |
223 | return id; |
224 | |
225 | return NETNSA_NSID_NOT_ASSIGNED; |
226 | } |
227 | |
228 | static void rtnl_net_notifyid(struct net *net, int cmd, int id, u32 portid, |
229 | struct nlmsghdr *nlh, gfp_t gfp); |
230 | /* This function returns the id of a peer netns. If no id is assigned, one will |
231 | * be allocated and returned. |
232 | */ |
233 | int peernet2id_alloc(struct net *net, struct net *peer, gfp_t gfp) |
234 | { |
235 | int id; |
236 | |
237 | if (refcount_read(r: &net->ns.count) == 0) |
238 | return NETNSA_NSID_NOT_ASSIGNED; |
239 | |
240 | spin_lock_bh(lock: &net->nsid_lock); |
241 | id = __peernet2id(net, peer); |
242 | if (id >= 0) { |
243 | spin_unlock_bh(lock: &net->nsid_lock); |
244 | return id; |
245 | } |
246 | |
247 | /* When peer is obtained from RCU lists, we may race with |
248 | * its cleanup. Check whether it's alive, and this guarantees |
249 | * we never hash a peer back to net->netns_ids, after it has |
250 | * just been idr_remove()'d from there in cleanup_net(). |
251 | */ |
252 | if (!maybe_get_net(net: peer)) { |
253 | spin_unlock_bh(lock: &net->nsid_lock); |
254 | return NETNSA_NSID_NOT_ASSIGNED; |
255 | } |
256 | |
257 | id = alloc_netid(net, peer, reqid: -1); |
258 | spin_unlock_bh(lock: &net->nsid_lock); |
259 | |
260 | put_net(net: peer); |
261 | if (id < 0) |
262 | return NETNSA_NSID_NOT_ASSIGNED; |
263 | |
264 | rtnl_net_notifyid(net, RTM_NEWNSID, id, portid: 0, NULL, gfp); |
265 | |
266 | return id; |
267 | } |
268 | EXPORT_SYMBOL_GPL(peernet2id_alloc); |
269 | |
/*
 * Return the id assigned to @peer in @net, or the "not assigned" value
 * produced by __peernet2id(). The lookup runs inside an RCU read section.
 */
int peernet2id(const struct net *net, struct net *peer)
{
	int nsid;

	rcu_read_lock();
	nsid = __peernet2id(net, peer);
	rcu_read_unlock();

	return nsid;
}
EXPORT_SYMBOL(peernet2id);
282 | |
283 | /* This function returns true is the peer netns has an id assigned into the |
284 | * current netns. |
285 | */ |
286 | bool peernet_has_id(const struct net *net, struct net *peer) |
287 | { |
288 | return peernet2id(net, peer) >= 0; |
289 | } |
290 | |
291 | struct net *get_net_ns_by_id(const struct net *net, int id) |
292 | { |
293 | struct net *peer; |
294 | |
295 | if (id < 0) |
296 | return NULL; |
297 | |
298 | rcu_read_lock(); |
299 | peer = idr_find(&net->netns_ids, id); |
300 | if (peer) |
301 | peer = maybe_get_net(net: peer); |
302 | rcu_read_unlock(); |
303 | |
304 | return peer; |
305 | } |
306 | EXPORT_SYMBOL_GPL(get_net_ns_by_id); |
307 | |
308 | /* init code that must occur even if setup_net() is not called. */ |
309 | static __net_init void preinit_net(struct net *net) |
310 | { |
311 | ref_tracker_dir_init(dir: &net->notrefcnt_tracker, quarantine_count: 128, name: "net notrefcnt" ); |
312 | } |
313 | |
314 | /* |
315 | * setup_net runs the initializers for the network namespace object. |
316 | */ |
317 | static __net_init int setup_net(struct net *net, struct user_namespace *user_ns) |
318 | { |
319 | /* Must be called with pernet_ops_rwsem held */ |
320 | const struct pernet_operations *ops, *saved_ops; |
321 | int error = 0; |
322 | LIST_HEAD(net_exit_list); |
323 | |
324 | refcount_set(r: &net->ns.count, n: 1); |
325 | ref_tracker_dir_init(dir: &net->refcnt_tracker, quarantine_count: 128, name: "net refcnt" ); |
326 | |
327 | refcount_set(r: &net->passive, n: 1); |
328 | get_random_bytes(buf: &net->hash_mix, len: sizeof(u32)); |
329 | preempt_disable(); |
330 | net->net_cookie = gen_cookie_next(gc: &net_cookie); |
331 | preempt_enable(); |
332 | net->dev_base_seq = 1; |
333 | net->user_ns = user_ns; |
334 | idr_init(idr: &net->netns_ids); |
335 | spin_lock_init(&net->nsid_lock); |
336 | mutex_init(&net->ipv4.ra_mutex); |
337 | |
338 | list_for_each_entry(ops, &pernet_list, list) { |
339 | error = ops_init(ops, net); |
340 | if (error < 0) |
341 | goto out_undo; |
342 | } |
343 | down_write(sem: &net_rwsem); |
344 | list_add_tail_rcu(new: &net->list, head: &net_namespace_list); |
345 | up_write(sem: &net_rwsem); |
346 | out: |
347 | return error; |
348 | |
349 | out_undo: |
350 | /* Walk through the list backwards calling the exit functions |
351 | * for the pernet modules whose init functions did not fail. |
352 | */ |
353 | list_add(new: &net->exit_list, head: &net_exit_list); |
354 | saved_ops = ops; |
355 | list_for_each_entry_continue_reverse(ops, &pernet_list, list) |
356 | ops_pre_exit_list(ops, net_exit_list: &net_exit_list); |
357 | |
358 | synchronize_rcu(); |
359 | |
360 | ops = saved_ops; |
361 | list_for_each_entry_continue_reverse(ops, &pernet_list, list) |
362 | ops_exit_list(ops, net_exit_list: &net_exit_list); |
363 | |
364 | ops = saved_ops; |
365 | list_for_each_entry_continue_reverse(ops, &pernet_list, list) |
366 | ops_free_list(ops, net_exit_list: &net_exit_list); |
367 | |
368 | rcu_barrier(); |
369 | goto out; |
370 | } |
371 | |
372 | static int __net_init net_defaults_init_net(struct net *net) |
373 | { |
374 | net->core.sysctl_somaxconn = SOMAXCONN; |
375 | net->core.sysctl_txrehash = SOCK_TXREHASH_ENABLED; |
376 | |
377 | return 0; |
378 | } |
379 | |
380 | static struct pernet_operations net_defaults_ops = { |
381 | .init = net_defaults_init_net, |
382 | }; |
383 | |
384 | static __init int net_defaults_init(void) |
385 | { |
386 | if (register_pernet_subsys(&net_defaults_ops)) |
387 | panic(fmt: "Cannot initialize net default settings" ); |
388 | |
389 | return 0; |
390 | } |
391 | |
392 | core_initcall(net_defaults_init); |
393 | |
394 | #ifdef CONFIG_NET_NS |
395 | static struct ucounts *inc_net_namespaces(struct user_namespace *ns) |
396 | { |
397 | return inc_ucount(ns, current_euid(), type: UCOUNT_NET_NAMESPACES); |
398 | } |
399 | |
400 | static void dec_net_namespaces(struct ucounts *ucounts) |
401 | { |
402 | dec_ucount(ucounts, type: UCOUNT_NET_NAMESPACES); |
403 | } |
404 | |
405 | static struct kmem_cache *net_cachep __ro_after_init; |
406 | static struct workqueue_struct *netns_wq; |
407 | |
408 | static struct net *net_alloc(void) |
409 | { |
410 | struct net *net = NULL; |
411 | struct net_generic *ng; |
412 | |
413 | ng = net_alloc_generic(); |
414 | if (!ng) |
415 | goto out; |
416 | |
417 | net = kmem_cache_zalloc(k: net_cachep, GFP_KERNEL); |
418 | if (!net) |
419 | goto out_free; |
420 | |
421 | #ifdef CONFIG_KEYS |
422 | net->key_domain = kzalloc(size: sizeof(struct key_tag), GFP_KERNEL); |
423 | if (!net->key_domain) |
424 | goto out_free_2; |
425 | refcount_set(r: &net->key_domain->usage, n: 1); |
426 | #endif |
427 | |
428 | rcu_assign_pointer(net->gen, ng); |
429 | out: |
430 | return net; |
431 | |
432 | #ifdef CONFIG_KEYS |
433 | out_free_2: |
434 | kmem_cache_free(s: net_cachep, objp: net); |
435 | net = NULL; |
436 | #endif |
437 | out_free: |
438 | kfree(objp: ng); |
439 | goto out; |
440 | } |
441 | |
442 | static void net_free(struct net *net) |
443 | { |
444 | if (refcount_dec_and_test(r: &net->passive)) { |
445 | kfree(rcu_access_pointer(net->gen)); |
446 | |
447 | /* There should not be any trackers left there. */ |
448 | ref_tracker_dir_exit(dir: &net->notrefcnt_tracker); |
449 | |
450 | kmem_cache_free(s: net_cachep, objp: net); |
451 | } |
452 | } |
453 | |
/* net_free() wrapper taking an untyped pointer; a NULL @p is ignored. */
void net_drop_ns(void *p)
{
	struct net *net = p;

	if (!net)
		return;

	net_free(net);
}
461 | |
462 | struct net *copy_net_ns(unsigned long flags, |
463 | struct user_namespace *user_ns, struct net *old_net) |
464 | { |
465 | struct ucounts *ucounts; |
466 | struct net *net; |
467 | int rv; |
468 | |
469 | if (!(flags & CLONE_NEWNET)) |
470 | return get_net(net: old_net); |
471 | |
472 | ucounts = inc_net_namespaces(ns: user_ns); |
473 | if (!ucounts) |
474 | return ERR_PTR(error: -ENOSPC); |
475 | |
476 | net = net_alloc(); |
477 | if (!net) { |
478 | rv = -ENOMEM; |
479 | goto dec_ucounts; |
480 | } |
481 | |
482 | preinit_net(net); |
483 | refcount_set(r: &net->passive, n: 1); |
484 | net->ucounts = ucounts; |
485 | get_user_ns(ns: user_ns); |
486 | |
487 | rv = down_read_killable(sem: &pernet_ops_rwsem); |
488 | if (rv < 0) |
489 | goto put_userns; |
490 | |
491 | rv = setup_net(net, user_ns); |
492 | |
493 | up_read(sem: &pernet_ops_rwsem); |
494 | |
495 | if (rv < 0) { |
496 | put_userns: |
497 | #ifdef CONFIG_KEYS |
498 | key_remove_domain(domain_tag: net->key_domain); |
499 | #endif |
500 | put_user_ns(ns: user_ns); |
501 | net_free(net); |
502 | dec_ucounts: |
503 | dec_net_namespaces(ucounts); |
504 | return ERR_PTR(error: rv); |
505 | } |
506 | return net; |
507 | } |
508 | |
509 | /** |
510 | * net_ns_get_ownership - get sysfs ownership data for @net |
511 | * @net: network namespace in question (can be NULL) |
512 | * @uid: kernel user ID for sysfs objects |
513 | * @gid: kernel group ID for sysfs objects |
514 | * |
515 | * Returns the uid/gid pair of root in the user namespace associated with the |
516 | * given network namespace. |
517 | */ |
518 | void net_ns_get_ownership(const struct net *net, kuid_t *uid, kgid_t *gid) |
519 | { |
520 | if (net) { |
521 | kuid_t ns_root_uid = make_kuid(from: net->user_ns, uid: 0); |
522 | kgid_t ns_root_gid = make_kgid(from: net->user_ns, gid: 0); |
523 | |
524 | if (uid_valid(uid: ns_root_uid)) |
525 | *uid = ns_root_uid; |
526 | |
527 | if (gid_valid(gid: ns_root_gid)) |
528 | *gid = ns_root_gid; |
529 | } else { |
530 | *uid = GLOBAL_ROOT_UID; |
531 | *gid = GLOBAL_ROOT_GID; |
532 | } |
533 | } |
534 | EXPORT_SYMBOL_GPL(net_ns_get_ownership); |
535 | |
536 | static void unhash_nsid(struct net *net, struct net *last) |
537 | { |
538 | struct net *tmp; |
539 | /* This function is only called from cleanup_net() work, |
540 | * and this work is the only process, that may delete |
541 | * a net from net_namespace_list. So, when the below |
542 | * is executing, the list may only grow. Thus, we do not |
543 | * use for_each_net_rcu() or net_rwsem. |
544 | */ |
545 | for_each_net(tmp) { |
546 | int id; |
547 | |
548 | spin_lock_bh(lock: &tmp->nsid_lock); |
549 | id = __peernet2id(net: tmp, peer: net); |
550 | if (id >= 0) |
551 | idr_remove(&tmp->netns_ids, id); |
552 | spin_unlock_bh(lock: &tmp->nsid_lock); |
553 | if (id >= 0) |
554 | rtnl_net_notifyid(net: tmp, RTM_DELNSID, id, portid: 0, NULL, |
555 | GFP_KERNEL); |
556 | if (tmp == last) |
557 | break; |
558 | } |
559 | spin_lock_bh(lock: &net->nsid_lock); |
560 | idr_destroy(&net->netns_ids); |
561 | spin_unlock_bh(lock: &net->nsid_lock); |
562 | } |
563 | |
564 | static LLIST_HEAD(cleanup_list); |
565 | |
566 | static void cleanup_net(struct work_struct *work) |
567 | { |
568 | const struct pernet_operations *ops; |
569 | struct net *net, *tmp, *last; |
570 | struct llist_node *net_kill_list; |
571 | LIST_HEAD(net_exit_list); |
572 | |
573 | /* Atomically snapshot the list of namespaces to cleanup */ |
574 | net_kill_list = llist_del_all(head: &cleanup_list); |
575 | |
576 | down_read(sem: &pernet_ops_rwsem); |
577 | |
578 | /* Don't let anyone else find us. */ |
579 | down_write(sem: &net_rwsem); |
580 | llist_for_each_entry(net, net_kill_list, cleanup_list) |
581 | list_del_rcu(entry: &net->list); |
582 | /* Cache last net. After we unlock rtnl, no one new net |
583 | * added to net_namespace_list can assign nsid pointer |
584 | * to a net from net_kill_list (see peernet2id_alloc()). |
585 | * So, we skip them in unhash_nsid(). |
586 | * |
587 | * Note, that unhash_nsid() does not delete nsid links |
588 | * between net_kill_list's nets, as they've already |
589 | * deleted from net_namespace_list. But, this would be |
590 | * useless anyway, as netns_ids are destroyed there. |
591 | */ |
592 | last = list_last_entry(&net_namespace_list, struct net, list); |
593 | up_write(sem: &net_rwsem); |
594 | |
595 | llist_for_each_entry(net, net_kill_list, cleanup_list) { |
596 | unhash_nsid(net, last); |
597 | list_add_tail(new: &net->exit_list, head: &net_exit_list); |
598 | } |
599 | |
600 | /* Run all of the network namespace pre_exit methods */ |
601 | list_for_each_entry_reverse(ops, &pernet_list, list) |
602 | ops_pre_exit_list(ops, net_exit_list: &net_exit_list); |
603 | |
604 | /* |
605 | * Another CPU might be rcu-iterating the list, wait for it. |
606 | * This needs to be before calling the exit() notifiers, so |
607 | * the rcu_barrier() below isn't sufficient alone. |
608 | * Also the pre_exit() and exit() methods need this barrier. |
609 | */ |
610 | synchronize_rcu(); |
611 | |
612 | /* Run all of the network namespace exit methods */ |
613 | list_for_each_entry_reverse(ops, &pernet_list, list) |
614 | ops_exit_list(ops, net_exit_list: &net_exit_list); |
615 | |
616 | /* Free the net generic variables */ |
617 | list_for_each_entry_reverse(ops, &pernet_list, list) |
618 | ops_free_list(ops, net_exit_list: &net_exit_list); |
619 | |
620 | up_read(sem: &pernet_ops_rwsem); |
621 | |
622 | /* Ensure there are no outstanding rcu callbacks using this |
623 | * network namespace. |
624 | */ |
625 | rcu_barrier(); |
626 | |
627 | /* Finally it is safe to free my network namespace structure */ |
628 | list_for_each_entry_safe(net, tmp, &net_exit_list, exit_list) { |
629 | list_del_init(entry: &net->exit_list); |
630 | dec_net_namespaces(ucounts: net->ucounts); |
631 | #ifdef CONFIG_KEYS |
632 | key_remove_domain(domain_tag: net->key_domain); |
633 | #endif |
634 | put_user_ns(ns: net->user_ns); |
635 | net_free(net); |
636 | } |
637 | } |
638 | |
639 | /** |
640 | * net_ns_barrier - wait until concurrent net_cleanup_work is done |
641 | * |
642 | * cleanup_net runs from work queue and will first remove namespaces |
643 | * from the global list, then run net exit functions. |
644 | * |
645 | * Call this in module exit path to make sure that all netns |
646 | * ->exit ops have been invoked before the function is removed. |
647 | */ |
648 | void net_ns_barrier(void) |
649 | { |
650 | down_write(sem: &pernet_ops_rwsem); |
651 | up_write(sem: &pernet_ops_rwsem); |
652 | } |
653 | EXPORT_SYMBOL(net_ns_barrier); |
654 | |
655 | static DECLARE_WORK(net_cleanup_work, cleanup_net); |
656 | |
657 | void __put_net(struct net *net) |
658 | { |
659 | ref_tracker_dir_exit(dir: &net->refcnt_tracker); |
660 | /* Cleanup the network namespace in process context */ |
661 | if (llist_add(new: &net->cleanup_list, head: &cleanup_list)) |
662 | queue_work(wq: netns_wq, work: &net_cleanup_work); |
663 | } |
664 | EXPORT_SYMBOL_GPL(__put_net); |
665 | |
666 | /** |
667 | * get_net_ns - increment the refcount of the network namespace |
668 | * @ns: common namespace (net) |
669 | * |
670 | * Returns the net's common namespace. |
671 | */ |
672 | struct ns_common *get_net_ns(struct ns_common *ns) |
673 | { |
674 | return &get_net(container_of(ns, struct net, ns))->ns; |
675 | } |
676 | EXPORT_SYMBOL_GPL(get_net_ns); |
677 | |
678 | struct net *get_net_ns_by_fd(int fd) |
679 | { |
680 | struct fd f = fdget(fd); |
681 | struct net *net = ERR_PTR(error: -EINVAL); |
682 | |
683 | if (!f.file) |
684 | return ERR_PTR(error: -EBADF); |
685 | |
686 | if (proc_ns_file(file: f.file)) { |
687 | struct ns_common *ns = get_proc_ns(file_inode(f.file)); |
688 | if (ns->ops == &netns_operations) |
689 | net = get_net(container_of(ns, struct net, ns)); |
690 | } |
691 | fdput(fd: f); |
692 | |
693 | return net; |
694 | } |
695 | EXPORT_SYMBOL_GPL(get_net_ns_by_fd); |
696 | #endif |
697 | |
698 | struct net *get_net_ns_by_pid(pid_t pid) |
699 | { |
700 | struct task_struct *tsk; |
701 | struct net *net; |
702 | |
703 | /* Lookup the network namespace */ |
704 | net = ERR_PTR(error: -ESRCH); |
705 | rcu_read_lock(); |
706 | tsk = find_task_by_vpid(nr: pid); |
707 | if (tsk) { |
708 | struct nsproxy *nsproxy; |
709 | task_lock(p: tsk); |
710 | nsproxy = tsk->nsproxy; |
711 | if (nsproxy) |
712 | net = get_net(net: nsproxy->net_ns); |
713 | task_unlock(p: tsk); |
714 | } |
715 | rcu_read_unlock(); |
716 | return net; |
717 | } |
718 | EXPORT_SYMBOL_GPL(get_net_ns_by_pid); |
719 | |
720 | static __net_init int net_ns_net_init(struct net *net) |
721 | { |
722 | #ifdef CONFIG_NET_NS |
723 | net->ns.ops = &netns_operations; |
724 | #endif |
725 | return ns_alloc_inum(ns: &net->ns); |
726 | } |
727 | |
728 | static __net_exit void net_ns_net_exit(struct net *net) |
729 | { |
730 | ns_free_inum(&net->ns); |
731 | } |
732 | |
733 | static struct pernet_operations __net_initdata net_ns_ops = { |
734 | .init = net_ns_net_init, |
735 | .exit = net_ns_net_exit, |
736 | }; |
737 | |
738 | static const struct nla_policy rtnl_net_policy[NETNSA_MAX + 1] = { |
739 | [NETNSA_NONE] = { .type = NLA_UNSPEC }, |
740 | [NETNSA_NSID] = { .type = NLA_S32 }, |
741 | [NETNSA_PID] = { .type = NLA_U32 }, |
742 | [NETNSA_FD] = { .type = NLA_U32 }, |
743 | [NETNSA_TARGET_NSID] = { .type = NLA_S32 }, |
744 | }; |
745 | |
746 | static int rtnl_net_newid(struct sk_buff *skb, struct nlmsghdr *nlh, |
747 | struct netlink_ext_ack *extack) |
748 | { |
749 | struct net *net = sock_net(sk: skb->sk); |
750 | struct nlattr *tb[NETNSA_MAX + 1]; |
751 | struct nlattr *nla; |
752 | struct net *peer; |
753 | int nsid, err; |
754 | |
755 | err = nlmsg_parse_deprecated(nlh, hdrlen: sizeof(struct rtgenmsg), tb, |
756 | NETNSA_MAX, policy: rtnl_net_policy, extack); |
757 | if (err < 0) |
758 | return err; |
759 | if (!tb[NETNSA_NSID]) { |
760 | NL_SET_ERR_MSG(extack, "nsid is missing" ); |
761 | return -EINVAL; |
762 | } |
763 | nsid = nla_get_s32(nla: tb[NETNSA_NSID]); |
764 | |
765 | if (tb[NETNSA_PID]) { |
766 | peer = get_net_ns_by_pid(nla_get_u32(nla: tb[NETNSA_PID])); |
767 | nla = tb[NETNSA_PID]; |
768 | } else if (tb[NETNSA_FD]) { |
769 | peer = get_net_ns_by_fd(nla_get_u32(nla: tb[NETNSA_FD])); |
770 | nla = tb[NETNSA_FD]; |
771 | } else { |
772 | NL_SET_ERR_MSG(extack, "Peer netns reference is missing" ); |
773 | return -EINVAL; |
774 | } |
775 | if (IS_ERR(ptr: peer)) { |
776 | NL_SET_BAD_ATTR(extack, nla); |
777 | NL_SET_ERR_MSG(extack, "Peer netns reference is invalid" ); |
778 | return PTR_ERR(ptr: peer); |
779 | } |
780 | |
781 | spin_lock_bh(lock: &net->nsid_lock); |
782 | if (__peernet2id(net, peer) >= 0) { |
783 | spin_unlock_bh(lock: &net->nsid_lock); |
784 | err = -EEXIST; |
785 | NL_SET_BAD_ATTR(extack, nla); |
786 | NL_SET_ERR_MSG(extack, |
787 | "Peer netns already has a nsid assigned" ); |
788 | goto out; |
789 | } |
790 | |
791 | err = alloc_netid(net, peer, reqid: nsid); |
792 | spin_unlock_bh(lock: &net->nsid_lock); |
793 | if (err >= 0) { |
794 | rtnl_net_notifyid(net, RTM_NEWNSID, id: err, NETLINK_CB(skb).portid, |
795 | nlh, GFP_KERNEL); |
796 | err = 0; |
797 | } else if (err == -ENOSPC && nsid >= 0) { |
798 | err = -EEXIST; |
799 | NL_SET_BAD_ATTR(extack, tb[NETNSA_NSID]); |
800 | NL_SET_ERR_MSG(extack, "The specified nsid is already used" ); |
801 | } |
802 | out: |
803 | put_net(net: peer); |
804 | return err; |
805 | } |
806 | |
807 | static int rtnl_net_get_size(void) |
808 | { |
809 | return NLMSG_ALIGN(sizeof(struct rtgenmsg)) |
810 | + nla_total_size(payload: sizeof(s32)) /* NETNSA_NSID */ |
811 | + nla_total_size(payload: sizeof(s32)) /* NETNSA_CURRENT_NSID */ |
812 | ; |
813 | } |
814 | |
815 | struct net_fill_args { |
816 | u32 portid; |
817 | u32 seq; |
818 | int flags; |
819 | int cmd; |
820 | int nsid; |
821 | bool add_ref; |
822 | int ref_nsid; |
823 | }; |
824 | |
825 | static int rtnl_net_fill(struct sk_buff *skb, struct net_fill_args *args) |
826 | { |
827 | struct nlmsghdr *nlh; |
828 | struct rtgenmsg *rth; |
829 | |
830 | nlh = nlmsg_put(skb, portid: args->portid, seq: args->seq, type: args->cmd, payload: sizeof(*rth), |
831 | flags: args->flags); |
832 | if (!nlh) |
833 | return -EMSGSIZE; |
834 | |
835 | rth = nlmsg_data(nlh); |
836 | rth->rtgen_family = AF_UNSPEC; |
837 | |
838 | if (nla_put_s32(skb, attrtype: NETNSA_NSID, value: args->nsid)) |
839 | goto nla_put_failure; |
840 | |
841 | if (args->add_ref && |
842 | nla_put_s32(skb, attrtype: NETNSA_CURRENT_NSID, value: args->ref_nsid)) |
843 | goto nla_put_failure; |
844 | |
845 | nlmsg_end(skb, nlh); |
846 | return 0; |
847 | |
848 | nla_put_failure: |
849 | nlmsg_cancel(skb, nlh); |
850 | return -EMSGSIZE; |
851 | } |
852 | |
853 | static int rtnl_net_valid_getid_req(struct sk_buff *skb, |
854 | const struct nlmsghdr *nlh, |
855 | struct nlattr **tb, |
856 | struct netlink_ext_ack *extack) |
857 | { |
858 | int i, err; |
859 | |
860 | if (!netlink_strict_get_check(skb)) |
861 | return nlmsg_parse_deprecated(nlh, hdrlen: sizeof(struct rtgenmsg), |
862 | tb, NETNSA_MAX, policy: rtnl_net_policy, |
863 | extack); |
864 | |
865 | err = nlmsg_parse_deprecated_strict(nlh, hdrlen: sizeof(struct rtgenmsg), tb, |
866 | NETNSA_MAX, policy: rtnl_net_policy, |
867 | extack); |
868 | if (err) |
869 | return err; |
870 | |
871 | for (i = 0; i <= NETNSA_MAX; i++) { |
872 | if (!tb[i]) |
873 | continue; |
874 | |
875 | switch (i) { |
876 | case NETNSA_PID: |
877 | case NETNSA_FD: |
878 | case NETNSA_NSID: |
879 | case NETNSA_TARGET_NSID: |
880 | break; |
881 | default: |
882 | NL_SET_ERR_MSG(extack, "Unsupported attribute in peer netns getid request" ); |
883 | return -EINVAL; |
884 | } |
885 | } |
886 | |
887 | return 0; |
888 | } |
889 | |
890 | static int rtnl_net_getid(struct sk_buff *skb, struct nlmsghdr *nlh, |
891 | struct netlink_ext_ack *extack) |
892 | { |
893 | struct net *net = sock_net(sk: skb->sk); |
894 | struct nlattr *tb[NETNSA_MAX + 1]; |
895 | struct net_fill_args fillargs = { |
896 | .portid = NETLINK_CB(skb).portid, |
897 | .seq = nlh->nlmsg_seq, |
898 | .cmd = RTM_NEWNSID, |
899 | }; |
900 | struct net *peer, *target = net; |
901 | struct nlattr *nla; |
902 | struct sk_buff *msg; |
903 | int err; |
904 | |
905 | err = rtnl_net_valid_getid_req(skb, nlh, tb, extack); |
906 | if (err < 0) |
907 | return err; |
908 | if (tb[NETNSA_PID]) { |
909 | peer = get_net_ns_by_pid(nla_get_u32(nla: tb[NETNSA_PID])); |
910 | nla = tb[NETNSA_PID]; |
911 | } else if (tb[NETNSA_FD]) { |
912 | peer = get_net_ns_by_fd(nla_get_u32(nla: tb[NETNSA_FD])); |
913 | nla = tb[NETNSA_FD]; |
914 | } else if (tb[NETNSA_NSID]) { |
915 | peer = get_net_ns_by_id(net, nla_get_s32(nla: tb[NETNSA_NSID])); |
916 | if (!peer) |
917 | peer = ERR_PTR(error: -ENOENT); |
918 | nla = tb[NETNSA_NSID]; |
919 | } else { |
920 | NL_SET_ERR_MSG(extack, "Peer netns reference is missing" ); |
921 | return -EINVAL; |
922 | } |
923 | |
924 | if (IS_ERR(ptr: peer)) { |
925 | NL_SET_BAD_ATTR(extack, nla); |
926 | NL_SET_ERR_MSG(extack, "Peer netns reference is invalid" ); |
927 | return PTR_ERR(ptr: peer); |
928 | } |
929 | |
930 | if (tb[NETNSA_TARGET_NSID]) { |
931 | int id = nla_get_s32(nla: tb[NETNSA_TARGET_NSID]); |
932 | |
933 | target = rtnl_get_net_ns_capable(NETLINK_CB(skb).sk, netnsid: id); |
934 | if (IS_ERR(ptr: target)) { |
935 | NL_SET_BAD_ATTR(extack, tb[NETNSA_TARGET_NSID]); |
936 | NL_SET_ERR_MSG(extack, |
937 | "Target netns reference is invalid" ); |
938 | err = PTR_ERR(ptr: target); |
939 | goto out; |
940 | } |
941 | fillargs.add_ref = true; |
942 | fillargs.ref_nsid = peernet2id(net, peer); |
943 | } |
944 | |
945 | msg = nlmsg_new(payload: rtnl_net_get_size(), GFP_KERNEL); |
946 | if (!msg) { |
947 | err = -ENOMEM; |
948 | goto out; |
949 | } |
950 | |
951 | fillargs.nsid = peernet2id(target, peer); |
952 | err = rtnl_net_fill(skb: msg, args: &fillargs); |
953 | if (err < 0) |
954 | goto err_out; |
955 | |
956 | err = rtnl_unicast(skb: msg, net, NETLINK_CB(skb).portid); |
957 | goto out; |
958 | |
959 | err_out: |
960 | nlmsg_free(skb: msg); |
961 | out: |
962 | if (fillargs.add_ref) |
963 | put_net(net: target); |
964 | put_net(net: peer); |
965 | return err; |
966 | } |
967 | |
968 | struct rtnl_net_dump_cb { |
969 | struct net *tgt_net; |
970 | struct net *ref_net; |
971 | struct sk_buff *skb; |
972 | struct net_fill_args fillargs; |
973 | int idx; |
974 | int s_idx; |
975 | }; |
976 | |
977 | /* Runs in RCU-critical section. */ |
978 | static int rtnl_net_dumpid_one(int id, void *peer, void *data) |
979 | { |
980 | struct rtnl_net_dump_cb *net_cb = (struct rtnl_net_dump_cb *)data; |
981 | int ret; |
982 | |
983 | if (net_cb->idx < net_cb->s_idx) |
984 | goto cont; |
985 | |
986 | net_cb->fillargs.nsid = id; |
987 | if (net_cb->fillargs.add_ref) |
988 | net_cb->fillargs.ref_nsid = __peernet2id(net: net_cb->ref_net, peer); |
989 | ret = rtnl_net_fill(skb: net_cb->skb, args: &net_cb->fillargs); |
990 | if (ret < 0) |
991 | return ret; |
992 | |
993 | cont: |
994 | net_cb->idx++; |
995 | return 0; |
996 | } |
997 | |
998 | static int rtnl_valid_dump_net_req(const struct nlmsghdr *nlh, struct sock *sk, |
999 | struct rtnl_net_dump_cb *net_cb, |
1000 | struct netlink_callback *cb) |
1001 | { |
1002 | struct netlink_ext_ack *extack = cb->extack; |
1003 | struct nlattr *tb[NETNSA_MAX + 1]; |
1004 | int err, i; |
1005 | |
1006 | err = nlmsg_parse_deprecated_strict(nlh, hdrlen: sizeof(struct rtgenmsg), tb, |
1007 | NETNSA_MAX, policy: rtnl_net_policy, |
1008 | extack); |
1009 | if (err < 0) |
1010 | return err; |
1011 | |
1012 | for (i = 0; i <= NETNSA_MAX; i++) { |
1013 | if (!tb[i]) |
1014 | continue; |
1015 | |
1016 | if (i == NETNSA_TARGET_NSID) { |
1017 | struct net *net; |
1018 | |
1019 | net = rtnl_get_net_ns_capable(sk, netnsid: nla_get_s32(nla: tb[i])); |
1020 | if (IS_ERR(ptr: net)) { |
1021 | NL_SET_BAD_ATTR(extack, tb[i]); |
1022 | NL_SET_ERR_MSG(extack, |
1023 | "Invalid target network namespace id" ); |
1024 | return PTR_ERR(ptr: net); |
1025 | } |
1026 | net_cb->fillargs.add_ref = true; |
1027 | net_cb->ref_net = net_cb->tgt_net; |
1028 | net_cb->tgt_net = net; |
1029 | } else { |
1030 | NL_SET_BAD_ATTR(extack, tb[i]); |
1031 | NL_SET_ERR_MSG(extack, |
1032 | "Unsupported attribute in dump request" ); |
1033 | return -EINVAL; |
1034 | } |
1035 | } |
1036 | |
1037 | return 0; |
1038 | } |
1039 | |
1040 | static int rtnl_net_dumpid(struct sk_buff *skb, struct netlink_callback *cb) |
1041 | { |
1042 | struct rtnl_net_dump_cb net_cb = { |
1043 | .tgt_net = sock_net(sk: skb->sk), |
1044 | .skb = skb, |
1045 | .fillargs = { |
1046 | .portid = NETLINK_CB(cb->skb).portid, |
1047 | .seq = cb->nlh->nlmsg_seq, |
1048 | .flags = NLM_F_MULTI, |
1049 | .cmd = RTM_NEWNSID, |
1050 | }, |
1051 | .idx = 0, |
1052 | .s_idx = cb->args[0], |
1053 | }; |
1054 | int err = 0; |
1055 | |
1056 | if (cb->strict_check) { |
1057 | err = rtnl_valid_dump_net_req(nlh: cb->nlh, sk: skb->sk, net_cb: &net_cb, cb); |
1058 | if (err < 0) |
1059 | goto end; |
1060 | } |
1061 | |
1062 | rcu_read_lock(); |
1063 | idr_for_each(&net_cb.tgt_net->netns_ids, fn: rtnl_net_dumpid_one, data: &net_cb); |
1064 | rcu_read_unlock(); |
1065 | |
1066 | cb->args[0] = net_cb.idx; |
1067 | end: |
1068 | if (net_cb.fillargs.add_ref) |
1069 | put_net(net: net_cb.tgt_net); |
1070 | return err < 0 ? err : skb->len; |
1071 | } |
1072 | |
1073 | static void rtnl_net_notifyid(struct net *net, int cmd, int id, u32 portid, |
1074 | struct nlmsghdr *nlh, gfp_t gfp) |
1075 | { |
1076 | struct net_fill_args fillargs = { |
1077 | .portid = portid, |
1078 | .seq = nlh ? nlh->nlmsg_seq : 0, |
1079 | .cmd = cmd, |
1080 | .nsid = id, |
1081 | }; |
1082 | struct sk_buff *msg; |
1083 | int err = -ENOMEM; |
1084 | |
1085 | msg = nlmsg_new(payload: rtnl_net_get_size(), flags: gfp); |
1086 | if (!msg) |
1087 | goto out; |
1088 | |
1089 | err = rtnl_net_fill(skb: msg, args: &fillargs); |
1090 | if (err < 0) |
1091 | goto err_out; |
1092 | |
1093 | rtnl_notify(skb: msg, net, pid: portid, RTNLGRP_NSID, nlh, flags: gfp); |
1094 | return; |
1095 | |
1096 | err_out: |
1097 | nlmsg_free(skb: msg); |
1098 | out: |
1099 | rtnl_set_sk_err(net, RTNLGRP_NSID, error: err); |
1100 | } |
1101 | |
1102 | void __init net_ns_init(void) |
1103 | { |
1104 | struct net_generic *ng; |
1105 | |
1106 | #ifdef CONFIG_NET_NS |
1107 | net_cachep = kmem_cache_create(name: "net_namespace" , size: sizeof(struct net), |
1108 | SMP_CACHE_BYTES, |
1109 | SLAB_PANIC|SLAB_ACCOUNT, NULL); |
1110 | |
1111 | /* Create workqueue for cleanup */ |
1112 | netns_wq = create_singlethread_workqueue("netns" ); |
1113 | if (!netns_wq) |
1114 | panic(fmt: "Could not create netns workq" ); |
1115 | #endif |
1116 | |
1117 | ng = net_alloc_generic(); |
1118 | if (!ng) |
1119 | panic(fmt: "Could not allocate generic netns" ); |
1120 | |
1121 | rcu_assign_pointer(init_net.gen, ng); |
1122 | |
1123 | #ifdef CONFIG_KEYS |
1124 | init_net.key_domain = &init_net_key_domain; |
1125 | #endif |
1126 | down_write(sem: &pernet_ops_rwsem); |
1127 | preinit_net(net: &init_net); |
1128 | if (setup_net(net: &init_net, user_ns: &init_user_ns)) |
1129 | panic(fmt: "Could not setup the initial network namespace" ); |
1130 | |
1131 | init_net_initialized = true; |
1132 | up_write(sem: &pernet_ops_rwsem); |
1133 | |
1134 | if (register_pernet_subsys(&net_ns_ops)) |
1135 | panic(fmt: "Could not register network namespace subsystems" ); |
1136 | |
1137 | rtnl_register(PF_UNSPEC, RTM_NEWNSID, rtnl_net_newid, NULL, |
1138 | flags: RTNL_FLAG_DOIT_UNLOCKED); |
1139 | rtnl_register(PF_UNSPEC, RTM_GETNSID, rtnl_net_getid, rtnl_net_dumpid, |
1140 | flags: RTNL_FLAG_DOIT_UNLOCKED); |
1141 | } |
1142 | |
/*
 * Tear down @ops for every namespace queued on @net_exit_list: run the
 * pre-exit stage, wait for an RCU grace period, then run the exit and
 * free stages, in that order.
 */
static void free_exit_list(struct pernet_operations *ops, struct list_head *net_exit_list)
{
	ops_pre_exit_list(ops, net_exit_list);
	/* Grace period between the pre-exit and exit stages. */
	synchronize_rcu();
	ops_exit_list(ops, net_exit_list);
	ops_free_list(ops, net_exit_list);
}
1150 | |
1151 | #ifdef CONFIG_NET_NS |
1152 | static int __register_pernet_operations(struct list_head *list, |
1153 | struct pernet_operations *ops) |
1154 | { |
1155 | struct net *net; |
1156 | int error; |
1157 | LIST_HEAD(net_exit_list); |
1158 | |
1159 | list_add_tail(new: &ops->list, head: list); |
1160 | if (ops->init || (ops->id && ops->size)) { |
1161 | /* We held write locked pernet_ops_rwsem, and parallel |
1162 | * setup_net() and cleanup_net() are not possible. |
1163 | */ |
1164 | for_each_net(net) { |
1165 | error = ops_init(ops, net); |
1166 | if (error) |
1167 | goto out_undo; |
1168 | list_add_tail(new: &net->exit_list, head: &net_exit_list); |
1169 | } |
1170 | } |
1171 | return 0; |
1172 | |
1173 | out_undo: |
1174 | /* If I have an error cleanup all namespaces I initialized */ |
1175 | list_del(entry: &ops->list); |
1176 | free_exit_list(ops, net_exit_list: &net_exit_list); |
1177 | return error; |
1178 | } |
1179 | |
1180 | static void __unregister_pernet_operations(struct pernet_operations *ops) |
1181 | { |
1182 | struct net *net; |
1183 | LIST_HEAD(net_exit_list); |
1184 | |
1185 | list_del(entry: &ops->list); |
1186 | /* See comment in __register_pernet_operations() */ |
1187 | for_each_net(net) |
1188 | list_add_tail(new: &net->exit_list, head: &net_exit_list); |
1189 | |
1190 | free_exit_list(ops, net_exit_list: &net_exit_list); |
1191 | } |
1192 | |
1193 | #else |
1194 | |
1195 | static int __register_pernet_operations(struct list_head *list, |
1196 | struct pernet_operations *ops) |
1197 | { |
1198 | if (!init_net_initialized) { |
1199 | list_add_tail(&ops->list, list); |
1200 | return 0; |
1201 | } |
1202 | |
1203 | return ops_init(ops, &init_net); |
1204 | } |
1205 | |
1206 | static void __unregister_pernet_operations(struct pernet_operations *ops) |
1207 | { |
1208 | if (!init_net_initialized) { |
1209 | list_del(&ops->list); |
1210 | } else { |
1211 | LIST_HEAD(net_exit_list); |
1212 | list_add(&init_net.exit_list, &net_exit_list); |
1213 | free_exit_list(ops, &net_exit_list); |
1214 | } |
1215 | } |
1216 | |
1217 | #endif /* CONFIG_NET_NS */ |
1218 | |
/*
 * Allocator for the ids stored through ops->id: ids are taken in
 * register_pernet_operations() and released there on failure or in
 * unregister_pernet_operations().
 */
static DEFINE_IDA(net_generic_ids);
1220 | |
1221 | static int register_pernet_operations(struct list_head *list, |
1222 | struct pernet_operations *ops) |
1223 | { |
1224 | int error; |
1225 | |
1226 | if (ops->id) { |
1227 | error = ida_alloc_min(ida: &net_generic_ids, MIN_PERNET_OPS_ID, |
1228 | GFP_KERNEL); |
1229 | if (error < 0) |
1230 | return error; |
1231 | *ops->id = error; |
1232 | max_gen_ptrs = max(max_gen_ptrs, *ops->id + 1); |
1233 | } |
1234 | error = __register_pernet_operations(list, ops); |
1235 | if (error) { |
1236 | rcu_barrier(); |
1237 | if (ops->id) |
1238 | ida_free(&net_generic_ids, id: *ops->id); |
1239 | } |
1240 | |
1241 | return error; |
1242 | } |
1243 | |
1244 | static void unregister_pernet_operations(struct pernet_operations *ops) |
1245 | { |
1246 | __unregister_pernet_operations(ops); |
1247 | rcu_barrier(); |
1248 | if (ops->id) |
1249 | ida_free(&net_generic_ids, id: *ops->id); |
1250 | } |
1251 | |
1252 | /** |
1253 | * register_pernet_subsys - register a network namespace subsystem |
1254 | * @ops: pernet operations structure for the subsystem |
1255 | * |
1256 | * Register a subsystem which has init and exit functions |
1257 | * that are called when network namespaces are created and |
1258 | * destroyed respectively. |
1259 | * |
1260 | * When registered all network namespace init functions are |
1261 | * called for every existing network namespace. Allowing kernel |
1262 | * modules to have a race free view of the set of network namespaces. |
1263 | * |
1264 | * When a new network namespace is created all of the init |
1265 | * methods are called in the order in which they were registered. |
1266 | * |
1267 | * When a network namespace is destroyed all of the exit methods |
1268 | * are called in the reverse of the order with which they were |
1269 | * registered. |
1270 | */ |
1271 | int register_pernet_subsys(struct pernet_operations *ops) |
1272 | { |
1273 | int error; |
1274 | down_write(sem: &pernet_ops_rwsem); |
1275 | error = register_pernet_operations(list: first_device, ops); |
1276 | up_write(sem: &pernet_ops_rwsem); |
1277 | return error; |
1278 | } |
1279 | EXPORT_SYMBOL_GPL(register_pernet_subsys); |
1280 | |
1281 | /** |
1282 | * unregister_pernet_subsys - unregister a network namespace subsystem |
1283 | * @ops: pernet operations structure to manipulate |
1284 | * |
1285 | * Remove the pernet operations structure from the list to be |
1286 | * used when network namespaces are created or destroyed. In |
1287 | * addition run the exit method for all existing network |
1288 | * namespaces. |
1289 | */ |
1290 | void unregister_pernet_subsys(struct pernet_operations *ops) |
1291 | { |
1292 | down_write(sem: &pernet_ops_rwsem); |
1293 | unregister_pernet_operations(ops); |
1294 | up_write(sem: &pernet_ops_rwsem); |
1295 | } |
1296 | EXPORT_SYMBOL_GPL(unregister_pernet_subsys); |
1297 | |
1298 | /** |
1299 | * register_pernet_device - register a network namespace device |
1300 | * @ops: pernet operations structure for the subsystem |
1301 | * |
1302 | * Register a device which has init and exit functions |
1303 | * that are called when network namespaces are created and |
1304 | * destroyed respectively. |
1305 | * |
1306 | * When registered all network namespace init functions are |
1307 | * called for every existing network namespace. Allowing kernel |
1308 | * modules to have a race free view of the set of network namespaces. |
1309 | * |
1310 | * When a new network namespace is created all of the init |
1311 | * methods are called in the order in which they were registered. |
1312 | * |
1313 | * When a network namespace is destroyed all of the exit methods |
1314 | * are called in the reverse of the order with which they were |
1315 | * registered. |
1316 | */ |
1317 | int register_pernet_device(struct pernet_operations *ops) |
1318 | { |
1319 | int error; |
1320 | down_write(sem: &pernet_ops_rwsem); |
1321 | error = register_pernet_operations(list: &pernet_list, ops); |
1322 | if (!error && (first_device == &pernet_list)) |
1323 | first_device = &ops->list; |
1324 | up_write(sem: &pernet_ops_rwsem); |
1325 | return error; |
1326 | } |
1327 | EXPORT_SYMBOL_GPL(register_pernet_device); |
1328 | |
1329 | /** |
1330 | * unregister_pernet_device - unregister a network namespace netdevice |
1331 | * @ops: pernet operations structure to manipulate |
1332 | * |
1333 | * Remove the pernet operations structure from the list to be |
1334 | * used when network namespaces are created or destroyed. In |
1335 | * addition run the exit method for all existing network |
1336 | * namespaces. |
1337 | */ |
1338 | void unregister_pernet_device(struct pernet_operations *ops) |
1339 | { |
1340 | down_write(sem: &pernet_ops_rwsem); |
1341 | if (&ops->list == first_device) |
1342 | first_device = first_device->next; |
1343 | unregister_pernet_operations(ops); |
1344 | up_write(sem: &pernet_ops_rwsem); |
1345 | } |
1346 | EXPORT_SYMBOL_GPL(unregister_pernet_device); |
1347 | |
1348 | #ifdef CONFIG_NET_NS |
1349 | static struct ns_common *netns_get(struct task_struct *task) |
1350 | { |
1351 | struct net *net = NULL; |
1352 | struct nsproxy *nsproxy; |
1353 | |
1354 | task_lock(p: task); |
1355 | nsproxy = task->nsproxy; |
1356 | if (nsproxy) |
1357 | net = get_net(net: nsproxy->net_ns); |
1358 | task_unlock(p: task); |
1359 | |
1360 | return net ? &net->ns : NULL; |
1361 | } |
1362 | |
/* Map an embedded ns_common back to the struct net that contains it. */
static inline struct net *to_net_ns(struct ns_common *ns)
{
	return container_of(ns, struct net, ns);
}
1367 | |
/* Drop a reference on the network namespace that embeds @ns. */
static void netns_put(struct ns_common *ns)
{
	struct net *net = to_net_ns(ns);

	put_net(net);
}
1372 | |
1373 | static int netns_install(struct nsset *nsset, struct ns_common *ns) |
1374 | { |
1375 | struct nsproxy *nsproxy = nsset->nsproxy; |
1376 | struct net *net = to_net_ns(ns); |
1377 | |
1378 | if (!ns_capable(ns: net->user_ns, CAP_SYS_ADMIN) || |
1379 | !ns_capable(ns: nsset->cred->user_ns, CAP_SYS_ADMIN)) |
1380 | return -EPERM; |
1381 | |
1382 | put_net(net: nsproxy->net_ns); |
1383 | nsproxy->net_ns = get_net(net); |
1384 | return 0; |
1385 | } |
1386 | |
1387 | static struct user_namespace *netns_owner(struct ns_common *ns) |
1388 | { |
1389 | return to_net_ns(ns)->user_ns; |
1390 | } |
1391 | |
/*
 * proc_ns_operations table for network namespaces ("net", CLONE_NEWNET),
 * wiring up the get/put/install/owner helpers defined above.
 */
const struct proc_ns_operations netns_operations = {
	.name		= "net",
	.type		= CLONE_NEWNET,
	.get		= netns_get,
	.put		= netns_put,
	.install	= netns_install,
	.owner		= netns_owner,
};
1400 | #endif |
1401 | |