1 | // SPDX-License-Identifier: GPL-2.0-only |
2 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt |
3 | |
4 | #include <linux/workqueue.h> |
5 | #include <linux/rtnetlink.h> |
6 | #include <linux/cache.h> |
7 | #include <linux/slab.h> |
8 | #include <linux/list.h> |
9 | #include <linux/delay.h> |
10 | #include <linux/sched.h> |
11 | #include <linux/idr.h> |
12 | #include <linux/rculist.h> |
13 | #include <linux/nsproxy.h> |
14 | #include <linux/fs.h> |
15 | #include <linux/proc_ns.h> |
16 | #include <linux/file.h> |
17 | #include <linux/export.h> |
18 | #include <linux/user_namespace.h> |
19 | #include <linux/net_namespace.h> |
20 | #include <linux/sched/task.h> |
21 | #include <linux/uidgid.h> |
22 | #include <linux/cookie.h> |
23 | #include <linux/proc_fs.h> |
24 | |
25 | #include <net/sock.h> |
26 | #include <net/netlink.h> |
27 | #include <net/net_namespace.h> |
28 | #include <net/netns/generic.h> |
29 | |
/*
 * Our network namespace constructor/destructor lists
 */

/* Registered pernet_operations; setup_net() walks it forwards, teardown backwards. */
static LIST_HEAD(pernet_list);
/* Starts out pointing at the pernet_list head itself. */
static struct list_head *first_device = &pernet_list;

/* Network namespaces are linked here through net->list (see setup_net()). */
LIST_HEAD(net_namespace_list);
EXPORT_SYMBOL_GPL(net_namespace_list);

/* Protects net_namespace_list. Nests inside rtnl_lock() */
DECLARE_RWSEM(net_rwsem);
EXPORT_SYMBOL_GPL(net_rwsem);

#ifdef CONFIG_KEYS
/* Statically initialised key tag holding a single reference. */
static struct key_tag init_net_key_domain = { .usage = REFCOUNT_INIT(1) };
#endif

struct net init_net;
EXPORT_SYMBOL(init_net);

static bool init_net_initialized;
/*
 * pernet_ops_rwsem: protects: pernet_list, net_generic_ids,
 * init_net_initialized and first_device pointer.
 * This is an internal net namespace object. Please don't use it
 * outside.
 */
DECLARE_RWSEM(pernet_ops_rwsem);
EXPORT_SYMBOL_GPL(pernet_ops_rwsem);

/*
 * Size of struct net_generic expressed in pointer-sized slots, rounded up.
 * net_assign_generic() refuses any id below this value.
 */
#define MIN_PERNET_OPS_ID	\
	((sizeof(struct net_generic) + sizeof(void *) - 1) / sizeof(void *))

#define INITIAL_NET_GEN_PTRS	13 /* +1 for len +2 for rcu_head */

/* Number of ptr[] slots net_alloc_generic() sizes each new array for. */
static unsigned int max_gen_ptrs = INITIAL_NET_GEN_PTRS;

/* Generator for the per-namespace net->net_cookie value (see setup_net()). */
DEFINE_COOKIE(net_cookie);
69 | |
70 | static struct net_generic *net_alloc_generic(void) |
71 | { |
72 | struct net_generic *ng; |
73 | unsigned int generic_size = offsetof(struct net_generic, ptr[max_gen_ptrs]); |
74 | |
75 | ng = kzalloc(size: generic_size, GFP_KERNEL); |
76 | if (ng) |
77 | ng->s.len = max_gen_ptrs; |
78 | |
79 | return ng; |
80 | } |
81 | |
82 | static int net_assign_generic(struct net *net, unsigned int id, void *data) |
83 | { |
84 | struct net_generic *ng, *old_ng; |
85 | |
86 | BUG_ON(id < MIN_PERNET_OPS_ID); |
87 | |
88 | old_ng = rcu_dereference_protected(net->gen, |
89 | lockdep_is_held(&pernet_ops_rwsem)); |
90 | if (old_ng->s.len > id) { |
91 | old_ng->ptr[id] = data; |
92 | return 0; |
93 | } |
94 | |
95 | ng = net_alloc_generic(); |
96 | if (!ng) |
97 | return -ENOMEM; |
98 | |
99 | /* |
100 | * Some synchronisation notes: |
101 | * |
102 | * The net_generic explores the net->gen array inside rcu |
103 | * read section. Besides once set the net->gen->ptr[x] |
104 | * pointer never changes (see rules in netns/generic.h). |
105 | * |
106 | * That said, we simply duplicate this array and schedule |
107 | * the old copy for kfree after a grace period. |
108 | */ |
109 | |
110 | memcpy(&ng->ptr[MIN_PERNET_OPS_ID], &old_ng->ptr[MIN_PERNET_OPS_ID], |
111 | (old_ng->s.len - MIN_PERNET_OPS_ID) * sizeof(void *)); |
112 | ng->ptr[id] = data; |
113 | |
114 | rcu_assign_pointer(net->gen, ng); |
115 | kfree_rcu(old_ng, s.rcu); |
116 | return 0; |
117 | } |
118 | |
119 | static int ops_init(const struct pernet_operations *ops, struct net *net) |
120 | { |
121 | struct net_generic *ng; |
122 | int err = -ENOMEM; |
123 | void *data = NULL; |
124 | |
125 | if (ops->id && ops->size) { |
126 | data = kzalloc(size: ops->size, GFP_KERNEL); |
127 | if (!data) |
128 | goto out; |
129 | |
130 | err = net_assign_generic(net, id: *ops->id, data); |
131 | if (err) |
132 | goto cleanup; |
133 | } |
134 | err = 0; |
135 | if (ops->init) |
136 | err = ops->init(net); |
137 | if (!err) |
138 | return 0; |
139 | |
140 | if (ops->id && ops->size) { |
141 | ng = rcu_dereference_protected(net->gen, |
142 | lockdep_is_held(&pernet_ops_rwsem)); |
143 | ng->ptr[*ops->id] = NULL; |
144 | } |
145 | |
146 | cleanup: |
147 | kfree(objp: data); |
148 | |
149 | out: |
150 | return err; |
151 | } |
152 | |
153 | static void ops_pre_exit_list(const struct pernet_operations *ops, |
154 | struct list_head *net_exit_list) |
155 | { |
156 | struct net *net; |
157 | |
158 | if (ops->pre_exit) { |
159 | list_for_each_entry(net, net_exit_list, exit_list) |
160 | ops->pre_exit(net); |
161 | } |
162 | } |
163 | |
164 | static void ops_exit_list(const struct pernet_operations *ops, |
165 | struct list_head *net_exit_list) |
166 | { |
167 | struct net *net; |
168 | if (ops->exit) { |
169 | list_for_each_entry(net, net_exit_list, exit_list) { |
170 | ops->exit(net); |
171 | cond_resched(); |
172 | } |
173 | } |
174 | if (ops->exit_batch) |
175 | ops->exit_batch(net_exit_list); |
176 | } |
177 | |
178 | static void ops_free_list(const struct pernet_operations *ops, |
179 | struct list_head *net_exit_list) |
180 | { |
181 | struct net *net; |
182 | if (ops->size && ops->id) { |
183 | list_for_each_entry(net, net_exit_list, exit_list) |
184 | kfree(objp: net_generic(net, id: *ops->id)); |
185 | } |
186 | } |
187 | |
188 | /* should be called with nsid_lock held */ |
189 | static int alloc_netid(struct net *net, struct net *peer, int reqid) |
190 | { |
191 | int min = 0, max = 0; |
192 | |
193 | if (reqid >= 0) { |
194 | min = reqid; |
195 | max = reqid + 1; |
196 | } |
197 | |
198 | return idr_alloc(&net->netns_ids, ptr: peer, start: min, end: max, GFP_ATOMIC); |
199 | } |
200 | |
/* Callback for idr_for_each(). When the entry matches @peer, the id is
 * returned so that idr_for_each() stops. Because returning id 0 would not
 * stop the walk, the magic value NET_ID_ZERO (-1) is returned in its place.
 */
#define NET_ID_ZERO -1
static int net_eq_idr(int id, void *net, void *peer)
{
	if (!net_eq(net, peer))
		return 0;	/* no match: keep iterating */

	if (id == 0)
		return NET_ID_ZERO;

	return id;
}
213 | |
214 | /* Must be called from RCU-critical section or with nsid_lock held */ |
215 | static int __peernet2id(const struct net *net, struct net *peer) |
216 | { |
217 | int id = idr_for_each(&net->netns_ids, fn: net_eq_idr, data: peer); |
218 | |
219 | /* Magic value for id 0. */ |
220 | if (id == NET_ID_ZERO) |
221 | return 0; |
222 | if (id > 0) |
223 | return id; |
224 | |
225 | return NETNSA_NSID_NOT_ASSIGNED; |
226 | } |
227 | |
228 | static void rtnl_net_notifyid(struct net *net, int cmd, int id, u32 portid, |
229 | struct nlmsghdr *nlh, gfp_t gfp); |
230 | /* This function returns the id of a peer netns. If no id is assigned, one will |
231 | * be allocated and returned. |
232 | */ |
233 | int peernet2id_alloc(struct net *net, struct net *peer, gfp_t gfp) |
234 | { |
235 | int id; |
236 | |
237 | if (refcount_read(r: &net->ns.count) == 0) |
238 | return NETNSA_NSID_NOT_ASSIGNED; |
239 | |
240 | spin_lock_bh(lock: &net->nsid_lock); |
241 | id = __peernet2id(net, peer); |
242 | if (id >= 0) { |
243 | spin_unlock_bh(lock: &net->nsid_lock); |
244 | return id; |
245 | } |
246 | |
247 | /* When peer is obtained from RCU lists, we may race with |
248 | * its cleanup. Check whether it's alive, and this guarantees |
249 | * we never hash a peer back to net->netns_ids, after it has |
250 | * just been idr_remove()'d from there in cleanup_net(). |
251 | */ |
252 | if (!maybe_get_net(net: peer)) { |
253 | spin_unlock_bh(lock: &net->nsid_lock); |
254 | return NETNSA_NSID_NOT_ASSIGNED; |
255 | } |
256 | |
257 | id = alloc_netid(net, peer, reqid: -1); |
258 | spin_unlock_bh(lock: &net->nsid_lock); |
259 | |
260 | put_net(net: peer); |
261 | if (id < 0) |
262 | return NETNSA_NSID_NOT_ASSIGNED; |
263 | |
264 | rtnl_net_notifyid(net, RTM_NEWNSID, id, portid: 0, NULL, gfp); |
265 | |
266 | return id; |
267 | } |
268 | EXPORT_SYMBOL_GPL(peernet2id_alloc); |
269 | |
/*
 * Return the id of @peer as seen from @net, or NETNSA_NSID_NOT_ASSIGNED
 * if none is assigned.  The lookup runs under rcu_read_lock().
 */
int peernet2id(const struct net *net, struct net *peer)
{
	int nsid;

	rcu_read_lock();
	nsid = __peernet2id(net, peer);
	rcu_read_unlock();

	return nsid;
}
EXPORT_SYMBOL(peernet2id);
282 | |
283 | /* This function returns true is the peer netns has an id assigned into the |
284 | * current netns. |
285 | */ |
286 | bool peernet_has_id(const struct net *net, struct net *peer) |
287 | { |
288 | return peernet2id(net, peer) >= 0; |
289 | } |
290 | |
291 | struct net *get_net_ns_by_id(const struct net *net, int id) |
292 | { |
293 | struct net *peer; |
294 | |
295 | if (id < 0) |
296 | return NULL; |
297 | |
298 | rcu_read_lock(); |
299 | peer = idr_find(&net->netns_ids, id); |
300 | if (peer) |
301 | peer = maybe_get_net(net: peer); |
302 | rcu_read_unlock(); |
303 | |
304 | return peer; |
305 | } |
306 | EXPORT_SYMBOL_GPL(get_net_ns_by_id); |
307 | |
308 | /* init code that must occur even if setup_net() is not called. */ |
309 | static __net_init void preinit_net(struct net *net) |
310 | { |
311 | ref_tracker_dir_init(dir: &net->notrefcnt_tracker, quarantine_count: 128, name: "net notrefcnt" ); |
312 | } |
313 | |
314 | /* |
315 | * setup_net runs the initializers for the network namespace object. |
316 | */ |
317 | static __net_init int setup_net(struct net *net, struct user_namespace *user_ns) |
318 | { |
319 | /* Must be called with pernet_ops_rwsem held */ |
320 | const struct pernet_operations *ops, *saved_ops; |
321 | LIST_HEAD(net_exit_list); |
322 | LIST_HEAD(dev_kill_list); |
323 | int error = 0; |
324 | |
325 | refcount_set(r: &net->ns.count, n: 1); |
326 | ref_tracker_dir_init(dir: &net->refcnt_tracker, quarantine_count: 128, name: "net refcnt" ); |
327 | |
328 | refcount_set(r: &net->passive, n: 1); |
329 | get_random_bytes(buf: &net->hash_mix, len: sizeof(u32)); |
330 | preempt_disable(); |
331 | net->net_cookie = gen_cookie_next(gc: &net_cookie); |
332 | preempt_enable(); |
333 | net->dev_base_seq = 1; |
334 | net->user_ns = user_ns; |
335 | idr_init(idr: &net->netns_ids); |
336 | spin_lock_init(&net->nsid_lock); |
337 | mutex_init(&net->ipv4.ra_mutex); |
338 | |
339 | list_for_each_entry(ops, &pernet_list, list) { |
340 | error = ops_init(ops, net); |
341 | if (error < 0) |
342 | goto out_undo; |
343 | } |
344 | down_write(sem: &net_rwsem); |
345 | list_add_tail_rcu(new: &net->list, head: &net_namespace_list); |
346 | up_write(sem: &net_rwsem); |
347 | out: |
348 | return error; |
349 | |
350 | out_undo: |
351 | /* Walk through the list backwards calling the exit functions |
352 | * for the pernet modules whose init functions did not fail. |
353 | */ |
354 | list_add(new: &net->exit_list, head: &net_exit_list); |
355 | saved_ops = ops; |
356 | list_for_each_entry_continue_reverse(ops, &pernet_list, list) |
357 | ops_pre_exit_list(ops, net_exit_list: &net_exit_list); |
358 | |
359 | synchronize_rcu(); |
360 | |
361 | ops = saved_ops; |
362 | rtnl_lock(); |
363 | list_for_each_entry_continue_reverse(ops, &pernet_list, list) { |
364 | if (ops->exit_batch_rtnl) |
365 | ops->exit_batch_rtnl(&net_exit_list, &dev_kill_list); |
366 | } |
367 | unregister_netdevice_many(head: &dev_kill_list); |
368 | rtnl_unlock(); |
369 | |
370 | ops = saved_ops; |
371 | list_for_each_entry_continue_reverse(ops, &pernet_list, list) |
372 | ops_exit_list(ops, net_exit_list: &net_exit_list); |
373 | |
374 | ops = saved_ops; |
375 | list_for_each_entry_continue_reverse(ops, &pernet_list, list) |
376 | ops_free_list(ops, net_exit_list: &net_exit_list); |
377 | |
378 | rcu_barrier(); |
379 | goto out; |
380 | } |
381 | |
382 | static int __net_init net_defaults_init_net(struct net *net) |
383 | { |
384 | net->core.sysctl_somaxconn = SOMAXCONN; |
385 | /* Limits per socket sk_omem_alloc usage. |
386 | * TCP zerocopy regular usage needs 128 KB. |
387 | */ |
388 | net->core.sysctl_optmem_max = 128 * 1024; |
389 | net->core.sysctl_txrehash = SOCK_TXREHASH_ENABLED; |
390 | |
391 | return 0; |
392 | } |
393 | |
/* Pernet ops that only provide an ->init hook, net_defaults_init_net(). */
static struct pernet_operations net_defaults_ops = {
	.init = net_defaults_init_net,
};
397 | |
398 | static __init int net_defaults_init(void) |
399 | { |
400 | if (register_pernet_subsys(&net_defaults_ops)) |
401 | panic(fmt: "Cannot initialize net default settings" ); |
402 | |
403 | return 0; |
404 | } |
405 | |
406 | core_initcall(net_defaults_init); |
407 | |
408 | #ifdef CONFIG_NET_NS |
409 | static struct ucounts *inc_net_namespaces(struct user_namespace *ns) |
410 | { |
411 | return inc_ucount(ns, current_euid(), type: UCOUNT_NET_NAMESPACES); |
412 | } |
413 | |
414 | static void dec_net_namespaces(struct ucounts *ucounts) |
415 | { |
416 | dec_ucount(ucounts, type: UCOUNT_NET_NAMESPACES); |
417 | } |
418 | |
/* Slab cache from which net_alloc() allocates struct net. */
static struct kmem_cache *net_cachep __ro_after_init;
/* Workqueue on which __put_net() queues net_cleanup_work. */
static struct workqueue_struct *netns_wq;
421 | |
422 | static struct net *net_alloc(void) |
423 | { |
424 | struct net *net = NULL; |
425 | struct net_generic *ng; |
426 | |
427 | ng = net_alloc_generic(); |
428 | if (!ng) |
429 | goto out; |
430 | |
431 | net = kmem_cache_zalloc(k: net_cachep, GFP_KERNEL); |
432 | if (!net) |
433 | goto out_free; |
434 | |
435 | #ifdef CONFIG_KEYS |
436 | net->key_domain = kzalloc(size: sizeof(struct key_tag), GFP_KERNEL); |
437 | if (!net->key_domain) |
438 | goto out_free_2; |
439 | refcount_set(r: &net->key_domain->usage, n: 1); |
440 | #endif |
441 | |
442 | rcu_assign_pointer(net->gen, ng); |
443 | out: |
444 | return net; |
445 | |
446 | #ifdef CONFIG_KEYS |
447 | out_free_2: |
448 | kmem_cache_free(s: net_cachep, objp: net); |
449 | net = NULL; |
450 | #endif |
451 | out_free: |
452 | kfree(objp: ng); |
453 | goto out; |
454 | } |
455 | |
456 | static void net_free(struct net *net) |
457 | { |
458 | if (refcount_dec_and_test(r: &net->passive)) { |
459 | kfree(rcu_access_pointer(net->gen)); |
460 | |
461 | /* There should not be any trackers left there. */ |
462 | ref_tracker_dir_exit(dir: &net->notrefcnt_tracker); |
463 | |
464 | kmem_cache_free(s: net_cachep, objp: net); |
465 | } |
466 | } |
467 | |
/* net_free() taking an untyped pointer; a NULL @p is ignored. */
void net_drop_ns(void *p)
{
	struct net *net = p;

	if (!net)
		return;

	net_free(net);
}
475 | |
476 | struct net *copy_net_ns(unsigned long flags, |
477 | struct user_namespace *user_ns, struct net *old_net) |
478 | { |
479 | struct ucounts *ucounts; |
480 | struct net *net; |
481 | int rv; |
482 | |
483 | if (!(flags & CLONE_NEWNET)) |
484 | return get_net(net: old_net); |
485 | |
486 | ucounts = inc_net_namespaces(ns: user_ns); |
487 | if (!ucounts) |
488 | return ERR_PTR(error: -ENOSPC); |
489 | |
490 | net = net_alloc(); |
491 | if (!net) { |
492 | rv = -ENOMEM; |
493 | goto dec_ucounts; |
494 | } |
495 | |
496 | preinit_net(net); |
497 | refcount_set(r: &net->passive, n: 1); |
498 | net->ucounts = ucounts; |
499 | get_user_ns(ns: user_ns); |
500 | |
501 | rv = down_read_killable(sem: &pernet_ops_rwsem); |
502 | if (rv < 0) |
503 | goto put_userns; |
504 | |
505 | rv = setup_net(net, user_ns); |
506 | |
507 | up_read(sem: &pernet_ops_rwsem); |
508 | |
509 | if (rv < 0) { |
510 | put_userns: |
511 | #ifdef CONFIG_KEYS |
512 | key_remove_domain(domain_tag: net->key_domain); |
513 | #endif |
514 | put_user_ns(ns: user_ns); |
515 | net_free(net); |
516 | dec_ucounts: |
517 | dec_net_namespaces(ucounts); |
518 | return ERR_PTR(error: rv); |
519 | } |
520 | return net; |
521 | } |
522 | |
523 | /** |
524 | * net_ns_get_ownership - get sysfs ownership data for @net |
525 | * @net: network namespace in question (can be NULL) |
526 | * @uid: kernel user ID for sysfs objects |
527 | * @gid: kernel group ID for sysfs objects |
528 | * |
529 | * Returns the uid/gid pair of root in the user namespace associated with the |
530 | * given network namespace. |
531 | */ |
532 | void net_ns_get_ownership(const struct net *net, kuid_t *uid, kgid_t *gid) |
533 | { |
534 | if (net) { |
535 | kuid_t ns_root_uid = make_kuid(from: net->user_ns, uid: 0); |
536 | kgid_t ns_root_gid = make_kgid(from: net->user_ns, gid: 0); |
537 | |
538 | if (uid_valid(uid: ns_root_uid)) |
539 | *uid = ns_root_uid; |
540 | |
541 | if (gid_valid(gid: ns_root_gid)) |
542 | *gid = ns_root_gid; |
543 | } else { |
544 | *uid = GLOBAL_ROOT_UID; |
545 | *gid = GLOBAL_ROOT_GID; |
546 | } |
547 | } |
548 | EXPORT_SYMBOL_GPL(net_ns_get_ownership); |
549 | |
550 | static void unhash_nsid(struct net *net, struct net *last) |
551 | { |
552 | struct net *tmp; |
553 | /* This function is only called from cleanup_net() work, |
554 | * and this work is the only process, that may delete |
555 | * a net from net_namespace_list. So, when the below |
556 | * is executing, the list may only grow. Thus, we do not |
557 | * use for_each_net_rcu() or net_rwsem. |
558 | */ |
559 | for_each_net(tmp) { |
560 | int id; |
561 | |
562 | spin_lock_bh(lock: &tmp->nsid_lock); |
563 | id = __peernet2id(net: tmp, peer: net); |
564 | if (id >= 0) |
565 | idr_remove(&tmp->netns_ids, id); |
566 | spin_unlock_bh(lock: &tmp->nsid_lock); |
567 | if (id >= 0) |
568 | rtnl_net_notifyid(net: tmp, RTM_DELNSID, id, portid: 0, NULL, |
569 | GFP_KERNEL); |
570 | if (tmp == last) |
571 | break; |
572 | } |
573 | spin_lock_bh(lock: &net->nsid_lock); |
574 | idr_destroy(&net->netns_ids); |
575 | spin_unlock_bh(lock: &net->nsid_lock); |
576 | } |
577 | |
578 | static LLIST_HEAD(cleanup_list); |
579 | |
580 | static void cleanup_net(struct work_struct *work) |
581 | { |
582 | const struct pernet_operations *ops; |
583 | struct net *net, *tmp, *last; |
584 | struct llist_node *net_kill_list; |
585 | LIST_HEAD(net_exit_list); |
586 | LIST_HEAD(dev_kill_list); |
587 | |
588 | /* Atomically snapshot the list of namespaces to cleanup */ |
589 | net_kill_list = llist_del_all(head: &cleanup_list); |
590 | |
591 | down_read(sem: &pernet_ops_rwsem); |
592 | |
593 | /* Don't let anyone else find us. */ |
594 | down_write(sem: &net_rwsem); |
595 | llist_for_each_entry(net, net_kill_list, cleanup_list) |
596 | list_del_rcu(entry: &net->list); |
597 | /* Cache last net. After we unlock rtnl, no one new net |
598 | * added to net_namespace_list can assign nsid pointer |
599 | * to a net from net_kill_list (see peernet2id_alloc()). |
600 | * So, we skip them in unhash_nsid(). |
601 | * |
602 | * Note, that unhash_nsid() does not delete nsid links |
603 | * between net_kill_list's nets, as they've already |
604 | * deleted from net_namespace_list. But, this would be |
605 | * useless anyway, as netns_ids are destroyed there. |
606 | */ |
607 | last = list_last_entry(&net_namespace_list, struct net, list); |
608 | up_write(sem: &net_rwsem); |
609 | |
610 | llist_for_each_entry(net, net_kill_list, cleanup_list) { |
611 | unhash_nsid(net, last); |
612 | list_add_tail(new: &net->exit_list, head: &net_exit_list); |
613 | } |
614 | |
615 | /* Run all of the network namespace pre_exit methods */ |
616 | list_for_each_entry_reverse(ops, &pernet_list, list) |
617 | ops_pre_exit_list(ops, net_exit_list: &net_exit_list); |
618 | |
619 | /* |
620 | * Another CPU might be rcu-iterating the list, wait for it. |
621 | * This needs to be before calling the exit() notifiers, so |
622 | * the rcu_barrier() below isn't sufficient alone. |
623 | * Also the pre_exit() and exit() methods need this barrier. |
624 | */ |
625 | synchronize_rcu_expedited(); |
626 | |
627 | rtnl_lock(); |
628 | list_for_each_entry_reverse(ops, &pernet_list, list) { |
629 | if (ops->exit_batch_rtnl) |
630 | ops->exit_batch_rtnl(&net_exit_list, &dev_kill_list); |
631 | } |
632 | unregister_netdevice_many(head: &dev_kill_list); |
633 | rtnl_unlock(); |
634 | |
635 | /* Run all of the network namespace exit methods */ |
636 | list_for_each_entry_reverse(ops, &pernet_list, list) |
637 | ops_exit_list(ops, net_exit_list: &net_exit_list); |
638 | |
639 | /* Free the net generic variables */ |
640 | list_for_each_entry_reverse(ops, &pernet_list, list) |
641 | ops_free_list(ops, net_exit_list: &net_exit_list); |
642 | |
643 | up_read(sem: &pernet_ops_rwsem); |
644 | |
645 | /* Ensure there are no outstanding rcu callbacks using this |
646 | * network namespace. |
647 | */ |
648 | rcu_barrier(); |
649 | |
650 | /* Finally it is safe to free my network namespace structure */ |
651 | list_for_each_entry_safe(net, tmp, &net_exit_list, exit_list) { |
652 | list_del_init(entry: &net->exit_list); |
653 | dec_net_namespaces(ucounts: net->ucounts); |
654 | #ifdef CONFIG_KEYS |
655 | key_remove_domain(domain_tag: net->key_domain); |
656 | #endif |
657 | put_user_ns(ns: net->user_ns); |
658 | net_free(net); |
659 | } |
660 | } |
661 | |
662 | /** |
663 | * net_ns_barrier - wait until concurrent net_cleanup_work is done |
664 | * |
665 | * cleanup_net runs from work queue and will first remove namespaces |
666 | * from the global list, then run net exit functions. |
667 | * |
668 | * Call this in module exit path to make sure that all netns |
669 | * ->exit ops have been invoked before the function is removed. |
670 | */ |
671 | void net_ns_barrier(void) |
672 | { |
673 | down_write(sem: &pernet_ops_rwsem); |
674 | up_write(sem: &pernet_ops_rwsem); |
675 | } |
676 | EXPORT_SYMBOL(net_ns_barrier); |
677 | |
678 | static DECLARE_WORK(net_cleanup_work, cleanup_net); |
679 | |
680 | void __put_net(struct net *net) |
681 | { |
682 | ref_tracker_dir_exit(dir: &net->refcnt_tracker); |
683 | /* Cleanup the network namespace in process context */ |
684 | if (llist_add(new: &net->cleanup_list, head: &cleanup_list)) |
685 | queue_work(wq: netns_wq, work: &net_cleanup_work); |
686 | } |
687 | EXPORT_SYMBOL_GPL(__put_net); |
688 | |
689 | /** |
690 | * get_net_ns - increment the refcount of the network namespace |
691 | * @ns: common namespace (net) |
692 | * |
693 | * Returns the net's common namespace. |
694 | */ |
695 | struct ns_common *get_net_ns(struct ns_common *ns) |
696 | { |
697 | return &get_net(container_of(ns, struct net, ns))->ns; |
698 | } |
699 | EXPORT_SYMBOL_GPL(get_net_ns); |
700 | |
701 | struct net *get_net_ns_by_fd(int fd) |
702 | { |
703 | struct fd f = fdget(fd); |
704 | struct net *net = ERR_PTR(error: -EINVAL); |
705 | |
706 | if (!f.file) |
707 | return ERR_PTR(error: -EBADF); |
708 | |
709 | if (proc_ns_file(file: f.file)) { |
710 | struct ns_common *ns = get_proc_ns(file_inode(f.file)); |
711 | if (ns->ops == &netns_operations) |
712 | net = get_net(container_of(ns, struct net, ns)); |
713 | } |
714 | fdput(fd: f); |
715 | |
716 | return net; |
717 | } |
718 | EXPORT_SYMBOL_GPL(get_net_ns_by_fd); |
719 | #endif |
720 | |
721 | struct net *get_net_ns_by_pid(pid_t pid) |
722 | { |
723 | struct task_struct *tsk; |
724 | struct net *net; |
725 | |
726 | /* Lookup the network namespace */ |
727 | net = ERR_PTR(error: -ESRCH); |
728 | rcu_read_lock(); |
729 | tsk = find_task_by_vpid(nr: pid); |
730 | if (tsk) { |
731 | struct nsproxy *nsproxy; |
732 | task_lock(p: tsk); |
733 | nsproxy = tsk->nsproxy; |
734 | if (nsproxy) |
735 | net = get_net(net: nsproxy->net_ns); |
736 | task_unlock(p: tsk); |
737 | } |
738 | rcu_read_unlock(); |
739 | return net; |
740 | } |
741 | EXPORT_SYMBOL_GPL(get_net_ns_by_pid); |
742 | |
743 | static __net_init int net_ns_net_init(struct net *net) |
744 | { |
745 | #ifdef CONFIG_NET_NS |
746 | net->ns.ops = &netns_operations; |
747 | #endif |
748 | return ns_alloc_inum(ns: &net->ns); |
749 | } |
750 | |
751 | static __net_exit void net_ns_net_exit(struct net *net) |
752 | { |
753 | ns_free_inum(&net->ns); |
754 | } |
755 | |
/* Pernet ops pairing net_ns_net_init() with net_ns_net_exit(). */
static struct pernet_operations __net_initdata net_ns_ops = {
	.init = net_ns_net_init,
	.exit = net_ns_net_exit,
};
760 | |
/*
 * Netlink attribute policy for the nsid requests parsed in this file:
 * the two nsid attributes are signed 32-bit, pid and fd are unsigned
 * 32-bit.
 */
static const struct nla_policy rtnl_net_policy[NETNSA_MAX + 1] = {
	[NETNSA_NONE]		= { .type = NLA_UNSPEC },
	[NETNSA_NSID]		= { .type = NLA_S32 },
	[NETNSA_PID]		= { .type = NLA_U32 },
	[NETNSA_FD]		= { .type = NLA_U32 },
	[NETNSA_TARGET_NSID]	= { .type = NLA_S32 },
};
768 | |
769 | static int rtnl_net_newid(struct sk_buff *skb, struct nlmsghdr *nlh, |
770 | struct netlink_ext_ack *extack) |
771 | { |
772 | struct net *net = sock_net(sk: skb->sk); |
773 | struct nlattr *tb[NETNSA_MAX + 1]; |
774 | struct nlattr *nla; |
775 | struct net *peer; |
776 | int nsid, err; |
777 | |
778 | err = nlmsg_parse_deprecated(nlh, hdrlen: sizeof(struct rtgenmsg), tb, |
779 | NETNSA_MAX, policy: rtnl_net_policy, extack); |
780 | if (err < 0) |
781 | return err; |
782 | if (!tb[NETNSA_NSID]) { |
783 | NL_SET_ERR_MSG(extack, "nsid is missing" ); |
784 | return -EINVAL; |
785 | } |
786 | nsid = nla_get_s32(nla: tb[NETNSA_NSID]); |
787 | |
788 | if (tb[NETNSA_PID]) { |
789 | peer = get_net_ns_by_pid(nla_get_u32(nla: tb[NETNSA_PID])); |
790 | nla = tb[NETNSA_PID]; |
791 | } else if (tb[NETNSA_FD]) { |
792 | peer = get_net_ns_by_fd(nla_get_u32(nla: tb[NETNSA_FD])); |
793 | nla = tb[NETNSA_FD]; |
794 | } else { |
795 | NL_SET_ERR_MSG(extack, "Peer netns reference is missing" ); |
796 | return -EINVAL; |
797 | } |
798 | if (IS_ERR(ptr: peer)) { |
799 | NL_SET_BAD_ATTR(extack, nla); |
800 | NL_SET_ERR_MSG(extack, "Peer netns reference is invalid" ); |
801 | return PTR_ERR(ptr: peer); |
802 | } |
803 | |
804 | spin_lock_bh(lock: &net->nsid_lock); |
805 | if (__peernet2id(net, peer) >= 0) { |
806 | spin_unlock_bh(lock: &net->nsid_lock); |
807 | err = -EEXIST; |
808 | NL_SET_BAD_ATTR(extack, nla); |
809 | NL_SET_ERR_MSG(extack, |
810 | "Peer netns already has a nsid assigned" ); |
811 | goto out; |
812 | } |
813 | |
814 | err = alloc_netid(net, peer, reqid: nsid); |
815 | spin_unlock_bh(lock: &net->nsid_lock); |
816 | if (err >= 0) { |
817 | rtnl_net_notifyid(net, RTM_NEWNSID, id: err, NETLINK_CB(skb).portid, |
818 | nlh, GFP_KERNEL); |
819 | err = 0; |
820 | } else if (err == -ENOSPC && nsid >= 0) { |
821 | err = -EEXIST; |
822 | NL_SET_BAD_ATTR(extack, tb[NETNSA_NSID]); |
823 | NL_SET_ERR_MSG(extack, "The specified nsid is already used" ); |
824 | } |
825 | out: |
826 | put_net(net: peer); |
827 | return err; |
828 | } |
829 | |
830 | static int rtnl_net_get_size(void) |
831 | { |
832 | return NLMSG_ALIGN(sizeof(struct rtgenmsg)) |
833 | + nla_total_size(payload: sizeof(s32)) /* NETNSA_NSID */ |
834 | + nla_total_size(payload: sizeof(s32)) /* NETNSA_CURRENT_NSID */ |
835 | ; |
836 | } |
837 | |
/* Parameters consumed by rtnl_net_fill() when building one nsid message. */
struct net_fill_args {
	u32 portid;	/* passed to nlmsg_put() as the port id */
	u32 seq;	/* passed to nlmsg_put() as the sequence number */
	int flags;	/* passed to nlmsg_put() as the message flags */
	int cmd;	/* passed to nlmsg_put() as the message type */
	int nsid;	/* value of the NETNSA_NSID attribute */
	bool add_ref;	/* when true, NETNSA_CURRENT_NSID is emitted too */
	int ref_nsid;	/* value of NETNSA_CURRENT_NSID when add_ref is set */
};
847 | |
848 | static int rtnl_net_fill(struct sk_buff *skb, struct net_fill_args *args) |
849 | { |
850 | struct nlmsghdr *nlh; |
851 | struct rtgenmsg *rth; |
852 | |
853 | nlh = nlmsg_put(skb, portid: args->portid, seq: args->seq, type: args->cmd, payload: sizeof(*rth), |
854 | flags: args->flags); |
855 | if (!nlh) |
856 | return -EMSGSIZE; |
857 | |
858 | rth = nlmsg_data(nlh); |
859 | rth->rtgen_family = AF_UNSPEC; |
860 | |
861 | if (nla_put_s32(skb, attrtype: NETNSA_NSID, value: args->nsid)) |
862 | goto nla_put_failure; |
863 | |
864 | if (args->add_ref && |
865 | nla_put_s32(skb, attrtype: NETNSA_CURRENT_NSID, value: args->ref_nsid)) |
866 | goto nla_put_failure; |
867 | |
868 | nlmsg_end(skb, nlh); |
869 | return 0; |
870 | |
871 | nla_put_failure: |
872 | nlmsg_cancel(skb, nlh); |
873 | return -EMSGSIZE; |
874 | } |
875 | |
876 | static int rtnl_net_valid_getid_req(struct sk_buff *skb, |
877 | const struct nlmsghdr *nlh, |
878 | struct nlattr **tb, |
879 | struct netlink_ext_ack *extack) |
880 | { |
881 | int i, err; |
882 | |
883 | if (!netlink_strict_get_check(skb)) |
884 | return nlmsg_parse_deprecated(nlh, hdrlen: sizeof(struct rtgenmsg), |
885 | tb, NETNSA_MAX, policy: rtnl_net_policy, |
886 | extack); |
887 | |
888 | err = nlmsg_parse_deprecated_strict(nlh, hdrlen: sizeof(struct rtgenmsg), tb, |
889 | NETNSA_MAX, policy: rtnl_net_policy, |
890 | extack); |
891 | if (err) |
892 | return err; |
893 | |
894 | for (i = 0; i <= NETNSA_MAX; i++) { |
895 | if (!tb[i]) |
896 | continue; |
897 | |
898 | switch (i) { |
899 | case NETNSA_PID: |
900 | case NETNSA_FD: |
901 | case NETNSA_NSID: |
902 | case NETNSA_TARGET_NSID: |
903 | break; |
904 | default: |
905 | NL_SET_ERR_MSG(extack, "Unsupported attribute in peer netns getid request" ); |
906 | return -EINVAL; |
907 | } |
908 | } |
909 | |
910 | return 0; |
911 | } |
912 | |
913 | static int rtnl_net_getid(struct sk_buff *skb, struct nlmsghdr *nlh, |
914 | struct netlink_ext_ack *extack) |
915 | { |
916 | struct net *net = sock_net(sk: skb->sk); |
917 | struct nlattr *tb[NETNSA_MAX + 1]; |
918 | struct net_fill_args fillargs = { |
919 | .portid = NETLINK_CB(skb).portid, |
920 | .seq = nlh->nlmsg_seq, |
921 | .cmd = RTM_NEWNSID, |
922 | }; |
923 | struct net *peer, *target = net; |
924 | struct nlattr *nla; |
925 | struct sk_buff *msg; |
926 | int err; |
927 | |
928 | err = rtnl_net_valid_getid_req(skb, nlh, tb, extack); |
929 | if (err < 0) |
930 | return err; |
931 | if (tb[NETNSA_PID]) { |
932 | peer = get_net_ns_by_pid(nla_get_u32(nla: tb[NETNSA_PID])); |
933 | nla = tb[NETNSA_PID]; |
934 | } else if (tb[NETNSA_FD]) { |
935 | peer = get_net_ns_by_fd(nla_get_u32(nla: tb[NETNSA_FD])); |
936 | nla = tb[NETNSA_FD]; |
937 | } else if (tb[NETNSA_NSID]) { |
938 | peer = get_net_ns_by_id(net, nla_get_s32(nla: tb[NETNSA_NSID])); |
939 | if (!peer) |
940 | peer = ERR_PTR(error: -ENOENT); |
941 | nla = tb[NETNSA_NSID]; |
942 | } else { |
943 | NL_SET_ERR_MSG(extack, "Peer netns reference is missing" ); |
944 | return -EINVAL; |
945 | } |
946 | |
947 | if (IS_ERR(ptr: peer)) { |
948 | NL_SET_BAD_ATTR(extack, nla); |
949 | NL_SET_ERR_MSG(extack, "Peer netns reference is invalid" ); |
950 | return PTR_ERR(ptr: peer); |
951 | } |
952 | |
953 | if (tb[NETNSA_TARGET_NSID]) { |
954 | int id = nla_get_s32(nla: tb[NETNSA_TARGET_NSID]); |
955 | |
956 | target = rtnl_get_net_ns_capable(NETLINK_CB(skb).sk, netnsid: id); |
957 | if (IS_ERR(ptr: target)) { |
958 | NL_SET_BAD_ATTR(extack, tb[NETNSA_TARGET_NSID]); |
959 | NL_SET_ERR_MSG(extack, |
960 | "Target netns reference is invalid" ); |
961 | err = PTR_ERR(ptr: target); |
962 | goto out; |
963 | } |
964 | fillargs.add_ref = true; |
965 | fillargs.ref_nsid = peernet2id(net, peer); |
966 | } |
967 | |
968 | msg = nlmsg_new(payload: rtnl_net_get_size(), GFP_KERNEL); |
969 | if (!msg) { |
970 | err = -ENOMEM; |
971 | goto out; |
972 | } |
973 | |
974 | fillargs.nsid = peernet2id(target, peer); |
975 | err = rtnl_net_fill(skb: msg, args: &fillargs); |
976 | if (err < 0) |
977 | goto err_out; |
978 | |
979 | err = rtnl_unicast(skb: msg, net, NETLINK_CB(skb).portid); |
980 | goto out; |
981 | |
982 | err_out: |
983 | nlmsg_free(skb: msg); |
984 | out: |
985 | if (fillargs.add_ref) |
986 | put_net(net: target); |
987 | put_net(net: peer); |
988 | return err; |
989 | } |
990 | |
/* State handed to rtnl_net_dumpid_one() while dumping a namespace's nsids. */
struct rtnl_net_dump_cb {
	struct net *tgt_net;	/* replaced by the NETNSA_TARGET_NSID namespace when one is given */
	struct net *ref_net;	/* namespace used to compute fillargs.ref_nsid */
	struct sk_buff *skb;	/* buffer the messages are appended to */
	struct net_fill_args fillargs;	/* template reused for every message */
	int idx;		/* number of entries visited so far */
	int s_idx;		/* entries with idx below this are skipped */
};
999 | |
1000 | /* Runs in RCU-critical section. */ |
1001 | static int rtnl_net_dumpid_one(int id, void *peer, void *data) |
1002 | { |
1003 | struct rtnl_net_dump_cb *net_cb = (struct rtnl_net_dump_cb *)data; |
1004 | int ret; |
1005 | |
1006 | if (net_cb->idx < net_cb->s_idx) |
1007 | goto cont; |
1008 | |
1009 | net_cb->fillargs.nsid = id; |
1010 | if (net_cb->fillargs.add_ref) |
1011 | net_cb->fillargs.ref_nsid = __peernet2id(net: net_cb->ref_net, peer); |
1012 | ret = rtnl_net_fill(skb: net_cb->skb, args: &net_cb->fillargs); |
1013 | if (ret < 0) |
1014 | return ret; |
1015 | |
1016 | cont: |
1017 | net_cb->idx++; |
1018 | return 0; |
1019 | } |
1020 | |
1021 | static int rtnl_valid_dump_net_req(const struct nlmsghdr *nlh, struct sock *sk, |
1022 | struct rtnl_net_dump_cb *net_cb, |
1023 | struct netlink_callback *cb) |
1024 | { |
1025 | struct netlink_ext_ack *extack = cb->extack; |
1026 | struct nlattr *tb[NETNSA_MAX + 1]; |
1027 | int err, i; |
1028 | |
1029 | err = nlmsg_parse_deprecated_strict(nlh, hdrlen: sizeof(struct rtgenmsg), tb, |
1030 | NETNSA_MAX, policy: rtnl_net_policy, |
1031 | extack); |
1032 | if (err < 0) |
1033 | return err; |
1034 | |
1035 | for (i = 0; i <= NETNSA_MAX; i++) { |
1036 | if (!tb[i]) |
1037 | continue; |
1038 | |
1039 | if (i == NETNSA_TARGET_NSID) { |
1040 | struct net *net; |
1041 | |
1042 | net = rtnl_get_net_ns_capable(sk, netnsid: nla_get_s32(nla: tb[i])); |
1043 | if (IS_ERR(ptr: net)) { |
1044 | NL_SET_BAD_ATTR(extack, tb[i]); |
1045 | NL_SET_ERR_MSG(extack, |
1046 | "Invalid target network namespace id" ); |
1047 | return PTR_ERR(ptr: net); |
1048 | } |
1049 | net_cb->fillargs.add_ref = true; |
1050 | net_cb->ref_net = net_cb->tgt_net; |
1051 | net_cb->tgt_net = net; |
1052 | } else { |
1053 | NL_SET_BAD_ATTR(extack, tb[i]); |
1054 | NL_SET_ERR_MSG(extack, |
1055 | "Unsupported attribute in dump request" ); |
1056 | return -EINVAL; |
1057 | } |
1058 | } |
1059 | |
1060 | return 0; |
1061 | } |
1062 | |
1063 | static int rtnl_net_dumpid(struct sk_buff *skb, struct netlink_callback *cb) |
1064 | { |
1065 | struct rtnl_net_dump_cb net_cb = { |
1066 | .tgt_net = sock_net(sk: skb->sk), |
1067 | .skb = skb, |
1068 | .fillargs = { |
1069 | .portid = NETLINK_CB(cb->skb).portid, |
1070 | .seq = cb->nlh->nlmsg_seq, |
1071 | .flags = NLM_F_MULTI, |
1072 | .cmd = RTM_NEWNSID, |
1073 | }, |
1074 | .idx = 0, |
1075 | .s_idx = cb->args[0], |
1076 | }; |
1077 | int err = 0; |
1078 | |
1079 | if (cb->strict_check) { |
1080 | err = rtnl_valid_dump_net_req(nlh: cb->nlh, sk: skb->sk, net_cb: &net_cb, cb); |
1081 | if (err < 0) |
1082 | goto end; |
1083 | } |
1084 | |
1085 | rcu_read_lock(); |
1086 | idr_for_each(&net_cb.tgt_net->netns_ids, fn: rtnl_net_dumpid_one, data: &net_cb); |
1087 | rcu_read_unlock(); |
1088 | |
1089 | cb->args[0] = net_cb.idx; |
1090 | end: |
1091 | if (net_cb.fillargs.add_ref) |
1092 | put_net(net: net_cb.tgt_net); |
1093 | return err < 0 ? err : skb->len; |
1094 | } |
1095 | |
1096 | static void rtnl_net_notifyid(struct net *net, int cmd, int id, u32 portid, |
1097 | struct nlmsghdr *nlh, gfp_t gfp) |
1098 | { |
1099 | struct net_fill_args fillargs = { |
1100 | .portid = portid, |
1101 | .seq = nlh ? nlh->nlmsg_seq : 0, |
1102 | .cmd = cmd, |
1103 | .nsid = id, |
1104 | }; |
1105 | struct sk_buff *msg; |
1106 | int err = -ENOMEM; |
1107 | |
1108 | msg = nlmsg_new(payload: rtnl_net_get_size(), flags: gfp); |
1109 | if (!msg) |
1110 | goto out; |
1111 | |
1112 | err = rtnl_net_fill(skb: msg, args: &fillargs); |
1113 | if (err < 0) |
1114 | goto err_out; |
1115 | |
1116 | rtnl_notify(skb: msg, net, pid: portid, RTNLGRP_NSID, nlh, flags: gfp); |
1117 | return; |
1118 | |
1119 | err_out: |
1120 | nlmsg_free(skb: msg); |
1121 | out: |
1122 | rtnl_set_sk_err(net, RTNLGRP_NSID, error: err); |
1123 | } |
1124 | |
1125 | #ifdef CONFIG_NET_NS |
1126 | static void __init netns_ipv4_struct_check(void) |
1127 | { |
1128 | /* TX readonly hotpath cache lines */ |
1129 | CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read_tx, |
1130 | sysctl_tcp_early_retrans); |
1131 | CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read_tx, |
1132 | sysctl_tcp_tso_win_divisor); |
1133 | CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read_tx, |
1134 | sysctl_tcp_tso_rtt_log); |
1135 | CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read_tx, |
1136 | sysctl_tcp_autocorking); |
1137 | CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read_tx, |
1138 | sysctl_tcp_min_snd_mss); |
1139 | CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read_tx, |
1140 | sysctl_tcp_notsent_lowat); |
1141 | CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read_tx, |
1142 | sysctl_tcp_limit_output_bytes); |
1143 | CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read_tx, |
1144 | sysctl_tcp_min_rtt_wlen); |
1145 | CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read_tx, |
1146 | sysctl_tcp_wmem); |
1147 | CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read_tx, |
1148 | sysctl_ip_fwd_use_pmtu); |
1149 | CACHELINE_ASSERT_GROUP_SIZE(struct netns_ipv4, netns_ipv4_read_tx, 33); |
1150 | |
1151 | /* TXRX readonly hotpath cache lines */ |
1152 | CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read_txrx, |
1153 | sysctl_tcp_moderate_rcvbuf); |
1154 | CACHELINE_ASSERT_GROUP_SIZE(struct netns_ipv4, netns_ipv4_read_txrx, 1); |
1155 | |
1156 | /* RX readonly hotpath cache line */ |
1157 | CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read_rx, |
1158 | sysctl_ip_early_demux); |
1159 | CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read_rx, |
1160 | sysctl_tcp_early_demux); |
1161 | CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read_rx, |
1162 | sysctl_tcp_reordering); |
1163 | CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read_rx, |
1164 | sysctl_tcp_rmem); |
1165 | CACHELINE_ASSERT_GROUP_SIZE(struct netns_ipv4, netns_ipv4_read_rx, 18); |
1166 | } |
1167 | #endif |
1168 | |
1169 | void __init net_ns_init(void) |
1170 | { |
1171 | struct net_generic *ng; |
1172 | |
1173 | #ifdef CONFIG_NET_NS |
1174 | netns_ipv4_struct_check(); |
1175 | net_cachep = kmem_cache_create(name: "net_namespace" , size: sizeof(struct net), |
1176 | SMP_CACHE_BYTES, |
1177 | SLAB_PANIC|SLAB_ACCOUNT, NULL); |
1178 | |
1179 | /* Create workqueue for cleanup */ |
1180 | netns_wq = create_singlethread_workqueue("netns" ); |
1181 | if (!netns_wq) |
1182 | panic(fmt: "Could not create netns workq" ); |
1183 | #endif |
1184 | |
1185 | ng = net_alloc_generic(); |
1186 | if (!ng) |
1187 | panic(fmt: "Could not allocate generic netns" ); |
1188 | |
1189 | rcu_assign_pointer(init_net.gen, ng); |
1190 | |
1191 | #ifdef CONFIG_KEYS |
1192 | init_net.key_domain = &init_net_key_domain; |
1193 | #endif |
1194 | down_write(sem: &pernet_ops_rwsem); |
1195 | preinit_net(net: &init_net); |
1196 | if (setup_net(net: &init_net, user_ns: &init_user_ns)) |
1197 | panic(fmt: "Could not setup the initial network namespace" ); |
1198 | |
1199 | init_net_initialized = true; |
1200 | up_write(sem: &pernet_ops_rwsem); |
1201 | |
1202 | if (register_pernet_subsys(&net_ns_ops)) |
1203 | panic(fmt: "Could not register network namespace subsystems" ); |
1204 | |
1205 | rtnl_register(PF_UNSPEC, RTM_NEWNSID, rtnl_net_newid, NULL, |
1206 | flags: RTNL_FLAG_DOIT_UNLOCKED); |
1207 | rtnl_register(PF_UNSPEC, RTM_GETNSID, rtnl_net_getid, rtnl_net_dumpid, |
1208 | flags: RTNL_FLAG_DOIT_UNLOCKED); |
1209 | } |
1210 | |
1211 | static void free_exit_list(struct pernet_operations *ops, struct list_head *net_exit_list) |
1212 | { |
1213 | ops_pre_exit_list(ops, net_exit_list); |
1214 | synchronize_rcu(); |
1215 | |
1216 | if (ops->exit_batch_rtnl) { |
1217 | LIST_HEAD(dev_kill_list); |
1218 | |
1219 | rtnl_lock(); |
1220 | ops->exit_batch_rtnl(net_exit_list, &dev_kill_list); |
1221 | unregister_netdevice_many(head: &dev_kill_list); |
1222 | rtnl_unlock(); |
1223 | } |
1224 | ops_exit_list(ops, net_exit_list); |
1225 | |
1226 | ops_free_list(ops, net_exit_list); |
1227 | } |
1228 | |
1229 | #ifdef CONFIG_NET_NS |
1230 | static int __register_pernet_operations(struct list_head *list, |
1231 | struct pernet_operations *ops) |
1232 | { |
1233 | struct net *net; |
1234 | int error; |
1235 | LIST_HEAD(net_exit_list); |
1236 | |
1237 | list_add_tail(new: &ops->list, head: list); |
1238 | if (ops->init || (ops->id && ops->size)) { |
1239 | /* We held write locked pernet_ops_rwsem, and parallel |
1240 | * setup_net() and cleanup_net() are not possible. |
1241 | */ |
1242 | for_each_net(net) { |
1243 | error = ops_init(ops, net); |
1244 | if (error) |
1245 | goto out_undo; |
1246 | list_add_tail(new: &net->exit_list, head: &net_exit_list); |
1247 | } |
1248 | } |
1249 | return 0; |
1250 | |
1251 | out_undo: |
1252 | /* If I have an error cleanup all namespaces I initialized */ |
1253 | list_del(entry: &ops->list); |
1254 | free_exit_list(ops, net_exit_list: &net_exit_list); |
1255 | return error; |
1256 | } |
1257 | |
1258 | static void __unregister_pernet_operations(struct pernet_operations *ops) |
1259 | { |
1260 | struct net *net; |
1261 | LIST_HEAD(net_exit_list); |
1262 | |
1263 | list_del(entry: &ops->list); |
1264 | /* See comment in __register_pernet_operations() */ |
1265 | for_each_net(net) |
1266 | list_add_tail(new: &net->exit_list, head: &net_exit_list); |
1267 | |
1268 | free_exit_list(ops, net_exit_list: &net_exit_list); |
1269 | } |
1270 | |
1271 | #else |
1272 | |
1273 | static int __register_pernet_operations(struct list_head *list, |
1274 | struct pernet_operations *ops) |
1275 | { |
1276 | if (!init_net_initialized) { |
1277 | list_add_tail(&ops->list, list); |
1278 | return 0; |
1279 | } |
1280 | |
1281 | return ops_init(ops, &init_net); |
1282 | } |
1283 | |
1284 | static void __unregister_pernet_operations(struct pernet_operations *ops) |
1285 | { |
1286 | if (!init_net_initialized) { |
1287 | list_del(&ops->list); |
1288 | } else { |
1289 | LIST_HEAD(net_exit_list); |
1290 | list_add(&init_net.exit_list, &net_exit_list); |
1291 | free_exit_list(ops, &net_exit_list); |
1292 | } |
1293 | } |
1294 | |
1295 | #endif /* CONFIG_NET_NS */ |
1296 | |
1297 | static DEFINE_IDA(net_generic_ids); |
1298 | |
1299 | static int register_pernet_operations(struct list_head *list, |
1300 | struct pernet_operations *ops) |
1301 | { |
1302 | int error; |
1303 | |
1304 | if (ops->id) { |
1305 | error = ida_alloc_min(ida: &net_generic_ids, MIN_PERNET_OPS_ID, |
1306 | GFP_KERNEL); |
1307 | if (error < 0) |
1308 | return error; |
1309 | *ops->id = error; |
1310 | max_gen_ptrs = max(max_gen_ptrs, *ops->id + 1); |
1311 | } |
1312 | error = __register_pernet_operations(list, ops); |
1313 | if (error) { |
1314 | rcu_barrier(); |
1315 | if (ops->id) |
1316 | ida_free(&net_generic_ids, id: *ops->id); |
1317 | } |
1318 | |
1319 | return error; |
1320 | } |
1321 | |
1322 | static void unregister_pernet_operations(struct pernet_operations *ops) |
1323 | { |
1324 | __unregister_pernet_operations(ops); |
1325 | rcu_barrier(); |
1326 | if (ops->id) |
1327 | ida_free(&net_generic_ids, id: *ops->id); |
1328 | } |
1329 | |
1330 | /** |
1331 | * register_pernet_subsys - register a network namespace subsystem |
1332 | * @ops: pernet operations structure for the subsystem |
1333 | * |
1334 | * Register a subsystem which has init and exit functions |
1335 | * that are called when network namespaces are created and |
1336 | * destroyed respectively. |
1337 | * |
1338 | * When registered all network namespace init functions are |
1339 | * called for every existing network namespace. Allowing kernel |
1340 | * modules to have a race free view of the set of network namespaces. |
1341 | * |
1342 | * When a new network namespace is created all of the init |
1343 | * methods are called in the order in which they were registered. |
1344 | * |
1345 | * When a network namespace is destroyed all of the exit methods |
1346 | * are called in the reverse of the order with which they were |
1347 | * registered. |
1348 | */ |
1349 | int register_pernet_subsys(struct pernet_operations *ops) |
1350 | { |
1351 | int error; |
1352 | down_write(sem: &pernet_ops_rwsem); |
1353 | error = register_pernet_operations(list: first_device, ops); |
1354 | up_write(sem: &pernet_ops_rwsem); |
1355 | return error; |
1356 | } |
1357 | EXPORT_SYMBOL_GPL(register_pernet_subsys); |
1358 | |
1359 | /** |
1360 | * unregister_pernet_subsys - unregister a network namespace subsystem |
1361 | * @ops: pernet operations structure to manipulate |
1362 | * |
1363 | * Remove the pernet operations structure from the list to be |
1364 | * used when network namespaces are created or destroyed. In |
1365 | * addition run the exit method for all existing network |
1366 | * namespaces. |
1367 | */ |
1368 | void unregister_pernet_subsys(struct pernet_operations *ops) |
1369 | { |
1370 | down_write(sem: &pernet_ops_rwsem); |
1371 | unregister_pernet_operations(ops); |
1372 | up_write(sem: &pernet_ops_rwsem); |
1373 | } |
1374 | EXPORT_SYMBOL_GPL(unregister_pernet_subsys); |
1375 | |
1376 | /** |
1377 | * register_pernet_device - register a network namespace device |
1378 | * @ops: pernet operations structure for the subsystem |
1379 | * |
1380 | * Register a device which has init and exit functions |
1381 | * that are called when network namespaces are created and |
1382 | * destroyed respectively. |
1383 | * |
1384 | * When registered all network namespace init functions are |
1385 | * called for every existing network namespace. Allowing kernel |
1386 | * modules to have a race free view of the set of network namespaces. |
1387 | * |
1388 | * When a new network namespace is created all of the init |
1389 | * methods are called in the order in which they were registered. |
1390 | * |
1391 | * When a network namespace is destroyed all of the exit methods |
1392 | * are called in the reverse of the order with which they were |
1393 | * registered. |
1394 | */ |
1395 | int register_pernet_device(struct pernet_operations *ops) |
1396 | { |
1397 | int error; |
1398 | down_write(sem: &pernet_ops_rwsem); |
1399 | error = register_pernet_operations(list: &pernet_list, ops); |
1400 | if (!error && (first_device == &pernet_list)) |
1401 | first_device = &ops->list; |
1402 | up_write(sem: &pernet_ops_rwsem); |
1403 | return error; |
1404 | } |
1405 | EXPORT_SYMBOL_GPL(register_pernet_device); |
1406 | |
1407 | /** |
1408 | * unregister_pernet_device - unregister a network namespace netdevice |
1409 | * @ops: pernet operations structure to manipulate |
1410 | * |
1411 | * Remove the pernet operations structure from the list to be |
1412 | * used when network namespaces are created or destroyed. In |
1413 | * addition run the exit method for all existing network |
1414 | * namespaces. |
1415 | */ |
1416 | void unregister_pernet_device(struct pernet_operations *ops) |
1417 | { |
1418 | down_write(sem: &pernet_ops_rwsem); |
1419 | if (&ops->list == first_device) |
1420 | first_device = first_device->next; |
1421 | unregister_pernet_operations(ops); |
1422 | up_write(sem: &pernet_ops_rwsem); |
1423 | } |
1424 | EXPORT_SYMBOL_GPL(unregister_pernet_device); |
1425 | |
1426 | #ifdef CONFIG_NET_NS |
1427 | static struct ns_common *netns_get(struct task_struct *task) |
1428 | { |
1429 | struct net *net = NULL; |
1430 | struct nsproxy *nsproxy; |
1431 | |
1432 | task_lock(p: task); |
1433 | nsproxy = task->nsproxy; |
1434 | if (nsproxy) |
1435 | net = get_net(net: nsproxy->net_ns); |
1436 | task_unlock(p: task); |
1437 | |
1438 | return net ? &net->ns : NULL; |
1439 | } |
1440 | |
1441 | static inline struct net *to_net_ns(struct ns_common *ns) |
1442 | { |
1443 | return container_of(ns, struct net, ns); |
1444 | } |
1445 | |
/* proc_ns_operations.put hook: drop a reference on the owning struct net. */
static void netns_put(struct ns_common *ns)
{
	struct net *net = to_net_ns(ns);

	put_net(net);
}
1450 | |
1451 | static int netns_install(struct nsset *nsset, struct ns_common *ns) |
1452 | { |
1453 | struct nsproxy *nsproxy = nsset->nsproxy; |
1454 | struct net *net = to_net_ns(ns); |
1455 | |
1456 | if (!ns_capable(ns: net->user_ns, CAP_SYS_ADMIN) || |
1457 | !ns_capable(ns: nsset->cred->user_ns, CAP_SYS_ADMIN)) |
1458 | return -EPERM; |
1459 | |
1460 | put_net(net: nsproxy->net_ns); |
1461 | nsproxy->net_ns = get_net(net); |
1462 | return 0; |
1463 | } |
1464 | |
1465 | static struct user_namespace *netns_owner(struct ns_common *ns) |
1466 | { |
1467 | return to_net_ns(ns)->user_ns; |
1468 | } |
1469 | |
1470 | const struct proc_ns_operations netns_operations = { |
1471 | .name = "net" , |
1472 | .type = CLONE_NEWNET, |
1473 | .get = netns_get, |
1474 | .put = netns_put, |
1475 | .install = netns_install, |
1476 | .owner = netns_owner, |
1477 | }; |
1478 | #endif |
1479 | |