net_namespace.c source code [linux/net/core/net_namespace.c]

1	// SPDX-License-Identifier: GPL-2.0-only
2	#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
3
4	#include <linux/workqueue.h>
5	#include <linux/rtnetlink.h>
6	#include <linux/cache.h>
7	#include <linux/slab.h>
8	#include <linux/list.h>
9	#include <linux/delay.h>
10	#include <linux/sched.h>
11	#include <linux/idr.h>
12	#include <linux/rculist.h>
13	#include <linux/nsproxy.h>
14	#include <linux/fs.h>
15	#include <linux/proc_ns.h>
16	#include <linux/file.h>
17	#include <linux/export.h>
18	#include <linux/user_namespace.h>
19	#include <linux/net_namespace.h>
20	#include <linux/sched/task.h>
21	#include <linux/uidgid.h>
22	#include <linux/cookie.h>
23	#include <linux/proc_fs.h>
24
25	#include <net/sock.h>
26	#include <net/netlink.h>
27	#include <net/net_namespace.h>
28	#include <net/netns/generic.h>
29
30	/*
31	* Our network namespace constructor/destructor lists
32	*/
33
34	static LIST_HEAD(pernet_list);
35	static struct list_head *first_device = &pernet_list;
36
37	LIST_HEAD(net_namespace_list);
38	EXPORT_SYMBOL_GPL(net_namespace_list);
39
40	/ Protects net_namespace_list. Nests iside rtnl_lock() /
41	DECLARE_RWSEM(net_rwsem);
42	EXPORT_SYMBOL_GPL(net_rwsem);
43
44	#ifdef CONFIG_KEYS
45	static struct key_tag init_net_key_domain = { .usage = REFCOUNT_INIT(`1`) };
46	#endif
47
48	struct net init_net;
49	EXPORT_SYMBOL(init_net);
50
51	static bool init_net_initialized;
52	/*
53	* pernet_ops_rwsem: protects: pernet_list, net_generic_ids,
54	* init_net_initialized and first_device pointer.
55	* This is internal net namespace object. Please, don't use it
56	* outside.
57	*/
58	DECLARE_RWSEM(pernet_ops_rwsem);
59	EXPORT_SYMBOL_GPL(pernet_ops_rwsem);
60
61	#define MIN_PERNET_OPS_ID \
62	((sizeof(struct net_generic) + sizeof(void ) - 1) / sizeof(void ))
63
64	#define INITIAL_NET_GEN_PTRS 13 /* +1 for len +2 for rcu_head */
65
66	static unsigned int max_gen_ptrs = INITIAL_NET_GEN_PTRS;
67
68	DEFINE_COOKIE(net_cookie);
69
70	static struct net_generic net_alloc_generic(void*)
71	{
72	struct net_generic *ng;
73	unsigned int generic_size = offsetof(struct net_generic, ptr[max_gen_ptrs]);
74
75	ng = kzalloc(size: generic_size, GFP_KERNEL);
76	if (ng)
77	ng->s.len = max_gen_ptrs;
78
79	return ng;
80	}
81
82	static int net_assign_generic(struct net net, unsigned* int id, void *data)
83	{
84	struct net_generic ng, old_ng;
85
86	BUG_ON(id < MIN_PERNET_OPS_ID);
87
88	old_ng = rcu_dereference_protected(net->gen,
89	lockdep_is_held(&pernet_ops_rwsem));
90	if (old_ng->s.len > id) {
91	old_ng->ptr[id] = data;
92	return `0`;
93	}
94
95	ng = net_alloc_generic();
96	if (!ng)
97	return -ENOMEM;
98
99	/*
100	* Some synchronisation notes:
101	*
102	* The net_generic explores the net->gen array inside rcu
103	* read section. Besides once set the net->gen->ptr[x]
104	* pointer never changes (see rules in netns/generic.h).
105	*
106	* That said, we simply duplicate this array and schedule
107	* the old copy for kfree after a grace period.
108	*/
109
110	memcpy(&ng->ptr[MIN_PERNET_OPS_ID], &old_ng->ptr[MIN_PERNET_OPS_ID],
111	(old_ng->s.len - MIN_PERNET_OPS_ID) * sizeof(void *));
112	ng->ptr[id] = data;
113
114	rcu_assign_pointer(net->gen, ng);
115	kfree_rcu(old_ng, s.rcu);
116	return `0`;
117	}
118
119	static int ops_init(const struct pernet_operations ops, struct* net *net)
120	{
121	struct net_generic *ng;
122	int err = -ENOMEM;
123	void *data = NULL;
124
125	if (ops->id && ops->size) {
126	data = kzalloc(size: ops->size, GFP_KERNEL);
127	if (!data)
128	goto out;
129
130	err = net_assign_generic(net, id: *ops->id, data);
131	if (err)
132	goto cleanup;
133	}
134	err = `0`;
135	if (ops->init)
136	err = ops->init(net);
137	if (!err)
138	return `0`;
139
140	if (ops->id && ops->size) {
141	ng = rcu_dereference_protected(net->gen,
142	lockdep_is_held(&pernet_ops_rwsem));
143	ng->ptr[*ops->id] = NULL;
144	}
145
146	cleanup:
147	kfree(objp: data);
148
149	out:
150	return err;
151	}
152
153	static void ops_pre_exit_list(const struct pernet_operations *ops,
154	struct list_head *net_exit_list)
155	{
156	struct net *net;
157
158	if (ops->pre_exit) {
159	list_for_each_entry(net, net_exit_list, exit_list)
160	ops->pre_exit(net);
161	}
162	}
163
164	static void ops_exit_list(const struct pernet_operations *ops,
165	struct list_head *net_exit_list)
166	{
167	struct net *net;
168	if (ops->exit) {
169	list_for_each_entry(net, net_exit_list, exit_list) {
170	ops->exit(net);
171	cond_resched();
172	}
173	}
174	if (ops->exit_batch)
175	ops->exit_batch(net_exit_list);
176	}
177
178	static void ops_free_list(const struct pernet_operations *ops,
179	struct list_head *net_exit_list)
180	{
181	struct net *net;
182	if (ops->size && ops->id) {
183	list_for_each_entry(net, net_exit_list, exit_list)
184	kfree(objp: net_generic(net, id: *ops->id));
185	}
186	}
187
188	/ should be called with nsid_lock held /
189	static int alloc_netid(struct net net, struct* net peer, int* reqid)
190	{
191	int min = `0`, max = `0`;
192
193	if (reqid >= `0`) {
194	min = reqid;
195	max = reqid + `1`;
196	}
197
198	return idr_alloc(&net->netns_ids, ptr: peer, start: min, end: max, GFP_ATOMIC);
199	}
200
/* This function is used by idr_for_each(). If net is equal to peer, the
 * function returns the id so that idr_for_each() stops. Because we cannot
 * returns the id 0 (idr_for_each() will not stop), we return the magic value
 * NET_ID_ZERO (-1) for it.
 */
#define NET_ID_ZERO -1
static int net_eq_idr(int id, void *net, void *peer)
{
	if (net_eq(net, peer))
		return id ? : NET_ID_ZERO;
	return 0;
}
213
214	/ Must be called from RCU-critical section or with nsid_lock held /
215	static int __peernet2id(const struct net net, struct* net *peer)
216	{
217	int id = idr_for_each(&net->netns_ids, fn: net_eq_idr, data: peer);
218
219	/ Magic value for id 0. /
220	if (id == NET_ID_ZERO)
221	return `0`;
222	if (id > `0`)
223	return id;
224
225	return NETNSA_NSID_NOT_ASSIGNED;
226	}
227
228	static void rtnl_net_notifyid(struct net net, int* cmd, int id, u32 portid,
229	struct nlmsghdr *nlh, gfp_t gfp);
230	/ This function returns the id of a peer netns. If no id is assigned, one will*
231	* be allocated and returned.
232	*/
233	int peernet2id_alloc(struct net net, struct* net *peer, gfp_t gfp)
234	{
235	int id;
236
237	if (refcount_read(r: &net->ns.count) == `0`)
238	return NETNSA_NSID_NOT_ASSIGNED;
239
240	spin_lock_bh(lock: &net->nsid_lock);
241	id = __peernet2id(net, peer);
242	if (id >= `0`) {
243	spin_unlock_bh(lock: &net->nsid_lock);
244	return id;
245	}
246
247	/ When peer is obtained from RCU lists, we may race with*
248	* its cleanup. Check whether it's alive, and this guarantees
249	* we never hash a peer back to net->netns_ids, after it has
250	* just been idr_remove()'d from there in cleanup_net().
251	*/
252	if (!maybe_get_net(net: peer)) {
253	spin_unlock_bh(lock: &net->nsid_lock);
254	return NETNSA_NSID_NOT_ASSIGNED;
255	}
256
257	id = alloc_netid(net, peer, reqid: -`1`);
258	spin_unlock_bh(lock: &net->nsid_lock);
259
260	put_net(net: peer);
261	if (id < `0`)
262	return NETNSA_NSID_NOT_ASSIGNED;
263
264	rtnl_net_notifyid(net, RTM_NEWNSID, id, portid: `0`, NULL, gfp);
265
266	return id;
267	}
268	EXPORT_SYMBOL_GPL(peernet2id_alloc);
269
/* This function returns, if assigned, the id of a peer netns.
 * Otherwise it returns NETNSA_NSID_NOT_ASSIGNED (see __peernet2id()).
 * The lookup is done inside an RCU read-side critical section.
 */
int peernet2id(const struct net *net, struct net *peer)
{
	int id;

	rcu_read_lock();
	id = __peernet2id(net, peer);
	rcu_read_unlock();

	return id;
}
EXPORT_SYMBOL(peernet2id);
282
/* This function returns true if the peer netns has an id assigned into the
 * current netns.
 */
bool peernet_has_id(const struct net *net, struct net *peer)
{
	return peernet2id(net, peer) >= 0;
}
290
291	struct net get_net_ns_by_id(const* struct net net, int* id)
292	{
293	struct net *peer;
294
295	if (id < `0`)
296	return NULL;
297
298	rcu_read_lock();
299	peer = idr_find(&net->netns_ids, id);
300	if (peer)
301	peer = maybe_get_net(net: peer);
302	rcu_read_unlock();
303
304	return peer;
305	}
306	EXPORT_SYMBOL_GPL(get_net_ns_by_id);
307
308	/ init code that must occur even if setup_net() is not called. /
309	static __net_init void preinit_net(struct net *net)
310	{
311	ref_tracker_dir_init(dir: &net->notrefcnt_tracker, quarantine_count: `128`, name: "net notrefcnt");
312	}
313
314	/*
315	* setup_net runs the initializers for the network namespace object.
316	*/
317	static __net_init int setup_net(struct net net, struct* user_namespace *user_ns)
318	{
319	/ Must be called with pernet_ops_rwsem held /
320	const struct pernet_operations ops, saved_ops;
321	LIST_HEAD(net_exit_list);
322	LIST_HEAD(dev_kill_list);
323	int error = `0`;
324
325	refcount_set(r: &net->ns.count, n: `1`);
326	ref_tracker_dir_init(dir: &net->refcnt_tracker, quarantine_count: `128`, name: "net refcnt");
327
328	refcount_set(r: &net->passive, n: `1`);
329	get_random_bytes(buf: &net->hash_mix, len: sizeof(u32));
330	preempt_disable();
331	net->net_cookie = gen_cookie_next(gc: &net_cookie);
332	preempt_enable();
333	net->dev_base_seq = `1`;
334	net->user_ns = user_ns;
335	idr_init(idr: &net->netns_ids);
336	spin_lock_init(&net->nsid_lock);
337	mutex_init(&net->ipv4.ra_mutex);
338
339	list_for_each_entry(ops, &pernet_list, list) {
340	error = ops_init(ops, net);
341	if (error < `0`)
342	goto out_undo;
343	}
344	down_write(sem: &net_rwsem);
345	list_add_tail_rcu(new: &net->list, head: &net_namespace_list);
346	up_write(sem: &net_rwsem);
347	out:
348	return error;
349
350	out_undo:
351	/ Walk through the list backwards calling the exit functions*
352	* for the pernet modules whose init functions did not fail.
353	*/
354	list_add(new: &net->exit_list, head: &net_exit_list);
355	saved_ops = ops;
356	list_for_each_entry_continue_reverse(ops, &pernet_list, list)
357	ops_pre_exit_list(ops, net_exit_list: &net_exit_list);
358
359	synchronize_rcu();
360
361	ops = saved_ops;
362	rtnl_lock();
363	list_for_each_entry_continue_reverse(ops, &pernet_list, list) {
364	if (ops->exit_batch_rtnl)
365	ops->exit_batch_rtnl(&net_exit_list, &dev_kill_list);
366	}
367	unregister_netdevice_many(head: &dev_kill_list);
368	rtnl_unlock();
369
370	ops = saved_ops;
371	list_for_each_entry_continue_reverse(ops, &pernet_list, list)
372	ops_exit_list(ops, net_exit_list: &net_exit_list);
373
374	ops = saved_ops;
375	list_for_each_entry_continue_reverse(ops, &pernet_list, list)
376	ops_free_list(ops, net_exit_list: &net_exit_list);
377
378	rcu_barrier();
379	goto out;
380	}
381
382	static int __net_init net_defaults_init_net(struct net *net)
383	{
384	net->core.sysctl_somaxconn = SOMAXCONN;
385	/ Limits per socket sk_omem_alloc usage.*
386	* TCP zerocopy regular usage needs 128 KB.
387	*/
388	net->core.sysctl_optmem_max = `128` * `1024`;
389	net->core.sysctl_txrehash = SOCK_TXREHASH_ENABLED;
390
391	return `0`;
392	}
393
394	static struct pernet_operations net_defaults_ops = {
395	.init = net_defaults_init_net,
396	};
397
398	static __init int net_defaults_init(void)
399	{
400	if (register_pernet_subsys(&net_defaults_ops))
401	panic(fmt: "Cannot initialize net default settings");
402
403	return `0`;
404	}
405
406	core_initcall(net_defaults_init);
407
408	#ifdef CONFIG_NET_NS
409	static struct ucounts inc_net_namespaces(struct* user_namespace *ns)
410	{
411	return inc_ucount(ns, current_euid(), type: UCOUNT_NET_NAMESPACES);
412	}
413
414	static void dec_net_namespaces(struct ucounts *ucounts)
415	{
416	dec_ucount(ucounts, type: UCOUNT_NET_NAMESPACES);
417	}
418
419	static struct kmem_cache *net_cachep __ro_after_init;
420	static struct workqueue_struct *netns_wq;
421
422	static struct net net_alloc(void*)
423	{
424	struct net *net = NULL;
425	struct net_generic *ng;
426
427	ng = net_alloc_generic();
428	if (!ng)
429	goto out;
430
431	net = kmem_cache_zalloc(k: net_cachep, GFP_KERNEL);
432	if (!net)
433	goto out_free;
434
435	#ifdef CONFIG_KEYS
436	net->key_domain = kzalloc(size: sizeof(struct key_tag), GFP_KERNEL);
437	if (!net->key_domain)
438	goto out_free_2;
439	refcount_set(r: &net->key_domain->usage, n: `1`);
440	#endif
441
442	rcu_assign_pointer(net->gen, ng);
443	out:
444	return net;
445
446	#ifdef CONFIG_KEYS
447	out_free_2:
448	kmem_cache_free(s: net_cachep, objp: net);
449	net = NULL;
450	#endif
451	out_free:
452	kfree(objp: ng);
453	goto out;
454	}
455
456	static void net_free(struct net *net)
457	{
458	if (refcount_dec_and_test(r: &net->passive)) {
459	kfree(rcu_access_pointer(net->gen));
460
461	/ There should not be any trackers left there. /
462	ref_tracker_dir_exit(dir: &net->notrefcnt_tracker);
463
464	kmem_cache_free(s: net_cachep, objp: net);
465	}
466	}
467
/*
 * Type-erased wrapper around net_free(): @p is a struct net pointer or
 * NULL, in which case nothing is done.
 */
void net_drop_ns(void *p)
{
	struct net *net = (struct net *)p;

	if (net)
		net_free(net);
}
475
476	struct net copy_net_ns(unsigned* long flags,
477	struct user_namespace user_ns, struct* net *old_net)
478	{
479	struct ucounts *ucounts;
480	struct net *net;
481	int rv;
482
483	if (!(flags & CLONE_NEWNET))
484	return get_net(net: old_net);
485
486	ucounts = inc_net_namespaces(ns: user_ns);
487	if (!ucounts)
488	return ERR_PTR(error: -ENOSPC);
489
490	net = net_alloc();
491	if (!net) {
492	rv = -ENOMEM;
493	goto dec_ucounts;
494	}
495
496	preinit_net(net);
497	refcount_set(r: &net->passive, n: `1`);
498	net->ucounts = ucounts;
499	get_user_ns(ns: user_ns);
500
501	rv = down_read_killable(sem: &pernet_ops_rwsem);
502	if (rv < `0`)
503	goto put_userns;
504
505	rv = setup_net(net, user_ns);
506
507	up_read(sem: &pernet_ops_rwsem);
508
509	if (rv < `0`) {
510	put_userns:
511	#ifdef CONFIG_KEYS
512	key_remove_domain(domain_tag: net->key_domain);
513	#endif
514	put_user_ns(ns: user_ns);
515	net_free(net);
516	dec_ucounts:
517	dec_net_namespaces(ucounts);
518	return ERR_PTR(error: rv);
519	}
520	return net;
521	}
522
523	/**
524	* net_ns_get_ownership - get sysfs ownership data for @net
525	* @net: network namespace in question (can be NULL)
526	* @uid: kernel user ID for sysfs objects
527	* @gid: kernel group ID for sysfs objects
528	*
529	* Returns the uid/gid pair of root in the user namespace associated with the
530	* given network namespace.
531	*/
532	void net_ns_get_ownership(const struct net net, kuid_t uid, kgid_t *gid)
533	{
534	if (net) {
535	kuid_t ns_root_uid = make_kuid(from: net->user_ns, uid: `0`);
536	kgid_t ns_root_gid = make_kgid(from: net->user_ns, gid: `0`);
537
538	if (uid_valid(uid: ns_root_uid))
539	*uid = ns_root_uid;
540
541	if (gid_valid(gid: ns_root_gid))
542	*gid = ns_root_gid;
543	} else {
544	*uid = GLOBAL_ROOT_UID;
545	*gid = GLOBAL_ROOT_GID;
546	}
547	}
548	EXPORT_SYMBOL_GPL(net_ns_get_ownership);
549
550	static void unhash_nsid(struct net net, struct* net *last)
551	{
552	struct net *tmp;
553	/ This function is only called from cleanup_net() work,*
554	* and this work is the only process, that may delete
555	* a net from net_namespace_list. So, when the below
556	* is executing, the list may only grow. Thus, we do not
557	* use for_each_net_rcu() or net_rwsem.
558	*/
559	for_each_net(tmp) {
560	int id;
561
562	spin_lock_bh(lock: &tmp->nsid_lock);
563	id = __peernet2id(net: tmp, peer: net);
564	if (id >= `0`)
565	idr_remove(&tmp->netns_ids, id);
566	spin_unlock_bh(lock: &tmp->nsid_lock);
567	if (id >= `0`)
568	rtnl_net_notifyid(net: tmp, RTM_DELNSID, id, portid: `0`, NULL,
569	GFP_KERNEL);
570	if (tmp == last)
571	break;
572	}
573	spin_lock_bh(lock: &net->nsid_lock);
574	idr_destroy(&net->netns_ids);
575	spin_unlock_bh(lock: &net->nsid_lock);
576	}
577
578	static LLIST_HEAD(cleanup_list);
579
580	static void cleanup_net(struct work_struct *work)
581	{
582	const struct pernet_operations *ops;
583	struct net net, tmp, *last;
584	struct llist_node *net_kill_list;
585	LIST_HEAD(net_exit_list);
586	LIST_HEAD(dev_kill_list);
587
588	/ Atomically snapshot the list of namespaces to cleanup /
589	net_kill_list = llist_del_all(head: &cleanup_list);
590
591	down_read(sem: &pernet_ops_rwsem);
592
593	/ Don't let anyone else find us. /
594	down_write(sem: &net_rwsem);
595	llist_for_each_entry(net, net_kill_list, cleanup_list)
596	list_del_rcu(entry: &net->list);
597	/ Cache last net. After we unlock rtnl, no one new net*
598	* added to net_namespace_list can assign nsid pointer
599	* to a net from net_kill_list (see peernet2id_alloc()).
600	* So, we skip them in unhash_nsid().
601	*
602	* Note, that unhash_nsid() does not delete nsid links
603	* between net_kill_list's nets, as they've already
604	* deleted from net_namespace_list. But, this would be
605	* useless anyway, as netns_ids are destroyed there.
606	*/
607	last = list_last_entry(&net_namespace_list, struct net, list);
608	up_write(sem: &net_rwsem);
609
610	llist_for_each_entry(net, net_kill_list, cleanup_list) {
611	unhash_nsid(net, last);
612	list_add_tail(new: &net->exit_list, head: &net_exit_list);
613	}
614
615	/ Run all of the network namespace pre_exit methods /
616	list_for_each_entry_reverse(ops, &pernet_list, list)
617	ops_pre_exit_list(ops, net_exit_list: &net_exit_list);
618
619	/*
620	* Another CPU might be rcu-iterating the list, wait for it.
621	* This needs to be before calling the exit() notifiers, so
622	* the rcu_barrier() below isn't sufficient alone.
623	* Also the pre_exit() and exit() methods need this barrier.
624	*/
625	synchronize_rcu_expedited();
626
627	rtnl_lock();
628	list_for_each_entry_reverse(ops, &pernet_list, list) {
629	if (ops->exit_batch_rtnl)
630	ops->exit_batch_rtnl(&net_exit_list, &dev_kill_list);
631	}
632	unregister_netdevice_many(head: &dev_kill_list);
633	rtnl_unlock();
634
635	/ Run all of the network namespace exit methods /
636	list_for_each_entry_reverse(ops, &pernet_list, list)
637	ops_exit_list(ops, net_exit_list: &net_exit_list);
638
639	/ Free the net generic variables /
640	list_for_each_entry_reverse(ops, &pernet_list, list)
641	ops_free_list(ops, net_exit_list: &net_exit_list);
642
643	up_read(sem: &pernet_ops_rwsem);
644
645	/ Ensure there are no outstanding rcu callbacks using this*
646	* network namespace.
647	*/
648	rcu_barrier();
649
650	/ Finally it is safe to free my network namespace structure /
651	list_for_each_entry_safe(net, tmp, &net_exit_list, exit_list) {
652	list_del_init(entry: &net->exit_list);
653	dec_net_namespaces(ucounts: net->ucounts);
654	#ifdef CONFIG_KEYS
655	key_remove_domain(domain_tag: net->key_domain);
656	#endif
657	put_user_ns(ns: net->user_ns);
658	net_free(net);
659	}
660	}
661
662	/**
663	* net_ns_barrier - wait until concurrent net_cleanup_work is done
664	*
665	* cleanup_net runs from work queue and will first remove namespaces
666	* from the global list, then run net exit functions.
667	*
668	* Call this in module exit path to make sure that all netns
669	* ->exit ops have been invoked before the function is removed.
670	*/
671	void net_ns_barrier(void)
672	{
673	down_write(sem: &pernet_ops_rwsem);
674	up_write(sem: &pernet_ops_rwsem);
675	}
676	EXPORT_SYMBOL(net_ns_barrier);
677
678	static DECLARE_WORK(net_cleanup_work, cleanup_net);
679
680	void __put_net(struct net *net)
681	{
682	ref_tracker_dir_exit(dir: &net->refcnt_tracker);
683	/ Cleanup the network namespace in process context /
684	if (llist_add(new: &net->cleanup_list, head: &cleanup_list))
685	queue_work(wq: netns_wq, work: &net_cleanup_work);
686	}
687	EXPORT_SYMBOL_GPL(__put_net);
688
689	/**
690	* get_net_ns - increment the refcount of the network namespace
691	* @ns: common namespace (net)
692	*
693	* Returns the net's common namespace.
694	*/
695	struct ns_common get_net_ns(struct* ns_common *ns)
696	{
697	return &get_net(container_of(ns, struct net, ns))->ns;
698	}
699	EXPORT_SYMBOL_GPL(get_net_ns);
700
701	struct net get_net_ns_by_fd(int* fd)
702	{
703	struct fd f = fdget(fd);
704	struct net *net = ERR_PTR(error: -EINVAL);
705
706	if (!f.file)
707	return ERR_PTR(error: -EBADF);
708
709	if (proc_ns_file(file: f.file)) {
710	struct ns_common *ns = get_proc_ns(file_inode(f.file));
711	if (ns->ops == &netns_operations)
712	net = get_net(container_of(ns, struct net, ns));
713	}
714	fdput(fd: f);
715
716	return net;
717	}
718	EXPORT_SYMBOL_GPL(get_net_ns_by_fd);
719	#endif
720
721	struct net *get_net_ns_by_pid(pid_t pid)
722	{
723	struct task_struct *tsk;
724	struct net *net;
725
726	/ Lookup the network namespace /
727	net = ERR_PTR(error: -ESRCH);
728	rcu_read_lock();
729	tsk = find_task_by_vpid(nr: pid);
730	if (tsk) {
731	struct nsproxy *nsproxy;
732	task_lock(p: tsk);
733	nsproxy = tsk->nsproxy;
734	if (nsproxy)
735	net = get_net(net: nsproxy->net_ns);
736	task_unlock(p: tsk);
737	}
738	rcu_read_unlock();
739	return net;
740	}
741	EXPORT_SYMBOL_GPL(get_net_ns_by_pid);
742
743	static __net_init int net_ns_net_init(struct net *net)
744	{
745	#ifdef CONFIG_NET_NS
746	net->ns.ops = &netns_operations;
747	#endif
748	return ns_alloc_inum(ns: &net->ns);
749	}
750
751	static __net_exit void net_ns_net_exit(struct net *net)
752	{
753	ns_free_inum(&net->ns);
754	}
755
756	static struct pernet_operations __net_initdata net_ns_ops = {
757	.init = net_ns_net_init,
758	.exit = net_ns_net_exit,
759	};
760
761	static const struct nla_policy rtnl_net_policy[NETNSA_MAX + `1`] = {
762	[NETNSA_NONE] = { .type = NLA_UNSPEC },
763	[NETNSA_NSID] = { .type = NLA_S32 },
764	[NETNSA_PID] = { .type = NLA_U32 },
765	[NETNSA_FD] = { .type = NLA_U32 },
766	[NETNSA_TARGET_NSID] = { .type = NLA_S32 },
767	};
768
769	static int rtnl_net_newid(struct sk_buff skb, struct* nlmsghdr *nlh,
770	struct netlink_ext_ack *extack)
771	{
772	struct net *net = sock_net(sk: skb->sk);
773	struct nlattr *tb[NETNSA_MAX + `1`];
774	struct nlattr *nla;
775	struct net *peer;
776	int nsid, err;
777
778	err = nlmsg_parse_deprecated(nlh, hdrlen: sizeof(struct rtgenmsg), tb,
779	NETNSA_MAX, policy: rtnl_net_policy, extack);
780	if (err < `0`)
781	return err;
782	if (!tb[NETNSA_NSID]) {
783	NL_SET_ERR_MSG(extack, "nsid is missing");
784	return -EINVAL;
785	}
786	nsid = nla_get_s32(nla: tb[NETNSA_NSID]);
787
788	if (tb[NETNSA_PID]) {
789	peer = get_net_ns_by_pid(nla_get_u32(nla: tb[NETNSA_PID]));
790	nla = tb[NETNSA_PID];
791	} else if (tb[NETNSA_FD]) {
792	peer = get_net_ns_by_fd(nla_get_u32(nla: tb[NETNSA_FD]));
793	nla = tb[NETNSA_FD];
794	} else {
795	NL_SET_ERR_MSG(extack, "Peer netns reference is missing");
796	return -EINVAL;
797	}
798	if (IS_ERR(ptr: peer)) {
799	NL_SET_BAD_ATTR(extack, nla);
800	NL_SET_ERR_MSG(extack, "Peer netns reference is invalid");
801	return PTR_ERR(ptr: peer);
802	}
803
804	spin_lock_bh(lock: &net->nsid_lock);
805	if (__peernet2id(net, peer) >= `0`) {
806	spin_unlock_bh(lock: &net->nsid_lock);
807	err = -EEXIST;
808	NL_SET_BAD_ATTR(extack, nla);
809	NL_SET_ERR_MSG(extack,
810	"Peer netns already has a nsid assigned");
811	goto out;
812	}
813
814	err = alloc_netid(net, peer, reqid: nsid);
815	spin_unlock_bh(lock: &net->nsid_lock);
816	if (err >= `0`) {
817	rtnl_net_notifyid(net, RTM_NEWNSID, id: err, NETLINK_CB(skb).portid,
818	nlh, GFP_KERNEL);
819	err = `0`;
820	} else if (err == -ENOSPC && nsid >= `0`) {
821	err = -EEXIST;
822	NL_SET_BAD_ATTR(extack, tb[NETNSA_NSID]);
823	NL_SET_ERR_MSG(extack, "The specified nsid is already used");
824	}
825	out:
826	put_net(net: peer);
827	return err;
828	}
829
830	static int rtnl_net_get_size(void)
831	{
832	return NLMSG_ALIGN(sizeof(struct rtgenmsg))
833	+ nla_total_size(payload: sizeof(s32)) / NETNSA_NSID /
834	+ nla_total_size(payload: sizeof(s32)) / NETNSA_CURRENT_NSID /
835	;
836	}
837
838	struct net_fill_args {
839	u32 portid;
840	u32 seq;
841	int flags;
842	int cmd;
843	int nsid;
844	bool add_ref;
845	int ref_nsid;
846	};
847
848	static int rtnl_net_fill(struct sk_buff skb, struct* net_fill_args *args)
849	{
850	struct nlmsghdr *nlh;
851	struct rtgenmsg *rth;
852
853	nlh = nlmsg_put(skb, portid: args->portid, seq: args->seq, type: args->cmd, payload: sizeof(*rth),
854	flags: args->flags);
855	if (!nlh)
856	return -EMSGSIZE;
857
858	rth = nlmsg_data(nlh);
859	rth->rtgen_family = AF_UNSPEC;
860
861	if (nla_put_s32(skb, attrtype: NETNSA_NSID, value: args->nsid))
862	goto nla_put_failure;
863
864	if (args->add_ref &&
865	nla_put_s32(skb, attrtype: NETNSA_CURRENT_NSID, value: args->ref_nsid))
866	goto nla_put_failure;
867
868	nlmsg_end(skb, nlh);
869	return `0`;
870
871	nla_put_failure:
872	nlmsg_cancel(skb, nlh);
873	return -EMSGSIZE;
874	}
875
876	static int rtnl_net_valid_getid_req(struct sk_buff *skb,
877	const struct nlmsghdr *nlh,
878	struct nlattr **tb,
879	struct netlink_ext_ack *extack)
880	{
881	int i, err;
882
883	if (!netlink_strict_get_check(skb))
884	return nlmsg_parse_deprecated(nlh, hdrlen: sizeof(struct rtgenmsg),
885	tb, NETNSA_MAX, policy: rtnl_net_policy,
886	extack);
887
888	err = nlmsg_parse_deprecated_strict(nlh, hdrlen: sizeof(struct rtgenmsg), tb,
889	NETNSA_MAX, policy: rtnl_net_policy,
890	extack);
891	if (err)
892	return err;
893
894	for (i = `0`; i <= NETNSA_MAX; i++) {
895	if (!tb[i])
896	continue;
897
898	switch (i) {
899	case NETNSA_PID:
900	case NETNSA_FD:
901	case NETNSA_NSID:
902	case NETNSA_TARGET_NSID:
903	break;
904	default:
905	NL_SET_ERR_MSG(extack, "Unsupported attribute in peer netns getid request");
906	return -EINVAL;
907	}
908	}
909
910	return `0`;
911	}
912
913	static int rtnl_net_getid(struct sk_buff skb, struct* nlmsghdr *nlh,
914	struct netlink_ext_ack *extack)
915	{
916	struct net *net = sock_net(sk: skb->sk);
917	struct nlattr *tb[NETNSA_MAX + `1`];
918	struct net_fill_args fillargs = {
919	.portid = NETLINK_CB(skb).portid,
920	.seq = nlh->nlmsg_seq,
921	.cmd = RTM_NEWNSID,
922	};
923	struct net peer, target = net;
924	struct nlattr *nla;
925	struct sk_buff *msg;
926	int err;
927
928	err = rtnl_net_valid_getid_req(skb, nlh, tb, extack);
929	if (err < `0`)
930	return err;
931	if (tb[NETNSA_PID]) {
932	peer = get_net_ns_by_pid(nla_get_u32(nla: tb[NETNSA_PID]));
933	nla = tb[NETNSA_PID];
934	} else if (tb[NETNSA_FD]) {
935	peer = get_net_ns_by_fd(nla_get_u32(nla: tb[NETNSA_FD]));
936	nla = tb[NETNSA_FD];
937	} else if (tb[NETNSA_NSID]) {
938	peer = get_net_ns_by_id(net, nla_get_s32(nla: tb[NETNSA_NSID]));
939	if (!peer)
940	peer = ERR_PTR(error: -ENOENT);
941	nla = tb[NETNSA_NSID];
942	} else {
943	NL_SET_ERR_MSG(extack, "Peer netns reference is missing");
944	return -EINVAL;
945	}
946
947	if (IS_ERR(ptr: peer)) {
948	NL_SET_BAD_ATTR(extack, nla);
949	NL_SET_ERR_MSG(extack, "Peer netns reference is invalid");
950	return PTR_ERR(ptr: peer);
951	}
952
953	if (tb[NETNSA_TARGET_NSID]) {
954	int id = nla_get_s32(nla: tb[NETNSA_TARGET_NSID]);
955
956	target = rtnl_get_net_ns_capable(NETLINK_CB(skb).sk, netnsid: id);
957	if (IS_ERR(ptr: target)) {
958	NL_SET_BAD_ATTR(extack, tb[NETNSA_TARGET_NSID]);
959	NL_SET_ERR_MSG(extack,
960	"Target netns reference is invalid");
961	err = PTR_ERR(ptr: target);
962	goto out;
963	}
964	fillargs.add_ref = true;
965	fillargs.ref_nsid = peernet2id(net, peer);
966	}
967
968	msg = nlmsg_new(payload: rtnl_net_get_size(), GFP_KERNEL);
969	if (!msg) {
970	err = -ENOMEM;
971	goto out;
972	}
973
974	fillargs.nsid = peernet2id(target, peer);
975	err = rtnl_net_fill(skb: msg, args: &fillargs);
976	if (err < `0`)
977	goto err_out;
978
979	err = rtnl_unicast(skb: msg, net, NETLINK_CB(skb).portid);
980	goto out;
981
982	err_out:
983	nlmsg_free(skb: msg);
984	out:
985	if (fillargs.add_ref)
986	put_net(net: target);
987	put_net(net: peer);
988	return err;
989	}
990
991	struct rtnl_net_dump_cb {
992	struct net *tgt_net;
993	struct net *ref_net;
994	struct sk_buff *skb;
995	struct net_fill_args fillargs;
996	int idx;
997	int s_idx;
998	};
999
1000	/ Runs in RCU-critical section. /
1001	static int rtnl_net_dumpid_one(int id, void peer, void* *data)
1002	{
1003	struct rtnl_net_dump_cb net_cb = (struct* rtnl_net_dump_cb *)data;
1004	int ret;
1005
1006	if (net_cb->idx < net_cb->s_idx)
1007	goto cont;
1008
1009	net_cb->fillargs.nsid = id;
1010	if (net_cb->fillargs.add_ref)
1011	net_cb->fillargs.ref_nsid = __peernet2id(net: net_cb->ref_net, peer);
1012	ret = rtnl_net_fill(skb: net_cb->skb, args: &net_cb->fillargs);
1013	if (ret < `0`)
1014	return ret;
1015
1016	cont:
1017	net_cb->idx++;
1018	return `0`;
1019	}
1020
1021	static int rtnl_valid_dump_net_req(const struct nlmsghdr nlh, struct* sock *sk,
1022	struct rtnl_net_dump_cb *net_cb,
1023	struct netlink_callback *cb)
1024	{
1025	struct netlink_ext_ack *extack = cb->extack;
1026	struct nlattr *tb[NETNSA_MAX + `1`];
1027	int err, i;
1028
1029	err = nlmsg_parse_deprecated_strict(nlh, hdrlen: sizeof(struct rtgenmsg), tb,
1030	NETNSA_MAX, policy: rtnl_net_policy,
1031	extack);
1032	if (err < `0`)
1033	return err;
1034
1035	for (i = `0`; i <= NETNSA_MAX; i++) {
1036	if (!tb[i])
1037	continue;
1038
1039	if (i == NETNSA_TARGET_NSID) {
1040	struct net *net;
1041
1042	net = rtnl_get_net_ns_capable(sk, netnsid: nla_get_s32(nla: tb[i]));
1043	if (IS_ERR(ptr: net)) {
1044	NL_SET_BAD_ATTR(extack, tb[i]);
1045	NL_SET_ERR_MSG(extack,
1046	"Invalid target network namespace id");
1047	return PTR_ERR(ptr: net);
1048	}
1049	net_cb->fillargs.add_ref = true;
1050	net_cb->ref_net = net_cb->tgt_net;
1051	net_cb->tgt_net = net;
1052	} else {
1053	NL_SET_BAD_ATTR(extack, tb[i]);
1054	NL_SET_ERR_MSG(extack,
1055	"Unsupported attribute in dump request");
1056	return -EINVAL;
1057	}
1058	}
1059
1060	return `0`;
1061	}
1062
1063	static int rtnl_net_dumpid(struct sk_buff skb, struct* netlink_callback *cb)
1064	{
1065	struct rtnl_net_dump_cb net_cb = {
1066	.tgt_net = sock_net(sk: skb->sk),
1067	.skb = skb,
1068	.fillargs = {
1069	.portid = NETLINK_CB(cb->skb).portid,
1070	.seq = cb->nlh->nlmsg_seq,
1071	.flags = NLM_F_MULTI,
1072	.cmd = RTM_NEWNSID,
1073	},
1074	.idx = `0`,
1075	.s_idx = cb->args[`0`],
1076	};
1077	int err = `0`;
1078
1079	if (cb->strict_check) {
1080	err = rtnl_valid_dump_net_req(nlh: cb->nlh, sk: skb->sk, net_cb: &net_cb, cb);
1081	if (err < `0`)
1082	goto end;
1083	}
1084
1085	rcu_read_lock();
1086	idr_for_each(&net_cb.tgt_net->netns_ids, fn: rtnl_net_dumpid_one, data: &net_cb);
1087	rcu_read_unlock();
1088
1089	cb->args[`0`] = net_cb.idx;
1090	end:
1091	if (net_cb.fillargs.add_ref)
1092	put_net(net: net_cb.tgt_net);
1093	return err < `0` ? err : skb->len;
1094	}
1095
1096	static void rtnl_net_notifyid(struct net net, int* cmd, int id, u32 portid,
1097	struct nlmsghdr *nlh, gfp_t gfp)
1098	{
1099	struct net_fill_args fillargs = {
1100	.portid = portid,
1101	.seq = nlh ? nlh->nlmsg_seq : `0`,
1102	.cmd = cmd,
1103	.nsid = id,
1104	};
1105	struct sk_buff *msg;
1106	int err = -ENOMEM;
1107
1108	msg = nlmsg_new(payload: rtnl_net_get_size(), flags: gfp);
1109	if (!msg)
1110	goto out;
1111
1112	err = rtnl_net_fill(skb: msg, args: &fillargs);
1113	if (err < `0`)
1114	goto err_out;
1115
1116	rtnl_notify(skb: msg, net, pid: portid, RTNLGRP_NSID, nlh, flags: gfp);
1117	return;
1118
1119	err_out:
1120	nlmsg_free(skb: msg);
1121	out:
1122	rtnl_set_sk_err(net, RTNLGRP_NSID, error: err);
1123	}
1124
#ifdef CONFIG_NET_NS
/*
 * Assert the cache-line group membership and group sizes of the hot-path
 * fields of struct netns_ipv4.
 */
static void __init netns_ipv4_struct_check(void)
{
	/* TX readonly hotpath cache lines */
	CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read_tx,
				      sysctl_tcp_early_retrans);
	CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read_tx,
				      sysctl_tcp_tso_win_divisor);
	CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read_tx,
				      sysctl_tcp_tso_rtt_log);
	CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read_tx,
				      sysctl_tcp_autocorking);
	CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read_tx,
				      sysctl_tcp_min_snd_mss);
	CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read_tx,
				      sysctl_tcp_notsent_lowat);
	CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read_tx,
				      sysctl_tcp_limit_output_bytes);
	CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read_tx,
				      sysctl_tcp_min_rtt_wlen);
	CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read_tx,
				      sysctl_tcp_wmem);
	CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read_tx,
				      sysctl_ip_fwd_use_pmtu);
	CACHELINE_ASSERT_GROUP_SIZE(struct netns_ipv4, netns_ipv4_read_tx, 33);

	/* TXRX readonly hotpath cache lines */
	CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read_txrx,
				      sysctl_tcp_moderate_rcvbuf);
	CACHELINE_ASSERT_GROUP_SIZE(struct netns_ipv4, netns_ipv4_read_txrx, 1);

	/* RX readonly hotpath cache line */
	CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read_rx,
				      sysctl_ip_early_demux);
	CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read_rx,
				      sysctl_tcp_early_demux);
	CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read_rx,
				      sysctl_tcp_reordering);
	CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read_rx,
				      sysctl_tcp_rmem);
	CACHELINE_ASSERT_GROUP_SIZE(struct netns_ipv4, netns_ipv4_read_rx, 18);
}
#endif
1168
1169	void __init net_ns_init(void)
1170	{
1171	struct net_generic *ng;
1172
1173	#ifdef CONFIG_NET_NS
1174	netns_ipv4_struct_check();
1175	net_cachep = kmem_cache_create(name: "net_namespace", size: sizeof(struct net),
1176	SMP_CACHE_BYTES,
1177	SLAB_PANIC\|SLAB_ACCOUNT, NULL);
1178
1179	/ Create workqueue for cleanup /
1180	netns_wq = create_singlethread_workqueue("netns");
1181	if (!netns_wq)
1182	panic(fmt: "Could not create netns workq");
1183	#endif
1184
1185	ng = net_alloc_generic();
1186	if (!ng)
1187	panic(fmt: "Could not allocate generic netns");
1188
1189	rcu_assign_pointer(init_net.gen, ng);
1190
1191	#ifdef CONFIG_KEYS
1192	init_net.key_domain = &init_net_key_domain;
1193	#endif
1194	down_write(sem: &pernet_ops_rwsem);
1195	preinit_net(net: &init_net);
1196	if (setup_net(net: &init_net, user_ns: &init_user_ns))
1197	panic(fmt: "Could not setup the initial network namespace");
1198
1199	init_net_initialized = true;
1200	up_write(sem: &pernet_ops_rwsem);
1201
1202	if (register_pernet_subsys(&net_ns_ops))
1203	panic(fmt: "Could not register network namespace subsystems");
1204
1205	rtnl_register(PF_UNSPEC, RTM_NEWNSID, rtnl_net_newid, NULL,
1206	flags: RTNL_FLAG_DOIT_UNLOCKED);
1207	rtnl_register(PF_UNSPEC, RTM_GETNSID, rtnl_net_getid, rtnl_net_dumpid,
1208	flags: RTNL_FLAG_DOIT_UNLOCKED);
1209	}
1210
1211	static void free_exit_list(struct pernet_operations ops, struct* list_head *net_exit_list)
1212	{
1213	ops_pre_exit_list(ops, net_exit_list);
1214	synchronize_rcu();
1215
1216	if (ops->exit_batch_rtnl) {
1217	LIST_HEAD(dev_kill_list);
1218
1219	rtnl_lock();
1220	ops->exit_batch_rtnl(net_exit_list, &dev_kill_list);
1221	unregister_netdevice_many(head: &dev_kill_list);
1222	rtnl_unlock();
1223	}
1224	ops_exit_list(ops, net_exit_list);
1225
1226	ops_free_list(ops, net_exit_list);
1227	}
1228
1229	#ifdef CONFIG_NET_NS
1230	static int __register_pernet_operations(struct list_head *list,
1231	struct pernet_operations *ops)
1232	{
1233	struct net *net;
1234	int error;
1235	LIST_HEAD(net_exit_list);
1236
1237	list_add_tail(new: &ops->list, head: list);
1238	if (ops->init \|\| (ops->id && ops->size)) {
1239	/ We held write locked pernet_ops_rwsem, and parallel*
1240	* setup_net() and cleanup_net() are not possible.
1241	*/
1242	for_each_net(net) {
1243	error = ops_init(ops, net);
1244	if (error)
1245	goto out_undo;
1246	list_add_tail(new: &net->exit_list, head: &net_exit_list);
1247	}
1248	}
1249	return `0`;
1250
1251	out_undo:
1252	/ If I have an error cleanup all namespaces I initialized /
1253	list_del(entry: &ops->list);
1254	free_exit_list(ops, net_exit_list: &net_exit_list);
1255	return error;
1256	}
1257
1258	static void __unregister_pernet_operations(struct pernet_operations *ops)
1259	{
1260	struct net *net;
1261	LIST_HEAD(net_exit_list);
1262
1263	list_del(entry: &ops->list);
1264	/ See comment in __register_pernet_operations() /
1265	for_each_net(net)
1266	list_add_tail(new: &net->exit_list, head: &net_exit_list);
1267
1268	free_exit_list(ops, net_exit_list: &net_exit_list);
1269	}
1270
1271	#else
1272
1273	static int __register_pernet_operations(struct list_head *list,
1274	struct pernet_operations *ops)
1275	{
1276	if (!init_net_initialized) {
1277	list_add_tail(&ops->list, list);
1278	return `0`;
1279	}
1280
1281	return ops_init(ops, &init_net);
1282	}
1283
1284	static void __unregister_pernet_operations(struct pernet_operations *ops)
1285	{
1286	if (!init_net_initialized) {
1287	list_del(&ops->list);
1288	} else {
1289	LIST_HEAD(net_exit_list);
1290	list_add(&init_net.exit_list, &net_exit_list);
1291	free_exit_list(ops, &net_exit_list);
1292	}
1293	}
1294
1295	#endif /* CONFIG_NET_NS */
1296
1297	static DEFINE_IDA(net_generic_ids);
1298
1299	static int register_pernet_operations(struct list_head *list,
1300	struct pernet_operations *ops)
1301	{
1302	int error;
1303
1304	if (ops->id) {
1305	error = ida_alloc_min(ida: &net_generic_ids, MIN_PERNET_OPS_ID,
1306	GFP_KERNEL);
1307	if (error < `0`)
1308	return error;
1309	*ops->id = error;
1310	max_gen_ptrs = max(max_gen_ptrs, *ops->id + `1`);
1311	}
1312	error = __register_pernet_operations(list, ops);
1313	if (error) {
1314	rcu_barrier();
1315	if (ops->id)
1316	ida_free(&net_generic_ids, id: *ops->id);
1317	}
1318
1319	return error;
1320	}
1321
1322	static void unregister_pernet_operations(struct pernet_operations *ops)
1323	{
1324	__unregister_pernet_operations(ops);
1325	rcu_barrier();
1326	if (ops->id)
1327	ida_free(&net_generic_ids, id: *ops->id);
1328	}
1329
1330	/**
1331	* register_pernet_subsys - register a network namespace subsystem
1332	* @ops: pernet operations structure for the subsystem
1333	*
1334	* Register a subsystem which has init and exit functions
1335	* that are called when network namespaces are created and
1336	* destroyed respectively.
1337	*
1338	* When registered all network namespace init functions are
1339	* called for every existing network namespace. Allowing kernel
1340	* modules to have a race free view of the set of network namespaces.
1341	*
1342	* When a new network namespace is created all of the init
1343	* methods are called in the order in which they were registered.
1344	*
1345	* When a network namespace is destroyed all of the exit methods
1346	* are called in the reverse of the order with which they were
1347	* registered.
1348	*/
1349	int register_pernet_subsys(struct pernet_operations *ops)
1350	{
1351	int error;
1352	down_write(sem: &pernet_ops_rwsem);
1353	error = register_pernet_operations(list: first_device, ops);
1354	up_write(sem: &pernet_ops_rwsem);
1355	return error;
1356	}
1357	EXPORT_SYMBOL_GPL(register_pernet_subsys);
1358
1359	/**
1360	* unregister_pernet_subsys - unregister a network namespace subsystem
1361	* @ops: pernet operations structure to manipulate
1362	*
1363	* Remove the pernet operations structure from the list to be
1364	* used when network namespaces are created or destroyed. In
1365	* addition run the exit method for all existing network
1366	* namespaces.
1367	*/
1368	void unregister_pernet_subsys(struct pernet_operations *ops)
1369	{
1370	down_write(sem: &pernet_ops_rwsem);
1371	unregister_pernet_operations(ops);
1372	up_write(sem: &pernet_ops_rwsem);
1373	}
1374	EXPORT_SYMBOL_GPL(unregister_pernet_subsys);
1375
1376	/**
1377	* register_pernet_device - register a network namespace device
1378	* @ops: pernet operations structure for the subsystem
1379	*
1380	* Register a device which has init and exit functions
1381	* that are called when network namespaces are created and
1382	* destroyed respectively.
1383	*
1384	* When registered all network namespace init functions are
1385	* called for every existing network namespace. Allowing kernel
1386	* modules to have a race free view of the set of network namespaces.
1387	*
1388	* When a new network namespace is created all of the init
1389	* methods are called in the order in which they were registered.
1390	*
1391	* When a network namespace is destroyed all of the exit methods
1392	* are called in the reverse of the order with which they were
1393	* registered.
1394	*/
1395	int register_pernet_device(struct pernet_operations *ops)
1396	{
1397	int error;
1398	down_write(sem: &pernet_ops_rwsem);
1399	error = register_pernet_operations(list: &pernet_list, ops);
1400	if (!error && (first_device == &pernet_list))
1401	first_device = &ops->list;
1402	up_write(sem: &pernet_ops_rwsem);
1403	return error;
1404	}
1405	EXPORT_SYMBOL_GPL(register_pernet_device);
1406
1407	/**
1408	* unregister_pernet_device - unregister a network namespace netdevice
1409	* @ops: pernet operations structure to manipulate
1410	*
1411	* Remove the pernet operations structure from the list to be
1412	* used when network namespaces are created or destroyed. In
1413	* addition run the exit method for all existing network
1414	* namespaces.
1415	*/
1416	void unregister_pernet_device(struct pernet_operations *ops)
1417	{
1418	down_write(sem: &pernet_ops_rwsem);
1419	if (&ops->list == first_device)
1420	first_device = first_device->next;
1421	unregister_pernet_operations(ops);
1422	up_write(sem: &pernet_ops_rwsem);
1423	}
1424	EXPORT_SYMBOL_GPL(unregister_pernet_device);
1425
1426	#ifdef CONFIG_NET_NS
1427	static struct ns_common netns_get(struct* task_struct *task)
1428	{
1429	struct net *net = NULL;
1430	struct nsproxy *nsproxy;
1431
1432	task_lock(p: task);
1433	nsproxy = task->nsproxy;
1434	if (nsproxy)
1435	net = get_net(net: nsproxy->net_ns);
1436	task_unlock(p: task);
1437
1438	return net ? &net->ns : NULL;
1439	}
1440
1441	static inline struct net to_net_ns(struct* ns_common *ns)
1442	{
1443	return container_of(ns, struct net, ns);
1444	}
1445
/* Drop a reference on the network namespace that embeds @ns. */
static void netns_put(struct ns_common *ns)
{
	put_net(to_net_ns(ns));
}
1450
1451	static int netns_install(struct nsset nsset, struct* ns_common *ns)
1452	{
1453	struct nsproxy *nsproxy = nsset->nsproxy;
1454	struct net *net = to_net_ns(ns);
1455
1456	if (!ns_capable(ns: net->user_ns, CAP_SYS_ADMIN) \|\|
1457	!ns_capable(ns: nsset->cred->user_ns, CAP_SYS_ADMIN))
1458	return -EPERM;
1459
1460	put_net(net: nsproxy->net_ns);
1461	nsproxy->net_ns = get_net(net);
1462	return `0`;
1463	}
1464
1465	static struct user_namespace netns_owner(struct* ns_common *ns)
1466	{
1467	return to_net_ns(ns)->user_ns;
1468	}
1469
1470	const struct proc_ns_operations netns_operations = {
1471	.name = "net",
1472	.type = CLONE_NEWNET,
1473	.get = netns_get,
1474	.put = netns_put,
1475	.install = netns_install,
1476	.owner = netns_owner,
1477	};
1478	#endif
1479

source code of linux/net/core/net_namespace.c