socket.c source code [linux/net/socket.c]

1	// SPDX-License-Identifier: GPL-2.0-or-later
2	/*
3	* NET An implementation of the SOCKET network access protocol.
4	*
5	* Version: @(#)socket.c 1.1.93 18/02/95
6	*
7	* Authors: Orest Zborowski, <obz@Kodak.COM>
8	* Ross Biro
9	* Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
10	*
11	* Fixes:
12	* Anonymous : NOTSOCK/BADF cleanup. Error fix in
13	* shutdown()
14	* Alan Cox : verify_area() fixes
15	* Alan Cox : Removed DDI
16	* Jonathan Kamens : SOCK_DGRAM reconnect bug
17	* Alan Cox : Moved a load of checks to the very
18	* top level.
19	* Alan Cox : Move address structures to/from user
20	* mode above the protocol layers.
21	* Rob Janssen : Allow 0 length sends.
22	* Alan Cox : Asynchronous I/O support (cribbed from the
23	* tty drivers).
24	* Niibe Yutaka : Asynchronous I/O for writes (4.4BSD style)
25	* Jeff Uphoff : Made max number of sockets command-line
26	* configurable.
27	* Matti Aarnio : Made the number of sockets dynamic,
28	* to be allocated when needed, and mr.
29	* Uphoff's max is used as max to be
30	* allowed to allocate.
31	* Linus : Argh. removed all the socket allocation
32	* altogether: it's in the inode now.
33	* Alan Cox : Made sock_alloc()/sock_release() public
34	* for NetROM and future kernel nfsd type
35	* stuff.
36	* Alan Cox : sendmsg/recvmsg basics.
37	* Tom Dyas : Export net symbols.
38	* Marcin Dalecki : Fixed problems with CONFIG_NET="n".
39	* Alan Cox : Added thread locking to sys_* calls
40	* for sockets. May have errors at the
41	* moment.
42	* Kevin Buhr : Fixed the dumb errors in the above.
43	* Andi Kleen : Some small cleanups, optimizations,
44	* and fixed a copy_from_user() bug.
45	* Tigran Aivazian : sys_send(args) calls sys_sendto(args, NULL, 0)
46	* Tigran Aivazian : Made listen(2) backlog sanity checks
47	* protocol-independent
48	*
49	* This module is effectively the top level interface to the BSD socket
50	* paradigm.
51	*
52	* Based upon Swansea University Computer Society NET3.039
53	*/
54
55	#include <linux/bpf-cgroup.h>
56	#include <linux/ethtool.h>
57	#include <linux/mm.h>
58	#include <linux/socket.h>
59	#include <linux/file.h>
60	#include <linux/splice.h>
61	#include <linux/net.h>
62	#include <linux/interrupt.h>
63	#include <linux/thread_info.h>
64	#include <linux/rcupdate.h>
65	#include <linux/netdevice.h>
66	#include <linux/proc_fs.h>
67	#include <linux/seq_file.h>
68	#include <linux/mutex.h>
69	#include <linux/if_bridge.h>
70	#include <linux/if_vlan.h>
71	#include <linux/ptp_classify.h>
72	#include <linux/init.h>
73	#include <linux/poll.h>
74	#include <linux/cache.h>
75	#include <linux/module.h>
76	#include <linux/highmem.h>
77	#include <linux/mount.h>
78	#include <linux/pseudo_fs.h>
79	#include <linux/security.h>
80	#include <linux/syscalls.h>
81	#include <linux/compat.h>
82	#include <linux/kmod.h>
83	#include <linux/audit.h>
84	#include <linux/wireless.h>
85	#include <linux/nsproxy.h>
86	#include <linux/magic.h>
87	#include <linux/slab.h>
88	#include <linux/xattr.h>
89	#include <linux/nospec.h>
90	#include <linux/indirect_call_wrapper.h>
91	#include <linux/io_uring.h>
92
93	#include <linux/uaccess.h>
94	#include <asm/unistd.h>
95
96	#include <net/compat.h>
97	#include <net/wext.h>
98	#include <net/cls_cgroup.h>
99
100	#include <net/sock.h>
101	#include <linux/netfilter.h>
102
103	#include <linux/if_tun.h>
104	#include <linux/ipv6_route.h>
105	#include <linux/route.h>
106	#include <linux/termios.h>
107	#include <linux/sockios.h>
108	#include <net/busy_poll.h>
109	#include <linux/errqueue.h>
110	#include <linux/ptp_clock_kernel.h>
111	#include <trace/events/sock.h>
112
#ifdef CONFIG_NET_RX_BUSY_POLL
/* Busy-poll tunables; only built when CONFIG_NET_RX_BUSY_POLL is set. */
unsigned int sysctl_net_busy_read __read_mostly;
unsigned int sysctl_net_busy_poll __read_mostly;
#endif
117
118	static ssize_t sock_read_iter(struct kiocb iocb, struct* iov_iter *to);
119	static ssize_t sock_write_iter(struct kiocb iocb, struct* iov_iter *from);
120	static int sock_mmap(struct file file, struct* vm_area_struct *vma);
121
122	static int sock_close(struct inode inode, struct* file *file);
123	static __poll_t sock_poll(struct file *file,
124	struct poll_table_struct *wait);
125	static long sock_ioctl(struct file file, unsigned* int cmd, unsigned long arg);
126	#ifdef CONFIG_COMPAT
127	static long compat_sock_ioctl(struct file *file,
128	unsigned int cmd, unsigned long arg);
129	#endif
130	static int sock_fasync(int fd, struct file filp, int* on);
131	static ssize_t sock_splice_read(struct file file, loff_t ppos,
132	struct pipe_inode_info *pipe, size_t len,
133	unsigned int flags);
134	static void sock_splice_eof(struct file *file);
135
#ifdef CONFIG_PROC_FS
/*
 * fdinfo hook for socket files: forwards the request to the protocol's
 * ->show_fdinfo() callback when the protocol provides one, otherwise
 * emits nothing.
 */
static void sock_show_fdinfo(struct seq_file *m, struct file *f)
{
	struct socket *sock = f->private_data;
	const struct proto_ops *ops = READ_ONCE(sock->ops);

	if (ops->show_fdinfo)
		ops->show_fdinfo(m, sock);
}
#else
#define sock_show_fdinfo NULL
#endif
148
149	/*
150	* Socket files have a set of 'special' operations as well as the generic file ones. These don't appear
151	* in the operation structures but are done directly via the socketcall() multiplexor.
152	*/
153
154	static const struct file_operations socket_file_ops = {
155	.owner = THIS_MODULE,
156	.llseek = no_llseek,
157	.read_iter = sock_read_iter,
158	.write_iter = sock_write_iter,
159	.poll = sock_poll,
160	.unlocked_ioctl = sock_ioctl,
161	#ifdef CONFIG_COMPAT
162	.compat_ioctl = compat_sock_ioctl,
163	#endif
164	.uring_cmd = io_uring_cmd_sock,
165	.mmap = sock_mmap,
166	.release = sock_close,
167	.fasync = sock_fasync,
168	.splice_write = splice_to_socket,
169	.splice_read = sock_splice_read,
170	.splice_eof = sock_splice_eof,
171	.show_fdinfo = sock_show_fdinfo,
172	};
173
174	static const char * const pf_family_names[] = {
175	[PF_UNSPEC] = "PF_UNSPEC",
176	[PF_UNIX] = "PF_UNIX/PF_LOCAL",
177	[PF_INET] = "PF_INET",
178	[PF_AX25] = "PF_AX25",
179	[PF_IPX] = "PF_IPX",
180	[PF_APPLETALK] = "PF_APPLETALK",
181	[PF_NETROM] = "PF_NETROM",
182	[PF_BRIDGE] = "PF_BRIDGE",
183	[PF_ATMPVC] = "PF_ATMPVC",
184	[PF_X25] = "PF_X25",
185	[PF_INET6] = "PF_INET6",
186	[PF_ROSE] = "PF_ROSE",
187	[PF_DECnet] = "PF_DECnet",
188	[PF_NETBEUI] = "PF_NETBEUI",
189	[PF_SECURITY] = "PF_SECURITY",
190	[PF_KEY] = "PF_KEY",
191	[PF_NETLINK] = "PF_NETLINK/PF_ROUTE",
192	[PF_PACKET] = "PF_PACKET",
193	[PF_ASH] = "PF_ASH",
194	[PF_ECONET] = "PF_ECONET",
195	[PF_ATMSVC] = "PF_ATMSVC",
196	[PF_RDS] = "PF_RDS",
197	[PF_SNA] = "PF_SNA",
198	[PF_IRDA] = "PF_IRDA",
199	[PF_PPPOX] = "PF_PPPOX",
200	[PF_WANPIPE] = "PF_WANPIPE",
201	[PF_LLC] = "PF_LLC",
202	[PF_IB] = "PF_IB",
203	[PF_MPLS] = "PF_MPLS",
204	[PF_CAN] = "PF_CAN",
205	[PF_TIPC] = "PF_TIPC",
206	[PF_BLUETOOTH] = "PF_BLUETOOTH",
207	[PF_IUCV] = "PF_IUCV",
208	[PF_RXRPC] = "PF_RXRPC",
209	[PF_ISDN] = "PF_ISDN",
210	[PF_PHONET] = "PF_PHONET",
211	[PF_IEEE802154] = "PF_IEEE802154",
212	[PF_CAIF] = "PF_CAIF",
213	[PF_ALG] = "PF_ALG",
214	[PF_NFC] = "PF_NFC",
215	[PF_VSOCK] = "PF_VSOCK",
216	[PF_KCM] = "PF_KCM",
217	[PF_QIPCRTR] = "PF_QIPCRTR",
218	[PF_SMC] = "PF_SMC",
219	[PF_XDP] = "PF_XDP",
220	[PF_MCTP] = "PF_MCTP",
221	};
222
223	/*
224	* The protocol list. Each protocol is registered in here.
225	*/
226
227	static DEFINE_SPINLOCK(net_family_lock);
228	static const struct net_proto_family __rcu *net_families[NPROTO] __read_mostly;
229
230	/*
231	* Support routines.
232	* Move socket addresses back and forth across the kernel/user
233	* divide and look after the messy bits.
234	*/
235
236	/**
237	* move_addr_to_kernel - copy a socket address into kernel space
238	* @uaddr: Address in user space
239	* @kaddr: Address in kernel space
240	* @ulen: Length in user space
241	*
242	* The address is copied into kernel space. If the provided address is
243	* too long an error code of -EINVAL is returned. If the copy gives
244	* invalid addresses -EFAULT is returned. On a success 0 is returned.
245	*/
246
247	int move_addr_to_kernel(void __user uaddr, int* ulen, struct sockaddr_storage *kaddr)
248	{
249	if (ulen < `0` \|\| ulen > sizeof(struct sockaddr_storage))
250	return -EINVAL;
251	if (ulen == `0`)
252	return `0`;
253	if (copy_from_user(to: kaddr, from: uaddr, n: ulen))
254	return -EFAULT;
255	return audit_sockaddr(len: ulen, addr: kaddr);
256	}
257
258	/**
259	* move_addr_to_user - copy an address to user space
260	* @kaddr: kernel space address
261	* @klen: length of address in kernel
262	* @uaddr: user space address
263	* @ulen: pointer to user length field
264	*
265	* The value pointed to by ulen on entry is the buffer length available.
266	* This is overwritten with the buffer space used. -EINVAL is returned
267	* if an overlong buffer is specified or a negative buffer size. -EFAULT
268	* is returned if either the buffer or the length field are not
269	* accessible.
270	* After copying the data up to the limit the user specifies, the true
271	* length of the data is written over the length limit the user
272	* specified. Zero is returned for a success.
273	*/
274
275	static int move_addr_to_user(struct sockaddr_storage kaddr, int* klen,
276	void __user uaddr, int* __user *ulen)
277	{
278	int err;
279	int len;
280
281	BUG_ON(klen > sizeof(struct sockaddr_storage));
282	err = get_user(len, ulen);
283	if (err)
284	return err;
285	if (len > klen)
286	len = klen;
287	if (len < `0`)
288	return -EINVAL;
289	if (len) {
290	if (audit_sockaddr(len: klen, addr: kaddr))
291	return -ENOMEM;
292	if (copy_to_user(to: uaddr, from: kaddr, n: len))
293	return -EFAULT;
294	}
295	/*
296	* "fromlen shall refer to the value before truncation.."
297	* 1003.1g
298	*/
299	return __put_user(klen, ulen);
300	}
301
302	static struct kmem_cache *sock_inode_cachep __ro_after_init;
303
304	static struct inode sock_alloc_inode(struct* super_block *sb)
305	{
306	struct socket_alloc *ei;
307
308	ei = alloc_inode_sb(sb, cache: sock_inode_cachep, GFP_KERNEL);
309	if (!ei)
310	return NULL;
311	init_waitqueue_head(&ei->socket.wq.wait);
312	ei->socket.wq.fasync_list = NULL;
313	ei->socket.wq.flags = `0`;
314
315	ei->socket.state = SS_UNCONNECTED;
316	ei->socket.flags = `0`;
317	ei->socket.ops = NULL;
318	ei->socket.sk = NULL;
319	ei->socket.file = NULL;
320
321	return &ei->vfs_inode;
322	}
323
324	static void sock_free_inode(struct inode *inode)
325	{
326	struct socket_alloc *ei;
327
328	ei = container_of(inode, struct socket_alloc, vfs_inode);
329	kmem_cache_free(s: sock_inode_cachep, objp: ei);
330	}
331
332	static void init_once(void *foo)
333	{
334	struct socket_alloc ei = (struct* socket_alloc *)foo;
335
336	inode_init_once(&ei->vfs_inode);
337	}
338
339	static void init_inodecache(void)
340	{
341	sock_inode_cachep = kmem_cache_create(name: "sock_inode_cache",
342	size: sizeof(struct socket_alloc),
343	align: `0`,
344	flags: (SLAB_HWCACHE_ALIGN \|
345	SLAB_RECLAIM_ACCOUNT \|
346	SLAB_MEM_SPREAD \| SLAB_ACCOUNT),
347	ctor: init_once);
348	BUG_ON(sock_inode_cachep == NULL);
349	}
350
351	static const struct super_operations sockfs_ops = {
352	.alloc_inode = sock_alloc_inode,
353	.free_inode = sock_free_inode,
354	.statfs = simple_statfs,
355	};
356
357	/*
358	* sockfs_dname() is called from d_path().
359	*/
360	static char sockfs_dname(struct* dentry dentry, char* buffer, int* buflen)
361	{
362	return dynamic_dname(buffer, buflen, "socket:[%lu]",
363	d_inode(dentry)->i_ino);
364	}
365
366	static const struct dentry_operations sockfs_dentry_operations = {
367	.d_dname = sockfs_dname,
368	};
369
370	static int sockfs_xattr_get(const struct xattr_handler *handler,
371	struct dentry dentry, struct* inode *inode,
372	const char suffix, void* *value, size_t size)
373	{
374	if (value) {
375	if (dentry->d_name.len + `1` > size)
376	return -ERANGE;
377	memcpy(value, dentry->d_name.name, dentry->d_name.len + `1`);
378	}
379	return dentry->d_name.len + `1`;
380	}
381
/* Name of the socket protocol-name xattr and its length without the NUL. */
#define XATTR_SOCKPROTONAME_SUFFIX "sockprotoname"
#define XATTR_NAME_SOCKPROTONAME (XATTR_SYSTEM_PREFIX XATTR_SOCKPROTONAME_SUFFIX)
#define XATTR_NAME_SOCKPROTONAME_LEN (sizeof(XATTR_NAME_SOCKPROTONAME)-1)
385
386	static const struct xattr_handler sockfs_xattr_handler = {
387	.name = XATTR_NAME_SOCKPROTONAME,
388	.get = sockfs_xattr_get,
389	};
390
391	static int sockfs_security_xattr_set(const struct xattr_handler *handler,
392	struct mnt_idmap *idmap,
393	struct dentry dentry, struct* inode *inode,
394	const char suffix, const* void *value,
395	size_t size, int flags)
396	{
397	/ Handled by LSM. /
398	return -EAGAIN;
399	}
400
401	static const struct xattr_handler sockfs_security_xattr_handler = {
402	.prefix = XATTR_SECURITY_PREFIX,
403	.set = sockfs_security_xattr_set,
404	};
405
406	static const struct xattr_handler * const sockfs_xattr_handlers[] = {
407	&sockfs_xattr_handler,
408	&sockfs_security_xattr_handler,
409	NULL
410	};
411
412	static int sockfs_init_fs_context(struct fs_context *fc)
413	{
414	struct pseudo_fs_context *ctx = init_pseudo(fc, SOCKFS_MAGIC);
415	if (!ctx)
416	return -ENOMEM;
417	ctx->ops = &sockfs_ops;
418	ctx->dops = &sockfs_dentry_operations;
419	ctx->xattr = sockfs_xattr_handlers;
420	return `0`;
421	}
422
423	static struct vfsmount *sock_mnt __read_mostly;
424
425	static struct file_system_type sock_fs_type = {
426	.name = "sockfs",
427	.init_fs_context = sockfs_init_fs_context,
428	.kill_sb = kill_anon_super,
429	};
430
/*
 *	Obtains the first available file descriptor and sets it up for use.
 *
 *	These functions create file structures and map them into the fd space
 *	of the current process. On success a file descriptor is returned
 *	and the file struct is implicitly stored in sock->file.
 *	Note that another thread may close the file descriptor before we
 *	return from this function. We rely on the fact that we do not refer
 *	to the socket after mapping. If one day we need to, this
 *	function will have to increment the ref. count on the file by 1.
 *
 *	In any case the returned fd MAY BE not valid!
 *	This race condition is unavoidable
 *	with shared fd spaces, we cannot solve it inside the kernel,
 *	but we still take care of internal coherence.
 */
447
448	/**
449	* sock_alloc_file - Bind a &socket to a &file
450	* @sock: socket
451	* @flags: file status flags
452	* @dname: protocol name
453	*
454	* Returns the &file bound with @sock, implicitly storing it
455	* in sock->file. If dname is %NULL, sets to "".
456	*
457	* On failure @sock is released, and an ERR pointer is returned.
458	*
459	* This function uses GFP_KERNEL internally.
460	*/
461
462	struct file sock_alloc_file(struct* socket sock, int* flags, const char *dname)
463	{
464	struct file *file;
465
466	if (!dname)
467	dname = sock->sk ? sock->sk->sk_prot_creator->name : "";
468
469	file = alloc_file_pseudo(SOCK_INODE(socket: sock), sock_mnt, dname,
470	O_RDWR \| (flags & O_NONBLOCK),
471	&socket_file_ops);
472	if (IS_ERR(ptr: file)) {
473	sock_release(sock);
474	return file;
475	}
476
477	file->f_mode \|= FMODE_NOWAIT;
478	sock->file = file;
479	file->private_data = sock;
480	stream_open(inode: SOCK_INODE(socket: sock), filp: file);
481	return file;
482	}
483	EXPORT_SYMBOL(sock_alloc_file);
484
485	static int sock_map_fd(struct socket sock, int* flags)
486	{
487	struct file *newfile;
488	int fd = get_unused_fd_flags(flags);
489	if (unlikely(fd < `0`)) {
490	sock_release(sock);
491	return fd;
492	}
493
494	newfile = sock_alloc_file(sock, flags, NULL);
495	if (!IS_ERR(ptr: newfile)) {
496	fd_install(fd, file: newfile);
497	return fd;
498	}
499
500	put_unused_fd(fd);
501	return PTR_ERR(ptr: newfile);
502	}
503
504	/**
505	* sock_from_file - Return the &socket bounded to @file.
506	* @file: file
507	*
508	* On failure returns %NULL.
509	*/
510
511	struct socket sock_from_file(struct* file *file)
512	{
513	if (file->f_op == &socket_file_ops)
514	return file->private_data; / set in sock_alloc_file /
515
516	return NULL;
517	}
518	EXPORT_SYMBOL(sock_from_file);
519
520	/**
521	* sockfd_lookup - Go from a file number to its socket slot
522	* @fd: file handle
523	* @err: pointer to an error code return
524	*
525	* The file handle passed in is locked and the socket it is bound
526	* to is returned. If an error occurs the err pointer is overwritten
527	* with a negative errno code and NULL is returned. The function checks
528	* for both invalid handles and passing a handle which is not a socket.
529	*
530	* On a success the socket object pointer is returned.
531	*/
532
533	struct socket sockfd_lookup(int* fd, int *err)
534	{
535	struct file *file;
536	struct socket *sock;
537
538	file = fget(fd);
539	if (!file) {
540	*err = -EBADF;
541	return NULL;
542	}
543
544	sock = sock_from_file(file);
545	if (!sock) {
546	*err = -ENOTSOCK;
547	fput(file);
548	}
549	return sock;
550	}
551	EXPORT_SYMBOL(sockfd_lookup);
552
553	static struct socket sockfd_lookup_light(int* fd, int err, int* *fput_needed)
554	{
555	struct fd f = fdget(fd);
556	struct socket *sock;
557
558	*err = -EBADF;
559	if (f.file) {
560	sock = sock_from_file(f.file);
561	if (likely(sock)) {
562	*fput_needed = f.flags & FDPUT_FPUT;
563	return sock;
564	}
565	*err = -ENOTSOCK;
566	fdput(fd: f);
567	}
568	return NULL;
569	}
570
571	static ssize_t sockfs_listxattr(struct dentry dentry, char* *buffer,
572	size_t size)
573	{
574	ssize_t len;
575	ssize_t used = `0`;
576
577	len = security_inode_listsecurity(inode: d_inode(dentry), buffer, buffer_size: size);
578	if (len < `0`)
579	return len;
580	used += len;
581	if (buffer) {
582	if (size < used)
583	return -ERANGE;
584	buffer += len;
585	}
586
587	len = (XATTR_NAME_SOCKPROTONAME_LEN + `1`);
588	used += len;
589	if (buffer) {
590	if (size < used)
591	return -ERANGE;
592	memcpy(buffer, XATTR_NAME_SOCKPROTONAME, len);
593	buffer += len;
594	}
595
596	return used;
597	}
598
599	static int sockfs_setattr(struct mnt_idmap *idmap,
600	struct dentry dentry, struct* iattr *iattr)
601	{
602	int err = simple_setattr(&nop_mnt_idmap, dentry, iattr);
603
604	if (!err && (iattr->ia_valid & ATTR_UID)) {
605	struct socket *sock = SOCKET_I(inode: d_inode(dentry));
606
607	if (sock->sk)
608	sock->sk->sk_uid = iattr->ia_uid;
609	else
610	err = -ENOENT;
611	}
612
613	return err;
614	}
615
616	static const struct inode_operations sockfs_inode_ops = {
617	.listxattr = sockfs_listxattr,
618	.setattr = sockfs_setattr,
619	};
620
621	/**
622	* sock_alloc - allocate a socket
623	*
624	* Allocate a new inode and socket object. The two are bound together
625	* and initialised. The socket is then returned. If we are out of inodes
626	* NULL is returned. This functions uses GFP_KERNEL internally.
627	*/
628
629	struct socket sock_alloc(void*)
630	{
631	struct inode *inode;
632	struct socket *sock;
633
634	inode = new_inode_pseudo(sb: sock_mnt->mnt_sb);
635	if (!inode)
636	return NULL;
637
638	sock = SOCKET_I(inode);
639
640	inode->i_ino = get_next_ino();
641	inode->i_mode = S_IFSOCK \| S_IRWXUGO;
642	inode->i_uid = current_fsuid();
643	inode->i_gid = current_fsgid();
644	inode->i_op = &sockfs_inode_ops;
645
646	return sock;
647	}
648	EXPORT_SYMBOL(sock_alloc);
649
650	static void __sock_release(struct socket sock, struct* inode *inode)
651	{
652	const struct proto_ops *ops = READ_ONCE(sock->ops);
653
654	if (ops) {
655	struct module *owner = ops->owner;
656
657	if (inode)
658	inode_lock(inode);
659	ops->release(sock);
660	sock->sk = NULL;
661	if (inode)
662	inode_unlock(inode);
663	sock->ops = NULL;
664	module_put(module: owner);
665	}
666
667	if (sock->wq.fasync_list)
668	pr_err("%s: fasync list not empty!\n", __func__);
669
670	if (!sock->file) {
671	iput(SOCK_INODE(socket: sock));
672	return;
673	}
674	sock->file = NULL;
675	}
676
677	/**
678	* sock_release - close a socket
679	* @sock: socket to close
680	*
681	* The socket is released from the protocol stack if it has a release
682	* callback, and the inode is then released if the socket is bound to
683	* an inode not a file.
684	*/
685	void sock_release(struct socket *sock)
686	{
687	__sock_release(sock, NULL);
688	}
689	EXPORT_SYMBOL(sock_release);
690
691	void __sock_tx_timestamp(__u16 tsflags, __u8 *tx_flags)
692	{
693	u8 flags = *tx_flags;
694
695	if (tsflags & SOF_TIMESTAMPING_TX_HARDWARE) {
696	flags \|= SKBTX_HW_TSTAMP;
697
698	/ PTP hardware clocks can provide a free running cycle counter*
699	* as a time base for virtual clocks. Tell driver to use the
700	* free running cycle counter for timestamp if socket is bound
701	* to virtual clock.
702	*/
703	if (tsflags & SOF_TIMESTAMPING_BIND_PHC)
704	flags \|= SKBTX_HW_TSTAMP_USE_CYCLES;
705	}
706
707	if (tsflags & SOF_TIMESTAMPING_TX_SOFTWARE)
708	flags \|= SKBTX_SW_TSTAMP;
709
710	if (tsflags & SOF_TIMESTAMPING_TX_SCHED)
711	flags \|= SKBTX_SCHED_TSTAMP;
712
713	*tx_flags = flags;
714	}
715	EXPORT_SYMBOL(__sock_tx_timestamp);
716
717	INDIRECT_CALLABLE_DECLARE(int inet_sendmsg(struct socket , struct* msghdr *,
718	size_t));
719	INDIRECT_CALLABLE_DECLARE(int inet6_sendmsg(struct socket , struct* msghdr *,
720	size_t));
721
722	static noinline void call_trace_sock_send_length(struct sock sk, int* ret,
723	int flags)
724	{
725	trace_sock_send_length(sk, ret, flags: `0`);
726	}
727
728	static inline int sock_sendmsg_nosec(struct socket sock, struct* msghdr *msg)
729	{
730	int ret = INDIRECT_CALL_INET(READ_ONCE(sock->ops)->sendmsg, inet6_sendmsg,
731	inet_sendmsg, sock, msg,
732	msg_data_left(msg));
733	BUG_ON(ret == -EIOCBQUEUED);
734
735	if (trace_sock_send_length_enabled())
736	call_trace_sock_send_length(sk: sock->sk, ret, flags: `0`);
737	return ret;
738	}
739
/*
 * Send @msg through @sock after consulting security_socket_sendmsg().
 * A non-zero result from the security hook is returned as-is and nothing
 * is sent; otherwise the result of sock_sendmsg_nosec() is returned.
 */
static int __sock_sendmsg(struct socket *sock, struct msghdr *msg)
{
	int err = security_socket_sendmsg(sock, msg,
					  msg_data_left(msg));

	return err ?: sock_sendmsg_nosec(sock, msg);
}
747
748	/**
749	* sock_sendmsg - send a message through @sock
750	* @sock: socket
751	* @msg: message to send
752	*
753	* Sends @msg through @sock, passing through LSM.
754	* Returns the number of bytes sent, or an error code.
755	*/
756	int sock_sendmsg(struct socket sock, struct* msghdr *msg)
757	{
758	struct sockaddr_storage save_addr = (struct* sockaddr_storage *)msg->msg_name;
759	struct sockaddr_storage address;
760	int ret;
761
762	if (msg->msg_name) {
763	memcpy(&address, msg->msg_name, msg->msg_namelen);
764	msg->msg_name = &address;
765	}
766
767	ret = __sock_sendmsg(sock, msg);
768	msg->msg_name = save_addr;
769
770	return ret;
771	}
772	EXPORT_SYMBOL(sock_sendmsg);
773
774	/**
775	* kernel_sendmsg - send a message through @sock (kernel-space)
776	* @sock: socket
777	* @msg: message header
778	* @vec: kernel vec
779	* @num: vec array length
780	* @size: total message data size
781	*
782	* Builds the message data with @vec and sends it through @sock.
783	* Returns the number of bytes sent, or an error code.
784	*/
785
786	int kernel_sendmsg(struct socket sock, struct* msghdr *msg,
787	struct kvec *vec, size_t num, size_t size)
788	{
789	iov_iter_kvec(i: &msg->msg_iter, ITER_SOURCE, kvec: vec, nr_segs: num, count: size);
790	return sock_sendmsg(sock, msg);
791	}
792	EXPORT_SYMBOL(kernel_sendmsg);
793
794	/**
795	* kernel_sendmsg_locked - send a message through @sock (kernel-space)
796	* @sk: sock
797	* @msg: message header
798	* @vec: output s/g array
799	* @num: output s/g array length
800	* @size: total message data size
801	*
802	* Builds the message data with @vec and sends it through @sock.
803	* Returns the number of bytes sent, or an error code.
804	* Caller must hold @sk.
805	*/
806
807	int kernel_sendmsg_locked(struct sock sk, struct* msghdr *msg,
808	struct kvec *vec, size_t num, size_t size)
809	{
810	struct socket *sock = sk->sk_socket;
811	const struct proto_ops *ops = READ_ONCE(sock->ops);
812
813	if (!ops->sendmsg_locked)
814	return sock_no_sendmsg_locked(sk, msg, len: size);
815
816	iov_iter_kvec(i: &msg->msg_iter, ITER_SOURCE, kvec: vec, nr_segs: num, count: size);
817
818	return ops->sendmsg_locked(sk, msg, msg_data_left(msg));
819	}
820	EXPORT_SYMBOL(kernel_sendmsg_locked);
821
822	static bool skb_is_err_queue(const struct sk_buff *skb)
823	{
824	/ pkt_type of skbs enqueued on the error queue are set to*
825	* PACKET_OUTGOING in skb_set_err_queue(). This is only safe to do
826	* in recvmsg, since skbs received on a local socket will never
827	* have a pkt_type of PACKET_OUTGOING.
828	*/
829	return skb->pkt_type == PACKET_OUTGOING;
830	}
831
832	/ On transmit, software and hardware timestamps are returned independently.*
833	* As the two skb clones share the hardware timestamp, which may be updated
834	* before the software timestamp is received, a hardware TX timestamp may be
835	* returned only if there is no software TX timestamp. Ignore false software
836	* timestamps, which may be made in the __sock_recv_timestamp() call when the
837	* option SO_TIMESTAMP_OLD(NS) is enabled on the socket, even when the skb has a
838	* hardware timestamp.
839	*/
840	static bool skb_is_swtx_tstamp(const struct sk_buff skb, int* false_tstamp)
841	{
842	return skb->tstamp && !false_tstamp && skb_is_err_queue(skb);
843	}
844
845	static ktime_t get_timestamp(struct sock sk, struct* sk_buff skb, int* *if_index)
846	{
847	bool cycles = READ_ONCE(sk->sk_tsflags) & SOF_TIMESTAMPING_BIND_PHC;
848	struct skb_shared_hwtstamps *shhwtstamps = skb_hwtstamps(skb);
849	struct net_device *orig_dev;
850	ktime_t hwtstamp;
851
852	rcu_read_lock();
853	orig_dev = dev_get_by_napi_id(napi_id: skb_napi_id(skb));
854	if (orig_dev) {
855	*if_index = orig_dev->ifindex;
856	hwtstamp = netdev_get_tstamp(dev: orig_dev, hwtstamps: shhwtstamps, cycles);
857	} else {
858	hwtstamp = shhwtstamps->hwtstamp;
859	}
860	rcu_read_unlock();
861
862	return hwtstamp;
863	}
864
865	static void put_ts_pktinfo(struct msghdr msg, struct* sk_buff *skb,
866	int if_index)
867	{
868	struct scm_ts_pktinfo ts_pktinfo;
869	struct net_device *orig_dev;
870
871	if (!skb_mac_header_was_set(skb))
872	return;
873
874	memset(&ts_pktinfo, `0`, sizeof(ts_pktinfo));
875
876	if (!if_index) {
877	rcu_read_lock();
878	orig_dev = dev_get_by_napi_id(napi_id: skb_napi_id(skb));
879	if (orig_dev)
880	if_index = orig_dev->ifindex;
881	rcu_read_unlock();
882	}
883	ts_pktinfo.if_index = if_index;
884
885	ts_pktinfo.pkt_length = skb->len - skb_mac_offset(skb);
886	put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPING_PKTINFO,
887	len: sizeof(ts_pktinfo), data: &ts_pktinfo);
888	}
889
890	/*
891	* called from sock_recv_timestamp() if sock_flag(sk, SOCK_RCVTSTAMP)
892	*/
893	void __sock_recv_timestamp(struct msghdr msg, struct* sock *sk,
894	struct sk_buff *skb)
895	{
896	int need_software_tstamp = sock_flag(sk, flag: SOCK_RCVTSTAMP);
897	int new_tstamp = sock_flag(sk, flag: SOCK_TSTAMP_NEW);
898	struct scm_timestamping_internal tss;
899	int empty = `1`, false_tstamp = `0`;
900	struct skb_shared_hwtstamps *shhwtstamps =
901	skb_hwtstamps(skb);
902	int if_index;
903	ktime_t hwtstamp;
904	u32 tsflags;
905
906	/ Race occurred between timestamp enabling and packet*
907	receiving. Fill in the current time for now. /*
908	if (need_software_tstamp && skb->tstamp == `0`) {
909	__net_timestamp(skb);
910	false_tstamp = `1`;
911	}
912
913	if (need_software_tstamp) {
914	if (!sock_flag(sk, flag: SOCK_RCVTSTAMPNS)) {
915	if (new_tstamp) {
916	struct __kernel_sock_timeval tv;
917
918	skb_get_new_timestamp(skb, stamp: &tv);
919	put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMP_NEW,
920	len: sizeof(tv), data: &tv);
921	} else {
922	struct __kernel_old_timeval tv;
923
924	skb_get_timestamp(skb, stamp: &tv);
925	put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMP_OLD,
926	len: sizeof(tv), data: &tv);
927	}
928	} else {
929	if (new_tstamp) {
930	struct __kernel_timespec ts;
931
932	skb_get_new_timestampns(skb, stamp: &ts);
933	put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMPNS_NEW,
934	len: sizeof(ts), data: &ts);
935	} else {
936	struct __kernel_old_timespec ts;
937
938	skb_get_timestampns(skb, stamp: &ts);
939	put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMPNS_OLD,
940	len: sizeof(ts), data: &ts);
941	}
942	}
943	}
944
945	memset(&tss, `0`, sizeof(tss));
946	tsflags = READ_ONCE(sk->sk_tsflags);
947	if ((tsflags & SOF_TIMESTAMPING_SOFTWARE) &&
948	ktime_to_timespec64_cond(kt: skb->tstamp, ts: tss.ts + `0`))
949	empty = `0`;
950	if (shhwtstamps &&
951	(tsflags & SOF_TIMESTAMPING_RAW_HARDWARE) &&
952	!skb_is_swtx_tstamp(skb, false_tstamp)) {
953	if_index = `0`;
954	if (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP_NETDEV)
955	hwtstamp = get_timestamp(sk, skb, if_index: &if_index);
956	else
957	hwtstamp = shhwtstamps->hwtstamp;
958
959	if (tsflags & SOF_TIMESTAMPING_BIND_PHC)
960	hwtstamp = ptp_convert_timestamp(hwtstamp: &hwtstamp,
961	READ_ONCE(sk->sk_bind_phc));
962
963	if (ktime_to_timespec64_cond(kt: hwtstamp, ts: tss.ts + `2`)) {
964	empty = `0`;
965
966	if ((tsflags & SOF_TIMESTAMPING_OPT_PKTINFO) &&
967	!skb_is_err_queue(skb))
968	put_ts_pktinfo(msg, skb, if_index);
969	}
970	}
971	if (!empty) {
972	if (sock_flag(sk, flag: SOCK_TSTAMP_NEW))
973	put_cmsg_scm_timestamping64(msg, tss: &tss);
974	else
975	put_cmsg_scm_timestamping(msg, tss: &tss);
976
977	if (skb_is_err_queue(skb) && skb->len &&
978	SKB_EXT_ERR(skb)->opt_stats)
979	put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPING_OPT_STATS,
980	len: skb->len, data: skb->data);
981	}
982	}
983	EXPORT_SYMBOL_GPL(__sock_recv_timestamp);
984
#ifdef CONFIG_WIRELESS
/*
 * Report the wifi ACK status of @skb as an SCM_WIFI_STATUS control
 * message. Nothing is emitted unless the socket has SOCK_WIFI_STATUS set
 * and the skb carries a valid ACK indication.
 */
void __sock_recv_wifi_status(struct msghdr *msg, struct sock *sk,
	struct sk_buff *skb)
{
	int ack;

	if (!sock_flag(sk, SOCK_WIFI_STATUS))
		return;
	if (!skb->wifi_acked_valid)
		return;

	ack = skb->wifi_acked;

	put_cmsg(msg, SOL_SOCKET, SCM_WIFI_STATUS, sizeof(ack), &ack);
}
EXPORT_SYMBOL_GPL(__sock_recv_wifi_status);
#endif
1002
1003	static inline void sock_recv_drops(struct msghdr msg, struct* sock *sk,
1004	struct sk_buff *skb)
1005	{
1006	if (sock_flag(sk, flag: SOCK_RXQ_OVFL) && skb && SOCK_SKB_CB(skb)->dropcount)
1007	put_cmsg(msg, SOL_SOCKET, SO_RXQ_OVFL,
1008	len: sizeof(__u32), data: &SOCK_SKB_CB(skb)->dropcount);
1009	}
1010
1011	static void sock_recv_mark(struct msghdr msg, struct* sock *sk,
1012	struct sk_buff *skb)
1013	{
1014	if (sock_flag(sk, flag: SOCK_RCVMARK) && skb) {
1015	/ We must use a bounce buffer for CONFIG_HARDENED_USERCOPY=y /
1016	__u32 mark = skb->mark;
1017
1018	put_cmsg(msg, SOL_SOCKET, SO_MARK, len: sizeof(__u32), data: &mark);
1019	}
1020	}
1021
/*
 * Attach the generic receive control messages for @skb to @msg:
 * timestamps, queue-overflow drop count and packet mark, in that order.
 */
void __sock_recv_cmsgs(struct msghdr *msg, struct sock *sk,
		       struct sk_buff *skb)
{
	sock_recv_timestamp(msg, sk, skb);
	sock_recv_drops(msg, sk, skb);
	sock_recv_mark(msg, sk, skb);
}
EXPORT_SYMBOL_GPL(__sock_recv_cmsgs);
1030
1031	INDIRECT_CALLABLE_DECLARE(int inet_recvmsg(struct socket , struct* msghdr *,
1032	size_t, int));
1033	INDIRECT_CALLABLE_DECLARE(int inet6_recvmsg(struct socket , struct* msghdr *,
1034	size_t, int));
1035
1036	static noinline void call_trace_sock_recv_length(struct sock sk, int* ret, int flags)
1037	{
1038	trace_sock_recv_length(sk, ret, flags);
1039	}
1040
1041	static inline int sock_recvmsg_nosec(struct socket sock, struct* msghdr *msg,
1042	int flags)
1043	{
1044	int ret = INDIRECT_CALL_INET(READ_ONCE(sock->ops)->recvmsg,
1045	inet6_recvmsg,
1046	inet_recvmsg, sock, msg,
1047	msg_data_left(msg), flags);
1048	if (trace_sock_recv_length_enabled())
1049	call_trace_sock_recv_length(sk: sock->sk, ret, flags);
1050	return ret;
1051	}
1052
/**
 *	sock_recvmsg - receive a message from @sock
 *	@sock: socket
 *	@msg: message to receive
 *	@flags: message flags
 *
 *	Receives @msg from @sock, passing through LSM. Returns the total number
 *	of bytes received, or an error. A non-zero result from the LSM hook
 *	is returned as-is and no receive is attempted.
 */
int sock_recvmsg(struct socket *sock, struct msghdr *msg, int flags)
{
	int err = security_socket_recvmsg(sock, msg, msg_data_left(msg), flags);

	return err ?: sock_recvmsg_nosec(sock, msg, flags);
}
EXPORT_SYMBOL(sock_recvmsg);
1069
1070	/**
1071	* kernel_recvmsg - Receive a message from a socket (kernel space)
1072	* @sock: The socket to receive the message from
1073	* @msg: Received message
1074	* @vec: Input s/g array for message data
1075	* @num: Size of input s/g array
1076	* @size: Number of bytes to read
1077	* @flags: Message flags (MSG_DONTWAIT, etc...)
1078	*
1079	* On return the msg structure contains the scatter/gather array passed in the
1080	* vec argument. The array is modified so that it consists of the unfilled
1081	* portion of the original array.
1082	*
1083	* The returned value is the total number of bytes received, or an error.
1084	*/
1085
1086	int kernel_recvmsg(struct socket sock, struct* msghdr *msg,
1087	struct kvec vec, size_t num, size_t size, int* flags)
1088	{
1089	msg->msg_control_is_user = false;
1090	iov_iter_kvec(i: &msg->msg_iter, ITER_DEST, kvec: vec, nr_segs: num, count: size);
1091	return sock_recvmsg(sock, msg, flags);
1092	}
1093	EXPORT_SYMBOL(kernel_recvmsg);
1094
1095	static ssize_t sock_splice_read(struct file file, loff_t ppos,
1096	struct pipe_inode_info *pipe, size_t len,
1097	unsigned int flags)
1098	{
1099	struct socket *sock = file->private_data;
1100	const struct proto_ops *ops;
1101
1102	ops = READ_ONCE(sock->ops);
1103	if (unlikely(!ops->splice_read))
1104	return copy_splice_read(in: file, ppos, pipe, len, flags);
1105
1106	return ops->splice_read(sock, ppos, pipe, len, flags);
1107	}
1108
1109	static void sock_splice_eof(struct file *file)
1110	{
1111	struct socket *sock = file->private_data;
1112	const struct proto_ops *ops;
1113
1114	ops = READ_ONCE(sock->ops);
1115	if (ops->splice_eof)
1116	ops->splice_eof(sock);
1117	}
1118
1119	static ssize_t sock_read_iter(struct kiocb iocb, struct* iov_iter *to)
1120	{
1121	struct file *file = iocb->ki_filp;
1122	struct socket *sock = file->private_data;
1123	struct msghdr msg = {.msg_iter = *to,
1124	.msg_iocb = iocb};
1125	ssize_t res;
1126
1127	if (file->f_flags & O_NONBLOCK \|\| (iocb->ki_flags & IOCB_NOWAIT))
1128	msg.msg_flags = MSG_DONTWAIT;
1129
1130	if (iocb->ki_pos != `0`)
1131	return -ESPIPE;
1132
1133	if (!iov_iter_count(i: to)) / Match SYS5 behaviour /
1134	return `0`;
1135
1136	res = sock_recvmsg(sock, &msg, msg.msg_flags);
1137	*to = msg.msg_iter;
1138	return res;
1139	}
1140
1141	static ssize_t sock_write_iter(struct kiocb iocb, struct* iov_iter *from)
1142	{
1143	struct file *file = iocb->ki_filp;
1144	struct socket *sock = file->private_data;
1145	struct msghdr msg = {.msg_iter = *from,
1146	.msg_iocb = iocb};
1147	ssize_t res;
1148
1149	if (iocb->ki_pos != `0`)
1150	return -ESPIPE;
1151
1152	if (file->f_flags & O_NONBLOCK \|\| (iocb->ki_flags & IOCB_NOWAIT))
1153	msg.msg_flags = MSG_DONTWAIT;
1154
1155	if (sock->type == SOCK_SEQPACKET)
1156	msg.msg_flags \|= MSG_EOR;
1157
1158	res = __sock_sendmsg(sock, msg: &msg);
1159	*from = msg.msg_iter;
1160	return res;
1161	}
1162
1163	/*
1164	* Atomic setting of ioctl hooks to avoid race
1165	* with module unload.
1166	*/
1167
1168	static DEFINE_MUTEX(br_ioctl_mutex);
1169	static int (br_ioctl_hook)(struct* net net, struct* net_bridge *br,
1170	unsigned int cmd, struct ifreq *ifr,
1171	void __user *uarg);
1172
1173	void brioctl_set(int (hook)(struct* net net, struct* net_bridge *br,
1174	unsigned int cmd, struct ifreq *ifr,
1175	void __user *uarg))
1176	{
1177	mutex_lock(&br_ioctl_mutex);
1178	br_ioctl_hook = hook;
1179	mutex_unlock(lock: &br_ioctl_mutex);
1180	}
1181	EXPORT_SYMBOL(brioctl_set);
1182
1183	int br_ioctl_call(struct net net, struct* net_bridge br, unsigned* int cmd,
1184	struct ifreq ifr, void* __user *uarg)
1185	{
1186	int err = -ENOPKG;
1187
1188	if (!br_ioctl_hook)
1189	request_module("bridge");
1190
1191	mutex_lock(&br_ioctl_mutex);
1192	if (br_ioctl_hook)
1193	err = br_ioctl_hook(net, br, cmd, ifr, uarg);
1194	mutex_unlock(lock: &br_ioctl_mutex);
1195
1196	return err;
1197	}
1198
1199	static DEFINE_MUTEX(vlan_ioctl_mutex);
1200	static int (vlan_ioctl_hook) (struct* net , void* __user *arg);
1201
1202	void vlan_ioctl_set(int (hook) (struct* net , void* __user *))
1203	{
1204	mutex_lock(&vlan_ioctl_mutex);
1205	vlan_ioctl_hook = hook;
1206	mutex_unlock(lock: &vlan_ioctl_mutex);
1207	}
1208	EXPORT_SYMBOL(vlan_ioctl_set);
1209
1210	static long sock_do_ioctl(struct net net, struct* socket *sock,
1211	unsigned int cmd, unsigned long arg)
1212	{
1213	const struct proto_ops *ops = READ_ONCE(sock->ops);
1214	struct ifreq ifr;
1215	bool need_copyout;
1216	int err;
1217	void __user argp = (void* __user *)arg;
1218	void __user *data;
1219
1220	err = ops->ioctl(sock, cmd, arg);
1221
1222	/*
1223	* If this ioctl is unknown try to hand it down
1224	* to the NIC driver.
1225	*/
1226	if (err != -ENOIOCTLCMD)
1227	return err;
1228
1229	if (!is_socket_ioctl_cmd(cmd))
1230	return -ENOTTY;
1231
1232	if (get_user_ifreq(ifr: &ifr, ifrdata: &data, arg: argp))
1233	return -EFAULT;
1234	err = dev_ioctl(net, cmd, ifr: &ifr, data, need_copyout: &need_copyout);
1235	if (!err && need_copyout)
1236	if (put_user_ifreq(ifr: &ifr, arg: argp))
1237	return -EFAULT;
1238
1239	return err;
1240	}
1241
1242	/*
1243	* With an ioctl, arg may well be a user mode pointer, but we don't know
1244	* what to do with it - that's up to the protocol still.
1245	*/
1246
1247	static long sock_ioctl(struct file file, unsigned* cmd, unsigned long arg)
1248	{
1249	const struct proto_ops *ops;
1250	struct socket *sock;
1251	struct sock *sk;
1252	void __user argp = (void* __user *)arg;
1253	int pid, err;
1254	struct net *net;
1255
1256	sock = file->private_data;
1257	ops = READ_ONCE(sock->ops);
1258	sk = sock->sk;
1259	net = sock_net(sk);
1260	if (unlikely(cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + `15`))) {
1261	struct ifreq ifr;
1262	void __user *data;
1263	bool need_copyout;
1264	if (get_user_ifreq(ifr: &ifr, ifrdata: &data, arg: argp))
1265	return -EFAULT;
1266	err = dev_ioctl(net, cmd, ifr: &ifr, data, need_copyout: &need_copyout);
1267	if (!err && need_copyout)
1268	if (put_user_ifreq(ifr: &ifr, arg: argp))
1269	return -EFAULT;
1270	} else
1271	#ifdef CONFIG_WEXT_CORE
1272	if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) {
1273	err = wext_handle_ioctl(net, cmd, arg: argp);
1274	} else
1275	#endif
1276	switch (cmd) {
1277	case FIOSETOWN:
1278	case SIOCSPGRP:
1279	err = -EFAULT;
1280	if (get_user(pid, (int __user *)argp))
1281	break;
1282	err = f_setown(filp: sock->file, who: pid, force: `1`);
1283	break;
1284	case FIOGETOWN:
1285	case SIOCGPGRP:
1286	err = put_user(f_getown(sock->file),
1287	(int __user *)argp);
1288	break;
1289	case SIOCGIFBR:
1290	case SIOCSIFBR:
1291	case SIOCBRADDBR:
1292	case SIOCBRDELBR:
1293	err = br_ioctl_call(net, NULL, cmd, NULL, uarg: argp);
1294	break;
1295	case SIOCGIFVLAN:
1296	case SIOCSIFVLAN:
1297	err = -ENOPKG;
1298	if (!vlan_ioctl_hook)
1299	request_module("8021q");
1300
1301	mutex_lock(&vlan_ioctl_mutex);
1302	if (vlan_ioctl_hook)
1303	err = vlan_ioctl_hook(net, argp);
1304	mutex_unlock(lock: &vlan_ioctl_mutex);
1305	break;
1306	case SIOCGSKNS:
1307	err = -EPERM;
1308	if (!ns_capable(ns: net->user_ns, CAP_NET_ADMIN))
1309	break;
1310
1311	err = open_related_ns(ns: &net->ns, get_ns: get_net_ns);
1312	break;
1313	case SIOCGSTAMP_OLD:
1314	case SIOCGSTAMPNS_OLD:
1315	if (!ops->gettstamp) {
1316	err = -ENOIOCTLCMD;
1317	break;
1318	}
1319	err = ops->gettstamp(sock, argp,
1320	cmd == SIOCGSTAMP_OLD,
1321	!IS_ENABLED(CONFIG_64BIT));
1322	break;
1323	case SIOCGSTAMP_NEW:
1324	case SIOCGSTAMPNS_NEW:
1325	if (!ops->gettstamp) {
1326	err = -ENOIOCTLCMD;
1327	break;
1328	}
1329	err = ops->gettstamp(sock, argp,
1330	cmd == SIOCGSTAMP_NEW,
1331	false);
1332	break;
1333
1334	case SIOCGIFCONF:
1335	err = dev_ifconf(net, ifc: argp);
1336	break;
1337
1338	default:
1339	err = sock_do_ioctl(net, sock, cmd, arg);
1340	break;
1341	}
1342	return err;
1343	}
1344
1345	/**
1346	* sock_create_lite - creates a socket
1347	* @family: protocol family (AF_INET, ...)
1348	* @type: communication type (SOCK_STREAM, ...)
1349	* @protocol: protocol (0, ...)
1350	* @res: new socket
1351	*
1352	* Creates a new socket and assigns it to @res, passing through LSM.
1353	* The new socket initialization is not complete, see kernel_accept().
1354	* Returns 0 or an error. On failure @res is set to %NULL.
1355	* This function internally uses GFP_KERNEL.
1356	*/
1357
1358	int sock_create_lite(int family, int type, int protocol, struct socket **res)
1359	{
1360	int err;
1361	struct socket *sock = NULL;
1362
1363	err = security_socket_create(family, type, protocol, kern: `1`);
1364	if (err)
1365	goto out;
1366
1367	sock = sock_alloc();
1368	if (!sock) {
1369	err = -ENOMEM;
1370	goto out;
1371	}
1372
1373	sock->type = type;
1374	err = security_socket_post_create(sock, family, type, protocol, kern: `1`);
1375	if (err)
1376	goto out_release;
1377
1378	out:
1379	*res = sock;
1380	return err;
1381	out_release:
1382	sock_release(sock);
1383	sock = NULL;
1384	goto out;
1385	}
1386	EXPORT_SYMBOL(sock_create_lite);
1387
1388	/ No kernel lock held - perfect /
1389	static __poll_t sock_poll(struct file file, poll_table wait)
1390	{
1391	struct socket *sock = file->private_data;
1392	const struct proto_ops *ops = READ_ONCE(sock->ops);
1393	__poll_t events = poll_requested_events(p: wait), flag = `0`;
1394
1395	if (!ops->poll)
1396	return `0`;
1397
1398	if (sk_can_busy_loop(sk: sock->sk)) {
1399	/ poll once if requested by the syscall /
1400	if (events & POLL_BUSY_LOOP)
1401	sk_busy_loop(sk: sock->sk, nonblock: `1`);
1402
1403	/ if this socket can poll_ll, tell the system call /
1404	flag = POLL_BUSY_LOOP;
1405	}
1406
1407	return ops->poll(file, sock, wait) \| flag;
1408	}
1409
1410	static int sock_mmap(struct file file, struct* vm_area_struct *vma)
1411	{
1412	struct socket *sock = file->private_data;
1413
1414	return READ_ONCE(sock->ops)->mmap(file, sock, vma);
1415	}
1416
/*
 * release handler for socket files: releases the socket embedded in @inode.
 * @filp is unused.  Always returns 0.
 */
static int sock_close(struct inode *inode, struct file *filp)
{
	__sock_release(SOCKET_I(inode), inode);
	return 0;
}
1422
1423	/*
1424	* Update the socket async list
1425	*
1426	* Fasync_list locking strategy.
1427	*
1428	* 1. fasync_list is modified only under process context socket lock
1429	* i.e. under semaphore.
1430	* 2. fasync_list is used under read_lock(&sk->sk_callback_lock)
1431	* or under socket lock
1432	*/
1433
1434	static int sock_fasync(int fd, struct file filp, int* on)
1435	{
1436	struct socket *sock = filp->private_data;
1437	struct sock *sk = sock->sk;
1438	struct socket_wq *wq = &sock->wq;
1439
1440	if (sk == NULL)
1441	return -EINVAL;
1442
1443	lock_sock(sk);
1444	fasync_helper(fd, filp, on, &wq->fasync_list);
1445
1446	if (!wq->fasync_list)
1447	sock_reset_flag(sk, flag: SOCK_FASYNC);
1448	else
1449	sock_set_flag(sk, flag: SOCK_FASYNC);
1450
1451	release_sock(sk);
1452	return `0`;
1453	}
1454
1455	/ This function may be called only under rcu_lock /
1456
1457	int sock_wake_async(struct socket_wq wq, int* how, int band)
1458	{
1459	if (!wq \|\| !wq->fasync_list)
1460	return -`1`;
1461
1462	switch (how) {
1463	case SOCK_WAKE_WAITD:
1464	if (test_bit(SOCKWQ_ASYNC_WAITDATA, &wq->flags))
1465	break;
1466	goto call_kill;
1467	case SOCK_WAKE_SPACE:
1468	if (!test_and_clear_bit(SOCKWQ_ASYNC_NOSPACE, addr: &wq->flags))
1469	break;
1470	fallthrough;
1471	case SOCK_WAKE_IO:
1472	call_kill:
1473	kill_fasync(&wq->fasync_list, SIGIO, band);
1474	break;
1475	case SOCK_WAKE_URG:
1476	kill_fasync(&wq->fasync_list, SIGURG, band);
1477	}
1478
1479	return `0`;
1480	}
1481	EXPORT_SYMBOL(sock_wake_async);
1482
1483	/**
1484	* __sock_create - creates a socket
1485	* @net: net namespace
1486	* @family: protocol family (AF_INET, ...)
1487	* @type: communication type (SOCK_STREAM, ...)
1488	* @protocol: protocol (0, ...)
1489	* @res: new socket
1490	* @kern: boolean for kernel space sockets
1491	*
1492	* Creates a new socket and assigns it to @res, passing through LSM.
1493	* Returns 0 or an error. On failure @res is set to %NULL. @kern must
1494	* be set to true if the socket resides in kernel space.
1495	* This function internally uses GFP_KERNEL.
1496	*/
1497
1498	int __sock_create(struct net net, int* family, int type, int protocol,
1499	struct socket *res, int* kern)
1500	{
1501	int err;
1502	struct socket *sock;
1503	const struct net_proto_family *pf;
1504
1505	/*
1506	* Check protocol is in range
1507	*/
1508	if (family < `0` \|\| family >= NPROTO)
1509	return -EAFNOSUPPORT;
1510	if (type < `0` \|\| type >= SOCK_MAX)
1511	return -EINVAL;
1512
1513	/ Compatibility.*
1514
1515	This uglymoron is moved from INET layer to here to avoid
1516	deadlock in module load.
1517	*/
1518	if (family == PF_INET && type == SOCK_PACKET) {
1519	pr_info_once("%s uses obsolete (PF_INET,SOCK_PACKET)\n",
1520	current->comm);
1521	family = PF_PACKET;
1522	}
1523
1524	err = security_socket_create(family, type, protocol, kern);
1525	if (err)
1526	return err;
1527
1528	/*
1529	* Allocate the socket and allow the family to set things up. if
1530	* the protocol is 0, the family is instructed to select an appropriate
1531	* default.
1532	*/
1533	sock = sock_alloc();
1534	if (!sock) {
1535	net_warn_ratelimited("socket: no more sockets\n");
1536	return -ENFILE; / Not exactly a match, but its the*
1537	closest posix thing /*
1538	}
1539
1540	sock->type = type;
1541
1542	#ifdef CONFIG_MODULES
1543	/ Attempt to load a protocol module if the find failed.*
1544	*
1545	* 12/09/1996 Marcin: But! this makes REALLY only sense, if the user
1546	* requested real, full-featured networking support upon configuration.
1547	* Otherwise module support will break!
1548	*/
1549	if (rcu_access_pointer(net_families[family]) == NULL)
1550	request_module("net-pf-%d", family);
1551	#endif
1552
1553	rcu_read_lock();
1554	pf = rcu_dereference(net_families[family]);
1555	err = -EAFNOSUPPORT;
1556	if (!pf)
1557	goto out_release;
1558
1559	/*
1560	* We will call the ->create function, that possibly is in a loadable
1561	* module, so we have to bump that loadable module refcnt first.
1562	*/
1563	if (!try_module_get(module: pf->owner))
1564	goto out_release;
1565
1566	/ Now protected by module ref count /
1567	rcu_read_unlock();
1568
1569	err = pf->create(net, sock, protocol, kern);
1570	if (err < `0`)
1571	goto out_module_put;
1572
1573	/*
1574	* Now to bump the refcnt of the [loadable] module that owns this
1575	* socket at sock_release time we decrement its refcnt.
1576	*/
1577	if (!try_module_get(module: sock->ops->owner))
1578	goto out_module_busy;
1579
1580	/*
1581	* Now that we're done with the ->create function, the [loadable]
1582	* module can have its refcnt decremented
1583	*/
1584	module_put(module: pf->owner);
1585	err = security_socket_post_create(sock, family, type, protocol, kern);
1586	if (err)
1587	goto out_sock_release;
1588	*res = sock;
1589
1590	return `0`;
1591
1592	out_module_busy:
1593	err = -EAFNOSUPPORT;
1594	out_module_put:
1595	sock->ops = NULL;
1596	module_put(module: pf->owner);
1597	out_sock_release:
1598	sock_release(sock);
1599	return err;
1600
1601	out_release:
1602	rcu_read_unlock();
1603	goto out_sock_release;
1604	}
1605	EXPORT_SYMBOL(__sock_create);
1606
1607	/**
1608	* sock_create - creates a socket
1609	* @family: protocol family (AF_INET, ...)
1610	* @type: communication type (SOCK_STREAM, ...)
1611	* @protocol: protocol (0, ...)
1612	* @res: new socket
1613	*
1614	* A wrapper around __sock_create().
1615	* Returns 0 or an error. This function internally uses GFP_KERNEL.
1616	*/
1617
1618	int sock_create(int family, int type, int protocol, struct socket **res)
1619	{
1620	return __sock_create(current->nsproxy->net_ns, family, type, protocol, res, `0`);
1621	}
1622	EXPORT_SYMBOL(sock_create);
1623
/**
 *	sock_create_kern - creates a socket (kernel space)
 *	@net: net namespace
 *	@family: protocol family (AF_INET, ...)
 *	@type: communication type (SOCK_STREAM, ...)
 *	@protocol: protocol (0, ...)
 *	@res: new socket
 *
 *	A wrapper around __sock_create() that passes 1 for @kern.
 *	Returns 0 or an error. This function internally uses GFP_KERNEL.
 */

int sock_create_kern(struct net *net, int family, int type, int protocol, struct socket **res)
{
	return __sock_create(net, family, type, protocol, res, 1);
}
EXPORT_SYMBOL(sock_create_kern);
1641
1642	static struct socket __sys_socket_create(int* family, int type, int protocol)
1643	{
1644	struct socket *sock;
1645	int retval;
1646
1647	/ Check the SOCK_* constants for consistency. /
1648	BUILD_BUG_ON(SOCK_CLOEXEC != O_CLOEXEC);
1649	BUILD_BUG_ON((SOCK_MAX \| SOCK_TYPE_MASK) != SOCK_TYPE_MASK);
1650	BUILD_BUG_ON(SOCK_CLOEXEC & SOCK_TYPE_MASK);
1651	BUILD_BUG_ON(SOCK_NONBLOCK & SOCK_TYPE_MASK);
1652
1653	if ((type & ~SOCK_TYPE_MASK) & ~(SOCK_CLOEXEC \| SOCK_NONBLOCK))
1654	return ERR_PTR(error: -EINVAL);
1655	type &= SOCK_TYPE_MASK;
1656
1657	retval = sock_create(family, type, protocol, &sock);
1658	if (retval < `0`)
1659	return ERR_PTR(error: retval);
1660
1661	return sock;
1662	}
1663
1664	struct file __sys_socket_file(int* family, int type, int protocol)
1665	{
1666	struct socket *sock;
1667	int flags;
1668
1669	sock = __sys_socket_create(family, type, protocol);
1670	if (IS_ERR(ptr: sock))
1671	return ERR_CAST(ptr: sock);
1672
1673	flags = type & ~SOCK_TYPE_MASK;
1674	if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1675	flags = (flags & ~SOCK_NONBLOCK) \| O_NONBLOCK;
1676
1677	return sock_alloc_file(sock, flags, NULL);
1678	}
1679
1680	/ A hook for bpf progs to attach to and update socket protocol.*
1681	*
1682	* A static noinline declaration here could cause the compiler to
1683	* optimize away the function. A global noinline declaration will
1684	* keep the definition, but may optimize away the callsite.
1685	* Therefore, __weak is needed to ensure that the call is still
1686	* emitted, by telling the compiler that we don't know what the
1687	* function might eventually be.
1688	*
1689	* __diag_* below are needed to dismiss the missing prototype warning.
1690	*/
1691
1692	__diag_push();
1693	__diag_ignore_all("-Wmissing-prototypes",
1694	"A fmod_ret entry point for BPF programs");
1695
1696	__weak noinline int update_socket_protocol(int family, int type, int protocol)
1697	{
1698	return protocol;
1699	}
1700
1701	__diag_pop();
1702
1703	int __sys_socket(int family, int type, int protocol)
1704	{
1705	struct socket *sock;
1706	int flags;
1707
1708	sock = __sys_socket_create(family, type,
1709	protocol: update_socket_protocol(family, type, protocol));
1710	if (IS_ERR(ptr: sock))
1711	return PTR_ERR(ptr: sock);
1712
1713	flags = type & ~SOCK_TYPE_MASK;
1714	if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1715	flags = (flags & ~SOCK_NONBLOCK) \| O_NONBLOCK;
1716
1717	return sock_map_fd(sock, flags: flags & (O_CLOEXEC \| O_NONBLOCK));
1718	}
1719
1720	SYSCALL_DEFINE3(socket, int, family, int, type, int, protocol)
1721	{
1722	return __sys_socket(family, type, protocol);
1723	}
1724
1725	/*
1726	* Create a pair of connected sockets.
1727	*/
1728
1729	int __sys_socketpair(int family, int type, int protocol, int __user *usockvec)
1730	{
1731	struct socket sock1, sock2;
1732	int fd1, fd2, err;
1733	struct file newfile1, newfile2;
1734	int flags;
1735
1736	flags = type & ~SOCK_TYPE_MASK;
1737	if (flags & ~(SOCK_CLOEXEC \| SOCK_NONBLOCK))
1738	return -EINVAL;
1739	type &= SOCK_TYPE_MASK;
1740
1741	if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1742	flags = (flags & ~SOCK_NONBLOCK) \| O_NONBLOCK;
1743
1744	/*
1745	* reserve descriptors and make sure we won't fail
1746	* to return them to userland.
1747	*/
1748	fd1 = get_unused_fd_flags(flags);
1749	if (unlikely(fd1 < `0`))
1750	return fd1;
1751
1752	fd2 = get_unused_fd_flags(flags);
1753	if (unlikely(fd2 < `0`)) {
1754	put_unused_fd(fd: fd1);
1755	return fd2;
1756	}
1757
1758	err = put_user(fd1, &usockvec[`0`]);
1759	if (err)
1760	goto out;
1761
1762	err = put_user(fd2, &usockvec[`1`]);
1763	if (err)
1764	goto out;
1765
1766	/*
1767	* Obtain the first socket and check if the underlying protocol
1768	* supports the socketpair call.
1769	*/
1770
1771	err = sock_create(family, type, protocol, &sock1);
1772	if (unlikely(err < `0`))
1773	goto out;
1774
1775	err = sock_create(family, type, protocol, &sock2);
1776	if (unlikely(err < `0`)) {
1777	sock_release(sock1);
1778	goto out;
1779	}
1780
1781	err = security_socket_socketpair(socka: sock1, sockb: sock2);
1782	if (unlikely(err)) {
1783	sock_release(sock2);
1784	sock_release(sock1);
1785	goto out;
1786	}
1787
1788	err = READ_ONCE(sock1->ops)->socketpair(sock1, sock2);
1789	if (unlikely(err < `0`)) {
1790	sock_release(sock2);
1791	sock_release(sock1);
1792	goto out;
1793	}
1794
1795	newfile1 = sock_alloc_file(sock1, flags, NULL);
1796	if (IS_ERR(ptr: newfile1)) {
1797	err = PTR_ERR(ptr: newfile1);
1798	sock_release(sock2);
1799	goto out;
1800	}
1801
1802	newfile2 = sock_alloc_file(sock2, flags, NULL);
1803	if (IS_ERR(ptr: newfile2)) {
1804	err = PTR_ERR(ptr: newfile2);
1805	fput(newfile1);
1806	goto out;
1807	}
1808
1809	audit_fd_pair(fd1, fd2);
1810
1811	fd_install(fd: fd1, file: newfile1);
1812	fd_install(fd: fd2, file: newfile2);
1813	return `0`;
1814
1815	out:
1816	put_unused_fd(fd: fd2);
1817	put_unused_fd(fd: fd1);
1818	return err;
1819	}
1820
1821	SYSCALL_DEFINE4(socketpair, int, family, int, type, int, protocol,
1822	int __user *, usockvec)
1823	{
1824	return __sys_socketpair(family, type, protocol, usockvec);
1825	}
1826
1827	/*
1828	* Bind a name to a socket. Nothing much to do here since it's
1829	* the protocol's responsibility to handle the local address.
1830	*
1831	* We move the socket address to kernel space before we call
1832	* the protocol layer (having also checked the address is ok).
1833	*/
1834
1835	int __sys_bind(int fd, struct sockaddr __user umyaddr, int* addrlen)
1836	{
1837	struct socket *sock;
1838	struct sockaddr_storage address;
1839	int err, fput_needed;
1840
1841	sock = sockfd_lookup_light(fd, err: &err, fput_needed: &fput_needed);
1842	if (sock) {
1843	err = move_addr_to_kernel(uaddr: umyaddr, ulen: addrlen, kaddr: &address);
1844	if (!err) {
1845	err = security_socket_bind(sock,
1846	address: (struct sockaddr *)&address,
1847	addrlen);
1848	if (!err)
1849	err = READ_ONCE(sock->ops)->bind(sock,
1850	(struct sockaddr *)
1851	&address, addrlen);
1852	}
1853	fput_light(file: sock->file, fput_needed);
1854	}
1855	return err;
1856	}
1857
1858	SYSCALL_DEFINE3(bind, int, fd, struct sockaddr __user , umyaddr, int*, addrlen)
1859	{
1860	return __sys_bind(fd, umyaddr, addrlen);
1861	}
1862
1863	/*
1864	* Perform a listen. Basically, we allow the protocol to do anything
1865	* necessary for a listen, and if that works, we mark the socket as
1866	* ready for listening.
1867	*/
1868
1869	int __sys_listen(int fd, int backlog)
1870	{
1871	struct socket *sock;
1872	int err, fput_needed;
1873	int somaxconn;
1874
1875	sock = sockfd_lookup_light(fd, err: &err, fput_needed: &fput_needed);
1876	if (sock) {
1877	somaxconn = READ_ONCE(sock_net(sock->sk)->core.sysctl_somaxconn);
1878	if ((unsigned int)backlog > somaxconn)
1879	backlog = somaxconn;
1880
1881	err = security_socket_listen(sock, backlog);
1882	if (!err)
1883	err = READ_ONCE(sock->ops)->listen(sock, backlog);
1884
1885	fput_light(file: sock->file, fput_needed);
1886	}
1887	return err;
1888	}
1889
1890	SYSCALL_DEFINE2(listen, int, fd, int, backlog)
1891	{
1892	return __sys_listen(fd, backlog);
1893	}
1894
1895	struct file do_accept(struct* file file, unsigned* file_flags,
1896	struct sockaddr __user *upeer_sockaddr,
1897	int __user upeer_addrlen, int* flags)
1898	{
1899	struct socket sock, newsock;
1900	struct file *newfile;
1901	int err, len;
1902	struct sockaddr_storage address;
1903	const struct proto_ops *ops;
1904
1905	sock = sock_from_file(file);
1906	if (!sock)
1907	return ERR_PTR(error: -ENOTSOCK);
1908
1909	newsock = sock_alloc();
1910	if (!newsock)
1911	return ERR_PTR(error: -ENFILE);
1912	ops = READ_ONCE(sock->ops);
1913
1914	newsock->type = sock->type;
1915	newsock->ops = ops;
1916
1917	/*
1918	* We don't need try_module_get here, as the listening socket (sock)
1919	* has the protocol module (sock->ops->owner) held.
1920	*/
1921	__module_get(module: ops->owner);
1922
1923	newfile = sock_alloc_file(newsock, flags, sock->sk->sk_prot_creator->name);
1924	if (IS_ERR(ptr: newfile))
1925	return newfile;
1926
1927	err = security_socket_accept(sock, newsock);
1928	if (err)
1929	goto out_fd;
1930
1931	err = ops->accept(sock, newsock, sock->file->f_flags \| file_flags,
1932	false);
1933	if (err < `0`)
1934	goto out_fd;
1935
1936	if (upeer_sockaddr) {
1937	len = ops->getname(newsock, (struct sockaddr *)&address, `2`);
1938	if (len < `0`) {
1939	err = -ECONNABORTED;
1940	goto out_fd;
1941	}
1942	err = move_addr_to_user(kaddr: &address,
1943	klen: len, uaddr: upeer_sockaddr, ulen: upeer_addrlen);
1944	if (err < `0`)
1945	goto out_fd;
1946	}
1947
1948	/ File flags are not inherited via accept() unlike another OSes. /
1949	return newfile;
1950	out_fd:
1951	fput(newfile);
1952	return ERR_PTR(error: err);
1953	}
1954
1955	static int __sys_accept4_file(struct file file, struct* sockaddr __user *upeer_sockaddr,
1956	int __user upeer_addrlen, int* flags)
1957	{
1958	struct file *newfile;
1959	int newfd;
1960
1961	if (flags & ~(SOCK_CLOEXEC \| SOCK_NONBLOCK))
1962	return -EINVAL;
1963
1964	if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1965	flags = (flags & ~SOCK_NONBLOCK) \| O_NONBLOCK;
1966
1967	newfd = get_unused_fd_flags(flags);
1968	if (unlikely(newfd < `0`))
1969	return newfd;
1970
1971	newfile = do_accept(file, file_flags: `0`, upeer_sockaddr, upeer_addrlen,
1972	flags);
1973	if (IS_ERR(ptr: newfile)) {
1974	put_unused_fd(fd: newfd);
1975	return PTR_ERR(ptr: newfile);
1976	}
1977	fd_install(fd: newfd, file: newfile);
1978	return newfd;
1979	}
1980
1981	/*
1982	* For accept, we attempt to create a new socket, set up the link
1983	* with the client, wake up the client, then return the new
1984	* connected fd. We collect the address of the connector in kernel
1985	* space and move it to user at the very end. This is unclean because
1986	* we open the socket then return an error.
1987	*
1988	* 1003.1g adds the ability to recvmsg() to query connection pending
1989	* status to recvmsg. We need to add that support in a way thats
1990	* clean when we restructure accept also.
1991	*/
1992
1993	int __sys_accept4(int fd, struct sockaddr __user *upeer_sockaddr,
1994	int __user upeer_addrlen, int* flags)
1995	{
1996	int ret = -EBADF;
1997	struct fd f;
1998
1999	f = fdget(fd);
2000	if (f.file) {
2001	ret = __sys_accept4_file(file: f.file, upeer_sockaddr,
2002	upeer_addrlen, flags);
2003	fdput(fd: f);
2004	}
2005
2006	return ret;
2007	}
2008
2009	SYSCALL_DEFINE4(accept4, int, fd, struct sockaddr __user *, upeer_sockaddr,
2010	int __user , upeer_addrlen, int*, flags)
2011	{
2012	return __sys_accept4(fd, upeer_sockaddr, upeer_addrlen, flags);
2013	}
2014
2015	SYSCALL_DEFINE3(accept, int, fd, struct sockaddr __user *, upeer_sockaddr,
2016	int __user *, upeer_addrlen)
2017	{
2018	return __sys_accept4(fd, upeer_sockaddr, upeer_addrlen, flags: `0`);
2019	}
2020
2021	/*
2022	* Attempt to connect to a socket with the server address. The address
2023	* is in user space so we verify it is OK and move it to kernel space.
2024	*
2025	* For 1003.1g we need to add clean support for a bind to AF_UNSPEC to
2026	* break bindings
2027	*
2028	* NOTE: 1003.1g draft 6.3 is broken with respect to AX.25/NetROM and
2029	* other SEQPACKET protocols that take time to connect() as it doesn't
2030	* include the -EINPROGRESS status for such sockets.
2031	*/
2032
2033	int __sys_connect_file(struct file file, struct* sockaddr_storage *address,
2034	int addrlen, int file_flags)
2035	{
2036	struct socket *sock;
2037	int err;
2038
2039	sock = sock_from_file(file);
2040	if (!sock) {
2041	err = -ENOTSOCK;
2042	goto out;
2043	}
2044
2045	err =
2046	security_socket_connect(sock, address: (struct sockaddr *)address, addrlen);
2047	if (err)
2048	goto out;
2049
2050	err = READ_ONCE(sock->ops)->connect(sock, (struct sockaddr *)address,
2051	addrlen, sock->file->f_flags \| file_flags);
2052	out:
2053	return err;
2054	}
2055
2056	int __sys_connect(int fd, struct sockaddr __user uservaddr, int* addrlen)
2057	{
2058	int ret = -EBADF;
2059	struct fd f;
2060
2061	f = fdget(fd);
2062	if (f.file) {
2063	struct sockaddr_storage address;
2064
2065	ret = move_addr_to_kernel(uaddr: uservaddr, ulen: addrlen, kaddr: &address);
2066	if (!ret)
2067	ret = __sys_connect_file(file: f.file, address: &address, addrlen, file_flags: `0`);
2068	fdput(fd: f);
2069	}
2070
2071	return ret;
2072	}
2073
2074	SYSCALL_DEFINE3(connect, int, fd, struct sockaddr __user *, uservaddr,
2075	int, addrlen)
2076	{
2077	return __sys_connect(fd, uservaddr, addrlen);
2078	}
2079
2080	/*
2081	* Get the local address ('name') of a socket object. Move the obtained
2082	* name to user space.
2083	*/
2084
2085	int __sys_getsockname(int fd, struct sockaddr __user *usockaddr,
2086	int __user *usockaddr_len)
2087	{
2088	struct socket *sock;
2089	struct sockaddr_storage address;
2090	int err, fput_needed;
2091
2092	sock = sockfd_lookup_light(fd, err: &err, fput_needed: &fput_needed);
2093	if (!sock)
2094	goto out;
2095
2096	err = security_socket_getsockname(sock);
2097	if (err)
2098	goto out_put;
2099
2100	err = READ_ONCE(sock->ops)->getname(sock, (struct sockaddr *)&address, `0`);
2101	if (err < `0`)
2102	goto out_put;
2103	/ "err" is actually length in this case /
2104	err = move_addr_to_user(kaddr: &address, klen: err, uaddr: usockaddr, ulen: usockaddr_len);
2105
2106	out_put:
2107	fput_light(file: sock->file, fput_needed);
2108	out:
2109	return err;
2110	}
2111
2112	SYSCALL_DEFINE3(getsockname, int, fd, struct sockaddr __user *, usockaddr,
2113	int __user *, usockaddr_len)
2114	{
2115	return __sys_getsockname(fd, usockaddr, usockaddr_len);
2116	}
2117
2118	/*
2119	* Get the remote address ('name') of a socket object. Move the obtained
2120	* name to user space.
2121	*/
2122
2123	int __sys_getpeername(int fd, struct sockaddr __user *usockaddr,
2124	int __user *usockaddr_len)
2125	{
2126	struct socket *sock;
2127	struct sockaddr_storage address;
2128	int err, fput_needed;
2129
2130	sock = sockfd_lookup_light(fd, err: &err, fput_needed: &fput_needed);
2131	if (sock != NULL) {
2132	const struct proto_ops *ops = READ_ONCE(sock->ops);
2133
2134	err = security_socket_getpeername(sock);
2135	if (err) {
2136	fput_light(file: sock->file, fput_needed);
2137	return err;
2138	}
2139
2140	err = ops->getname(sock, (struct sockaddr *)&address, `1`);
2141	if (err >= `0`)
2142	/ "err" is actually length in this case /
2143	err = move_addr_to_user(kaddr: &address, klen: err, uaddr: usockaddr,
2144	ulen: usockaddr_len);
2145	fput_light(file: sock->file, fput_needed);
2146	}
2147	return err;
2148	}
2149
2150	SYSCALL_DEFINE3(getpeername, int, fd, struct sockaddr __user *, usockaddr,
2151	int __user *, usockaddr_len)
2152	{
2153	return __sys_getpeername(fd, usockaddr, usockaddr_len);
2154	}
2155
2156	/*
2157	* Send a datagram to a given address. We move the address into kernel
2158	* space and check the user space data area is readable before invoking
2159	* the protocol.
2160	*/
2161	int __sys_sendto(int fd, void __user buff, size_t len, unsigned* int flags,
2162	struct sockaddr __user addr, int* addr_len)
2163	{
2164	struct socket *sock;
2165	struct sockaddr_storage address;
2166	int err;
2167	struct msghdr msg;
2168	struct iovec iov;
2169	int fput_needed;
2170
2171	err = import_single_range(ITER_SOURCE, buf: buff, len, iov: &iov, i: &msg.msg_iter);
2172	if (unlikely(err))
2173	return err;
2174	sock = sockfd_lookup_light(fd, err: &err, fput_needed: &fput_needed);
2175	if (!sock)
2176	goto out;
2177
2178	msg.msg_name = NULL;
2179	msg.msg_control = NULL;
2180	msg.msg_controllen = `0`;
2181	msg.msg_namelen = `0`;
2182	msg.msg_ubuf = NULL;
2183	if (addr) {
2184	err = move_addr_to_kernel(uaddr: addr, ulen: addr_len, kaddr: &address);
2185	if (err < `0`)
2186	goto out_put;
2187	msg.msg_name = (struct sockaddr *)&address;
2188	msg.msg_namelen = addr_len;
2189	}
2190	flags &= ~MSG_INTERNAL_SENDMSG_FLAGS;
2191	if (sock->file->f_flags & O_NONBLOCK)
2192	flags \|= MSG_DONTWAIT;
2193	msg.msg_flags = flags;
2194	err = __sock_sendmsg(sock, msg: &msg);
2195
2196	out_put:
2197	fput_light(file: sock->file, fput_needed);
2198	out:
2199	return err;
2200	}
2201
2202	SYSCALL_DEFINE6(sendto, int, fd, void __user *, buff, size_t, len,
2203	unsigned int, flags, struct sockaddr __user *, addr,
2204	int, addr_len)
2205	{
2206	return __sys_sendto(fd, buff, len, flags, addr, addr_len);
2207	}
2208
2209	/*
2210	* Send a datagram down a socket.
2211	*/
2212
2213	SYSCALL_DEFINE4(send, int, fd, void __user *, buff, size_t, len,
2214	unsigned int, flags)
2215	{
2216	return __sys_sendto(fd, buff, len, flags, NULL, addr_len: `0`);
2217	}
2218
2219	/*
2220	* Receive a frame from the socket and optionally record the address of the
2221	* sender. We verify the buffers are writable and if needed move the
2222	* sender address from kernel to user space.
2223	*/
2224	int __sys_recvfrom(int fd, void __user ubuf, size_t size, unsigned* int flags,
2225	struct sockaddr __user addr, int* __user *addr_len)
2226	{
2227	struct sockaddr_storage address;
2228	struct msghdr msg = {
2229	/ Save some cycles and don't copy the address if not needed /
2230	.msg_name = addr ? (struct sockaddr *)&address : NULL,
2231	};
2232	struct socket *sock;
2233	struct iovec iov;
2234	int err, err2;
2235	int fput_needed;
2236
2237	err = import_single_range(ITER_DEST, buf: ubuf, len: size, iov: &iov, i: &msg.msg_iter);
2238	if (unlikely(err))
2239	return err;
2240	sock = sockfd_lookup_light(fd, err: &err, fput_needed: &fput_needed);
2241	if (!sock)
2242	goto out;
2243
2244	if (sock->file->f_flags & O_NONBLOCK)
2245	flags \|= MSG_DONTWAIT;
2246	err = sock_recvmsg(sock, &msg, flags);
2247
2248	if (err >= `0` && addr != NULL) {
2249	err2 = move_addr_to_user(kaddr: &address,
2250	klen: msg.msg_namelen, uaddr: addr, ulen: addr_len);
2251	if (err2 < `0`)
2252	err = err2;
2253	}
2254
2255	fput_light(file: sock->file, fput_needed);
2256	out:
2257	return err;
2258	}
2259
2260	SYSCALL_DEFINE6(recvfrom, int, fd, void __user *, ubuf, size_t, size,
2261	unsigned int, flags, struct sockaddr __user *, addr,
2262	int __user *, addr_len)
2263	{
2264	return __sys_recvfrom(fd, ubuf, size, flags, addr, addr_len);
2265	}
2266
2267	/*
2268	* Receive a datagram from a socket.
2269	*/
2270
2271	SYSCALL_DEFINE4(recv, int, fd, void __user *, ubuf, size_t, size,
2272	unsigned int, flags)
2273	{
2274	return __sys_recvfrom(fd, ubuf, size, flags, NULL, NULL);
2275	}
2276
2277	static bool sock_use_custom_sol_socket(const struct socket *sock)
2278	{
2279	return test_bit(SOCK_CUSTOM_SOCKOPT, &sock->flags);
2280	}
2281
2282	int do_sock_setsockopt(struct socket sock, bool compat, int* level,
2283	int optname, sockptr_t optval, int optlen)
2284	{
2285	const struct proto_ops *ops;
2286	char *kernel_optval = NULL;
2287	int err;
2288
2289	if (optlen < `0`)
2290	return -EINVAL;
2291
2292	err = security_socket_setsockopt(sock, level, optname);
2293	if (err)
2294	goto out_put;
2295
2296	if (!compat)
2297	err = BPF_CGROUP_RUN_PROG_SETSOCKOPT(sock->sk, &level, &optname,
2298	optval, &optlen,
2299	&kernel_optval);
2300	if (err < `0`)
2301	goto out_put;
2302	if (err > `0`) {
2303	err = `0`;
2304	goto out_put;
2305	}
2306
2307	if (kernel_optval)
2308	optval = KERNEL_SOCKPTR(p: kernel_optval);
2309	ops = READ_ONCE(sock->ops);
2310	if (level == SOL_SOCKET && !sock_use_custom_sol_socket(sock))
2311	err = sock_setsockopt(sock, level, op: optname, optval, optlen);
2312	else if (unlikely(!ops->setsockopt))
2313	err = -EOPNOTSUPP;
2314	else
2315	err = ops->setsockopt(sock, level, optname, optval,
2316	optlen);
2317	kfree(objp: kernel_optval);
2318	out_put:
2319	return err;
2320	}
2321	EXPORT_SYMBOL(do_sock_setsockopt);
2322
2323	/ Set a socket option. Because we don't know the option lengths we have*
2324	* to pass the user mode parameter for the protocols to sort out.
2325	*/
2326	int __sys_setsockopt(int fd, int level, int optname, char __user *user_optval,
2327	int optlen)
2328	{
2329	sockptr_t optval = USER_SOCKPTR(p: user_optval);
2330	bool compat = in_compat_syscall();
2331	int err, fput_needed;
2332	struct socket *sock;
2333
2334	sock = sockfd_lookup_light(fd, err: &err, fput_needed: &fput_needed);
2335	if (!sock)
2336	return err;
2337
2338	err = do_sock_setsockopt(sock, compat, level, optname, optval, optlen);
2339
2340	fput_light(file: sock->file, fput_needed);
2341	return err;
2342	}
2343
2344	SYSCALL_DEFINE5(setsockopt, int, fd, int, level, int, optname,
2345	char __user , optval, int*, optlen)
2346	{
2347	return __sys_setsockopt(fd, level, optname, user_optval: optval, optlen);
2348	}
2349
2350	INDIRECT_CALLABLE_DECLARE(bool tcp_bpf_bypass_getsockopt(int level,
2351	int optname));
2352
2353	int do_sock_getsockopt(struct socket sock, bool compat, int* level,
2354	int optname, sockptr_t optval, sockptr_t optlen)
2355	{
2356	int max_optlen __maybe_unused;
2357	const struct proto_ops *ops;
2358	int err;
2359
2360	err = security_socket_getsockopt(sock, level, optname);
2361	if (err)
2362	return err;
2363
2364	if (!compat)
2365	max_optlen = BPF_CGROUP_GETSOCKOPT_MAX_OPTLEN(optlen);
2366
2367	ops = READ_ONCE(sock->ops);
2368	if (level == SOL_SOCKET) {
2369	err = sk_getsockopt(sk: sock->sk, level, optname, optval, optlen);
2370	} else if (unlikely(!ops->getsockopt)) {
2371	err = -EOPNOTSUPP;
2372	} else {
2373	if (WARN_ONCE(optval.is_kernel \|\| optlen.is_kernel,
2374	"Invalid argument type"))
2375	return -EOPNOTSUPP;
2376
2377	err = ops->getsockopt(sock, level, optname, optval.user,
2378	optlen.user);
2379	}
2380
2381	if (!compat)
2382	err = BPF_CGROUP_RUN_PROG_GETSOCKOPT(sock->sk, level, optname,
2383	optval, optlen, max_optlen,
2384	err);
2385
2386	return err;
2387	}
2388	EXPORT_SYMBOL(do_sock_getsockopt);
2389
2390	/*
2391	* Get a socket option. Because we don't know the option lengths we have
2392	* to pass a user mode parameter for the protocols to sort out.
2393	*/
2394	int __sys_getsockopt(int fd, int level, int optname, char __user *optval,
2395	int __user *optlen)
2396	{
2397	int err, fput_needed;
2398	struct socket *sock;
2399	bool compat;
2400
2401	sock = sockfd_lookup_light(fd, err: &err, fput_needed: &fput_needed);
2402	if (!sock)
2403	return err;
2404
2405	compat = in_compat_syscall();
2406	err = do_sock_getsockopt(sock, compat, level, optname,
2407	USER_SOCKPTR(p: optval), USER_SOCKPTR(p: optlen));
2408
2409	fput_light(file: sock->file, fput_needed);
2410	return err;
2411	}
2412
2413	SYSCALL_DEFINE5(getsockopt, int, fd, int, level, int, optname,
2414	char __user , optval, int* __user *, optlen)
2415	{
2416	return __sys_getsockopt(fd, level, optname, optval, optlen);
2417	}
2418
2419	/*
2420	* Shutdown a socket.
2421	*/
2422
2423	int __sys_shutdown_sock(struct socket sock, int* how)
2424	{
2425	int err;
2426
2427	err = security_socket_shutdown(sock, how);
2428	if (!err)
2429	err = READ_ONCE(sock->ops)->shutdown(sock, how);
2430
2431	return err;
2432	}
2433
2434	int __sys_shutdown(int fd, int how)
2435	{
2436	int err, fput_needed;
2437	struct socket *sock;
2438
2439	sock = sockfd_lookup_light(fd, err: &err, fput_needed: &fput_needed);
2440	if (sock != NULL) {
2441	err = __sys_shutdown_sock(sock, how);
2442	fput_light(file: sock->file, fput_needed);
2443	}
2444	return err;
2445	}
2446
2447	SYSCALL_DEFINE2(shutdown, int, fd, int, how)
2448	{
2449	return __sys_shutdown(fd, how);
2450	}
2451
/* A couple of helpful macros for getting the address of the 32/64 bit
 * fields which are the same type (int / unsigned) on our platforms.
 *
 * These expand to references to a local 'flags' and, in the compat case,
 * to a local named '<msg>_compat'; both must be in scope at the use site.
 */
#define COMPAT_MSG(msg, member)	((MSG_CMSG_COMPAT & flags) ? &msg##_compat->member : &msg->member)
#define COMPAT_NAMELEN(msg)	COMPAT_MSG(msg, msg_namelen)
#define COMPAT_FLAGS(msg)	COMPAT_MSG(msg, msg_flags)
2458
2459	struct used_address {
2460	struct sockaddr_storage name;
2461	unsigned int name_len;
2462	};
2463
2464	int __copy_msghdr(struct msghdr *kmsg,
2465	struct user_msghdr *msg,
2466	struct sockaddr __user **save_addr)
2467	{
2468	ssize_t err;
2469
2470	kmsg->msg_control_is_user = true;
2471	kmsg->msg_get_inq = `0`;
2472	kmsg->msg_control_user = msg->msg_control;
2473	kmsg->msg_controllen = msg->msg_controllen;
2474	kmsg->msg_flags = msg->msg_flags;
2475
2476	kmsg->msg_namelen = msg->msg_namelen;
2477	if (!msg->msg_name)
2478	kmsg->msg_namelen = `0`;
2479
2480	if (kmsg->msg_namelen < `0`)
2481	return -EINVAL;
2482
2483	if (kmsg->msg_namelen > sizeof(struct sockaddr_storage))
2484	kmsg->msg_namelen = sizeof(struct sockaddr_storage);
2485
2486	if (save_addr)
2487	*save_addr = msg->msg_name;
2488
2489	if (msg->msg_name && kmsg->msg_namelen) {
2490	if (!save_addr) {
2491	err = move_addr_to_kernel(uaddr: msg->msg_name,
2492	ulen: kmsg->msg_namelen,
2493	kaddr: kmsg->msg_name);
2494	if (err < `0`)
2495	return err;
2496	}
2497	} else {
2498	kmsg->msg_name = NULL;
2499	kmsg->msg_namelen = `0`;
2500	}
2501
2502	if (msg->msg_iovlen > UIO_MAXIOV)
2503	return -EMSGSIZE;
2504
2505	kmsg->msg_iocb = NULL;
2506	kmsg->msg_ubuf = NULL;
2507	return `0`;
2508	}
2509
2510	static int copy_msghdr_from_user(struct msghdr *kmsg,
2511	struct user_msghdr __user *umsg,
2512	struct sockaddr __user **save_addr,
2513	struct iovec **iov)
2514	{
2515	struct user_msghdr msg;
2516	ssize_t err;
2517
2518	if (copy_from_user(to: &msg, from: umsg, n: sizeof(*umsg)))
2519	return -EFAULT;
2520
2521	err = __copy_msghdr(kmsg, msg: &msg, save_addr);
2522	if (err)
2523	return err;
2524
2525	err = import_iovec(type: save_addr ? ITER_DEST : ITER_SOURCE,
2526	uvec: msg.msg_iov, nr_segs: msg.msg_iovlen,
2527	UIO_FASTIOV, iovp: iov, i: &kmsg->msg_iter);
2528	return err < `0` ? err : `0`;
2529	}
2530
2531	static int ____sys_sendmsg(struct socket sock, struct* msghdr *msg_sys,
2532	unsigned int flags, struct used_address *used_address,
2533	unsigned int allowed_msghdr_flags)
2534	{
2535	unsigned char ctl[sizeof(struct cmsghdr) + `20`]
2536	__aligned(sizeof(__kernel_size_t));
2537	/ 20 is size of ipv6_pktinfo /
2538	unsigned char *ctl_buf = ctl;
2539	int ctl_len;
2540	ssize_t err;
2541
2542	err = -ENOBUFS;
2543
2544	if (msg_sys->msg_controllen > INT_MAX)
2545	goto out;
2546	flags \|= (msg_sys->msg_flags & allowed_msghdr_flags);
2547	ctl_len = msg_sys->msg_controllen;
2548	if ((MSG_CMSG_COMPAT & flags) && ctl_len) {
2549	err =
2550	cmsghdr_from_user_compat_to_kern(msg_sys, sock->sk, ctl,
2551	sizeof(ctl));
2552	if (err)
2553	goto out;
2554	ctl_buf = msg_sys->msg_control;
2555	ctl_len = msg_sys->msg_controllen;
2556	} else if (ctl_len) {
2557	BUILD_BUG_ON(sizeof(struct cmsghdr) !=
2558	CMSG_ALIGN(sizeof(struct cmsghdr)));
2559	if (ctl_len > sizeof(ctl)) {
2560	ctl_buf = sock_kmalloc(sk: sock->sk, size: ctl_len, GFP_KERNEL);
2561	if (ctl_buf == NULL)
2562	goto out;
2563	}
2564	err = -EFAULT;
2565	if (copy_from_user(to: ctl_buf, from: msg_sys->msg_control_user, n: ctl_len))
2566	goto out_freectl;
2567	msg_sys->msg_control = ctl_buf;
2568	msg_sys->msg_control_is_user = false;
2569	}
2570	flags &= ~MSG_INTERNAL_SENDMSG_FLAGS;
2571	msg_sys->msg_flags = flags;
2572
2573	if (sock->file->f_flags & O_NONBLOCK)
2574	msg_sys->msg_flags \|= MSG_DONTWAIT;
2575	/*
2576	* If this is sendmmsg() and current destination address is same as
2577	* previously succeeded address, omit asking LSM's decision.
2578	* used_address->name_len is initialized to UINT_MAX so that the first
2579	* destination address never matches.
2580	*/
2581	if (used_address && msg_sys->msg_name &&
2582	used_address->name_len == msg_sys->msg_namelen &&
2583	!memcmp(p: &used_address->name, q: msg_sys->msg_name,
2584	size: used_address->name_len)) {
2585	err = sock_sendmsg_nosec(sock, msg: msg_sys);
2586	goto out_freectl;
2587	}
2588	err = __sock_sendmsg(sock, msg: msg_sys);
2589	/*
2590	* If this is sendmmsg() and sending to current destination address was
2591	* successful, remember it.
2592	*/
2593	if (used_address && err >= `0`) {
2594	used_address->name_len = msg_sys->msg_namelen;
2595	if (msg_sys->msg_name)
2596	memcpy(&used_address->name, msg_sys->msg_name,
2597	used_address->name_len);
2598	}
2599
2600	out_freectl:
2601	if (ctl_buf != ctl)
2602	sock_kfree_s(sk: sock->sk, mem: ctl_buf, size: ctl_len);
2603	out:
2604	return err;
2605	}
2606
2607	int sendmsg_copy_msghdr(struct msghdr *msg,
2608	struct user_msghdr __user umsg, unsigned* flags,
2609	struct iovec **iov)
2610	{
2611	int err;
2612
2613	if (flags & MSG_CMSG_COMPAT) {
2614	struct compat_msghdr __user *msg_compat;
2615
2616	msg_compat = (struct compat_msghdr __user *) umsg;
2617	err = get_compat_msghdr(msg, msg_compat, NULL, iov);
2618	} else {
2619	err = copy_msghdr_from_user(kmsg: msg, umsg, NULL, iov);
2620	}
2621	if (err < `0`)
2622	return err;
2623
2624	return `0`;
2625	}
2626
2627	static int ___sys_sendmsg(struct socket sock, struct* user_msghdr __user *msg,
2628	struct msghdr msg_sys, unsigned* int flags,
2629	struct used_address *used_address,
2630	unsigned int allowed_msghdr_flags)
2631	{
2632	struct sockaddr_storage address;
2633	struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
2634	ssize_t err;
2635
2636	msg_sys->msg_name = &address;
2637
2638	err = sendmsg_copy_msghdr(msg: msg_sys, umsg: msg, flags, iov: &iov);
2639	if (err < `0`)
2640	return err;
2641
2642	err = ____sys_sendmsg(sock, msg_sys, flags, used_address,
2643	allowed_msghdr_flags);
2644	kfree(objp: iov);
2645	return err;
2646	}
2647
2648	/*
2649	* BSD sendmsg interface
2650	*/
2651	long __sys_sendmsg_sock(struct socket sock, struct* msghdr *msg,
2652	unsigned int flags)
2653	{
2654	return ____sys_sendmsg(sock, msg_sys: msg, flags, NULL, allowed_msghdr_flags: `0`);
2655	}
2656
2657	long __sys_sendmsg(int fd, struct user_msghdr __user msg, unsigned* int flags,
2658	bool forbid_cmsg_compat)
2659	{
2660	int fput_needed, err;
2661	struct msghdr msg_sys;
2662	struct socket *sock;
2663
2664	if (forbid_cmsg_compat && (flags & MSG_CMSG_COMPAT))
2665	return -EINVAL;
2666
2667	sock = sockfd_lookup_light(fd, err: &err, fput_needed: &fput_needed);
2668	if (!sock)
2669	goto out;
2670
2671	err = ___sys_sendmsg(sock, msg, msg_sys: &msg_sys, flags, NULL, allowed_msghdr_flags: `0`);
2672
2673	fput_light(file: sock->file, fput_needed);
2674	out:
2675	return err;
2676	}
2677
2678	SYSCALL_DEFINE3(sendmsg, int, fd, struct user_msghdr __user , msg, unsigned* int, flags)
2679	{
2680	return __sys_sendmsg(fd, msg, flags, forbid_cmsg_compat: true);
2681	}
2682
2683	/*
2684	* Linux sendmmsg interface
2685	*/
2686
2687	int __sys_sendmmsg(int fd, struct mmsghdr __user mmsg, unsigned* int vlen,
2688	unsigned int flags, bool forbid_cmsg_compat)
2689	{
2690	int fput_needed, err, datagrams;
2691	struct socket *sock;
2692	struct mmsghdr __user *entry;
2693	struct compat_mmsghdr __user *compat_entry;
2694	struct msghdr msg_sys;
2695	struct used_address used_address;
2696	unsigned int oflags = flags;
2697
2698	if (forbid_cmsg_compat && (flags & MSG_CMSG_COMPAT))
2699	return -EINVAL;
2700
2701	if (vlen > UIO_MAXIOV)
2702	vlen = UIO_MAXIOV;
2703
2704	datagrams = `0`;
2705
2706	sock = sockfd_lookup_light(fd, err: &err, fput_needed: &fput_needed);
2707	if (!sock)
2708	return err;
2709
2710	used_address.name_len = UINT_MAX;
2711	entry = mmsg;
2712	compat_entry = (struct compat_mmsghdr __user *)mmsg;
2713	err = `0`;
2714	flags \|= MSG_BATCH;
2715
2716	while (datagrams < vlen) {
2717	if (datagrams == vlen - `1`)
2718	flags = oflags;
2719
2720	if (MSG_CMSG_COMPAT & flags) {
2721	err = ___sys_sendmsg(sock, msg: (struct user_msghdr __user *)compat_entry,
2722	msg_sys: &msg_sys, flags, used_address: &used_address, MSG_EOR);
2723	if (err < `0`)
2724	break;
2725	err = __put_user(err, &compat_entry->msg_len);
2726	++compat_entry;
2727	} else {
2728	err = ___sys_sendmsg(sock,
2729	msg: (struct user_msghdr __user *)entry,
2730	msg_sys: &msg_sys, flags, used_address: &used_address, MSG_EOR);
2731	if (err < `0`)
2732	break;
2733	err = put_user(err, &entry->msg_len);
2734	++entry;
2735	}
2736
2737	if (err)
2738	break;
2739	++datagrams;
2740	if (msg_data_left(msg: &msg_sys))
2741	break;
2742	cond_resched();
2743	}
2744
2745	fput_light(file: sock->file, fput_needed);
2746
2747	/ We only return an error if no datagrams were able to be sent /
2748	if (datagrams != `0`)
2749	return datagrams;
2750
2751	return err;
2752	}
2753
2754	SYSCALL_DEFINE4(sendmmsg, int, fd, struct mmsghdr __user *, mmsg,
2755	unsigned int, vlen, unsigned int, flags)
2756	{
2757	return __sys_sendmmsg(fd, mmsg, vlen, flags, forbid_cmsg_compat: true);
2758	}
2759
2760	int recvmsg_copy_msghdr(struct msghdr *msg,
2761	struct user_msghdr __user umsg, unsigned* flags,
2762	struct sockaddr __user **uaddr,
2763	struct iovec **iov)
2764	{
2765	ssize_t err;
2766
2767	if (MSG_CMSG_COMPAT & flags) {
2768	struct compat_msghdr __user *msg_compat;
2769
2770	msg_compat = (struct compat_msghdr __user *) umsg;
2771	err = get_compat_msghdr(msg, msg_compat, uaddr, iov);
2772	} else {
2773	err = copy_msghdr_from_user(kmsg: msg, umsg, save_addr: uaddr, iov);
2774	}
2775	if (err < `0`)
2776	return err;
2777
2778	return `0`;
2779	}
2780
2781	static int ____sys_recvmsg(struct socket sock, struct* msghdr *msg_sys,
2782	struct user_msghdr __user *msg,
2783	struct sockaddr __user *uaddr,
2784	unsigned int flags, int nosec)
2785	{
2786	struct compat_msghdr __user *msg_compat =
2787	(struct compat_msghdr __user *) msg;
2788	int __user *uaddr_len = COMPAT_NAMELEN(msg);
2789	struct sockaddr_storage addr;
2790	unsigned long cmsg_ptr;
2791	int len;
2792	ssize_t err;
2793
2794	msg_sys->msg_name = &addr;
2795	cmsg_ptr = (unsigned long)msg_sys->msg_control;
2796	msg_sys->msg_flags = flags & (MSG_CMSG_CLOEXEC\|MSG_CMSG_COMPAT);
2797
2798	/ We assume all kernel code knows the size of sockaddr_storage /
2799	msg_sys->msg_namelen = `0`;
2800
2801	if (sock->file->f_flags & O_NONBLOCK)
2802	flags \|= MSG_DONTWAIT;
2803
2804	if (unlikely(nosec))
2805	err = sock_recvmsg_nosec(sock, msg: msg_sys, flags);
2806	else
2807	err = sock_recvmsg(sock, msg_sys, flags);
2808
2809	if (err < `0`)
2810	goto out;
2811	len = err;
2812
2813	if (uaddr != NULL) {
2814	err = move_addr_to_user(kaddr: &addr,
2815	klen: msg_sys->msg_namelen, uaddr,
2816	ulen: uaddr_len);
2817	if (err < `0`)
2818	goto out;
2819	}
2820	err = __put_user((msg_sys->msg_flags & ~MSG_CMSG_COMPAT),
2821	COMPAT_FLAGS(msg));
2822	if (err)
2823	goto out;
2824	if (MSG_CMSG_COMPAT & flags)
2825	err = __put_user((unsigned long)msg_sys->msg_control - cmsg_ptr,
2826	&msg_compat->msg_controllen);
2827	else
2828	err = __put_user((unsigned long)msg_sys->msg_control - cmsg_ptr,
2829	&msg->msg_controllen);
2830	if (err)
2831	goto out;
2832	err = len;
2833	out:
2834	return err;
2835	}
2836
2837	static int ___sys_recvmsg(struct socket sock, struct* user_msghdr __user *msg,
2838	struct msghdr msg_sys, unsigned* int flags, int nosec)
2839	{
2840	struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
2841	/ user mode address pointers /
2842	struct sockaddr __user *uaddr;
2843	ssize_t err;
2844
2845	err = recvmsg_copy_msghdr(msg: msg_sys, umsg: msg, flags, uaddr: &uaddr, iov: &iov);
2846	if (err < `0`)
2847	return err;
2848
2849	err = ____sys_recvmsg(sock, msg_sys, msg, uaddr, flags, nosec);
2850	kfree(objp: iov);
2851	return err;
2852	}
2853
2854	/*
2855	* BSD recvmsg interface
2856	*/
2857
2858	long __sys_recvmsg_sock(struct socket sock, struct* msghdr *msg,
2859	struct user_msghdr __user *umsg,
2860	struct sockaddr __user uaddr, unsigned* int flags)
2861	{
2862	return ____sys_recvmsg(sock, msg_sys: msg, msg: umsg, uaddr, flags, nosec: `0`);
2863	}
2864
2865	long __sys_recvmsg(int fd, struct user_msghdr __user msg, unsigned* int flags,
2866	bool forbid_cmsg_compat)
2867	{
2868	int fput_needed, err;
2869	struct msghdr msg_sys;
2870	struct socket *sock;
2871
2872	if (forbid_cmsg_compat && (flags & MSG_CMSG_COMPAT))
2873	return -EINVAL;
2874
2875	sock = sockfd_lookup_light(fd, err: &err, fput_needed: &fput_needed);
2876	if (!sock)
2877	goto out;
2878
2879	err = ___sys_recvmsg(sock, msg, msg_sys: &msg_sys, flags, nosec: `0`);
2880
2881	fput_light(file: sock->file, fput_needed);
2882	out:
2883	return err;
2884	}
2885
2886	SYSCALL_DEFINE3(recvmsg, int, fd, struct user_msghdr __user *, msg,
2887	unsigned int, flags)
2888	{
2889	return __sys_recvmsg(fd, msg, flags, forbid_cmsg_compat: true);
2890	}
2891
2892	/*
2893	* Linux recvmmsg interface
2894	*/
2895
2896	static int do_recvmmsg(int fd, struct mmsghdr __user *mmsg,
2897	unsigned int vlen, unsigned int flags,
2898	struct timespec64 *timeout)
2899	{
2900	int fput_needed, err, datagrams;
2901	struct socket *sock;
2902	struct mmsghdr __user *entry;
2903	struct compat_mmsghdr __user *compat_entry;
2904	struct msghdr msg_sys;
2905	struct timespec64 end_time;
2906	struct timespec64 timeout64;
2907
2908	if (timeout &&
2909	poll_select_set_timeout(to: &end_time, sec: timeout->tv_sec,
2910	nsec: timeout->tv_nsec))
2911	return -EINVAL;
2912
2913	datagrams = `0`;
2914
2915	sock = sockfd_lookup_light(fd, err: &err, fput_needed: &fput_needed);
2916	if (!sock)
2917	return err;
2918
2919	if (likely(!(flags & MSG_ERRQUEUE))) {
2920	err = sock_error(sk: sock->sk);
2921	if (err) {
2922	datagrams = err;
2923	goto out_put;
2924	}
2925	}
2926
2927	entry = mmsg;
2928	compat_entry = (struct compat_mmsghdr __user *)mmsg;
2929
2930	while (datagrams < vlen) {
2931	/*
2932	* No need to ask LSM for more than the first datagram.
2933	*/
2934	if (MSG_CMSG_COMPAT & flags) {
2935	err = ___sys_recvmsg(sock, msg: (struct user_msghdr __user *)compat_entry,
2936	msg_sys: &msg_sys, flags: flags & ~MSG_WAITFORONE,
2937	nosec: datagrams);
2938	if (err < `0`)
2939	break;
2940	err = __put_user(err, &compat_entry->msg_len);
2941	++compat_entry;
2942	} else {
2943	err = ___sys_recvmsg(sock,
2944	msg: (struct user_msghdr __user *)entry,
2945	msg_sys: &msg_sys, flags: flags & ~MSG_WAITFORONE,
2946	nosec: datagrams);
2947	if (err < `0`)
2948	break;
2949	err = put_user(err, &entry->msg_len);
2950	++entry;
2951	}
2952
2953	if (err)
2954	break;
2955	++datagrams;
2956
2957	/ MSG_WAITFORONE turns on MSG_DONTWAIT after one packet /
2958	if (flags & MSG_WAITFORONE)
2959	flags \|= MSG_DONTWAIT;
2960
2961	if (timeout) {
2962	ktime_get_ts64(ts: &timeout64);
2963	*timeout = timespec64_sub(lhs: end_time, rhs: timeout64);
2964	if (timeout->tv_sec < `0`) {
2965	timeout->tv_sec = timeout->tv_nsec = `0`;
2966	break;
2967	}
2968
2969	/ Timeout, return less than vlen datagrams /
2970	if (timeout->tv_nsec == `0` && timeout->tv_sec == `0`)
2971	break;
2972	}
2973
2974	/ Out of band data, return right away /
2975	if (msg_sys.msg_flags & MSG_OOB)
2976	break;
2977	cond_resched();
2978	}
2979
2980	if (err == `0`)
2981	goto out_put;
2982
2983	if (datagrams == `0`) {
2984	datagrams = err;
2985	goto out_put;
2986	}
2987
2988	/*
2989	* We may return less entries than requested (vlen) if the
2990	* sock is non block and there aren't enough datagrams...
2991	*/
2992	if (err != -EAGAIN) {
2993	/*
2994	* ... or if recvmsg returns an error after we
2995	* received some datagrams, where we record the
2996	* error to return on the next call or if the
2997	* app asks about it using getsockopt(SO_ERROR).
2998	*/
2999	WRITE_ONCE(sock->sk->sk_err, -err);
3000	}
3001	out_put:
3002	fput_light(file: sock->file, fput_needed);
3003
3004	return datagrams;
3005	}
3006
3007	int __sys_recvmmsg(int fd, struct mmsghdr __user *mmsg,
3008	unsigned int vlen, unsigned int flags,
3009	struct __kernel_timespec __user *timeout,
3010	struct old_timespec32 __user *timeout32)
3011	{
3012	int datagrams;
3013	struct timespec64 timeout_sys;
3014
3015	if (timeout && get_timespec64(ts: &timeout_sys, uts: timeout))
3016	return -EFAULT;
3017
3018	if (timeout32 && get_old_timespec32(&timeout_sys, timeout32))
3019	return -EFAULT;
3020
3021	if (!timeout && !timeout32)
3022	return do_recvmmsg(fd, mmsg, vlen, flags, NULL);
3023
3024	datagrams = do_recvmmsg(fd, mmsg, vlen, flags, timeout: &timeout_sys);
3025
3026	if (datagrams <= `0`)
3027	return datagrams;
3028
3029	if (timeout && put_timespec64(ts: &timeout_sys, uts: timeout))
3030	datagrams = -EFAULT;
3031
3032	if (timeout32 && put_old_timespec32(&timeout_sys, timeout32))
3033	datagrams = -EFAULT;
3034
3035	return datagrams;
3036	}
3037
3038	SYSCALL_DEFINE5(recvmmsg, int, fd, struct mmsghdr __user *, mmsg,
3039	unsigned int, vlen, unsigned int, flags,
3040	struct __kernel_timespec __user *, timeout)
3041	{
3042	if (flags & MSG_CMSG_COMPAT)
3043	return -EINVAL;
3044
3045	return __sys_recvmmsg(fd, mmsg, vlen, flags, timeout, NULL);
3046	}
3047
#ifdef CONFIG_COMPAT_32BIT_TIME
/*
 * recvmmsg variant taking an old_timespec32 timeout. MSG_CMSG_COMPAT is
 * refused with -EINVAL; the timeout is passed to __sys_recvmmsg() in the
 * 32-bit slot.
 */
SYSCALL_DEFINE5(recvmmsg_time32, int, fd, struct mmsghdr __user *, mmsg,
		unsigned int, vlen, unsigned int, flags,
		struct old_timespec32 __user *, timeout)
{
	if (flags & MSG_CMSG_COMPAT)
		return -EINVAL;

	return __sys_recvmmsg(fd, mmsg, vlen, flags, NULL, timeout);
}
#endif
3059
3060	#ifdef __ARCH_WANT_SYS_SOCKETCALL
/* Argument list sizes for sys_socketcall */
/* AL(x): size in bytes of an argument list holding x unsigned longs. */
#define AL(x) ((x) * sizeof(unsigned long))
/* Indexed by socketcall number; entry 0 is unused (calls start at 1). */
static const unsigned char nargs[21] = {
	AL(0), AL(3), AL(3), AL(3), AL(2), AL(3),
	AL(3), AL(3), AL(4), AL(4), AL(4), AL(6),
	AL(6), AL(2), AL(5), AL(5), AL(3), AL(3),
	AL(4), AL(5), AL(4)
};

#undef AL
3071
3072	/*
3073	* System call vectors.
3074	*
3075	* Argument checking cleaned up. Saved 20% in size.
3076	* This function doesn't need to set the kernel lock because
3077	* it is set by the callees.
3078	*/
3079
3080	SYSCALL_DEFINE2(socketcall, int, call, unsigned long __user *, args)
3081	{
3082	unsigned long a[AUDITSC_ARGS];
3083	unsigned long a0, a1;
3084	int err;
3085	unsigned int len;
3086
3087	if (call < `1` \|\| call > SYS_SENDMMSG)
3088	return -EINVAL;
3089	call = array_index_nospec(call, SYS_SENDMMSG + `1`);
3090
3091	len = nargs[call];
3092	if (len > sizeof(a))
3093	return -EINVAL;
3094
3095	/ copy_from_user should be SMP safe. /
3096	if (copy_from_user(to: a, from: args, n: len))
3097	return -EFAULT;
3098
3099	err = audit_socketcall(nargs: nargs[call] / sizeof(unsigned long), args: a);
3100	if (err)
3101	return err;
3102
3103	a0 = a[`0`];
3104	a1 = a[`1`];
3105
3106	switch (call) {
3107	case SYS_SOCKET:
3108	err = __sys_socket(family: a0, type: a1, protocol: a[`2`]);
3109	break;
3110	case SYS_BIND:
3111	err = __sys_bind(fd: a0, umyaddr: (struct sockaddr __user *)a1, addrlen: a[`2`]);
3112	break;
3113	case SYS_CONNECT:
3114	err = __sys_connect(fd: a0, uservaddr: (struct sockaddr __user *)a1, addrlen: a[`2`]);
3115	break;
3116	case SYS_LISTEN:
3117	err = __sys_listen(fd: a0, backlog: a1);
3118	break;
3119	case SYS_ACCEPT:
3120	err = __sys_accept4(fd: a0, upeer_sockaddr: (struct sockaddr __user *)a1,
3121	upeer_addrlen: (int __user *)a[`2`], flags: `0`);
3122	break;
3123	case SYS_GETSOCKNAME:
3124	err =
3125	__sys_getsockname(fd: a0, usockaddr: (struct sockaddr __user *)a1,
3126	usockaddr_len: (int __user *)a[`2`]);
3127	break;
3128	case SYS_GETPEERNAME:
3129	err =
3130	__sys_getpeername(fd: a0, usockaddr: (struct sockaddr __user *)a1,
3131	usockaddr_len: (int __user *)a[`2`]);
3132	break;
3133	case SYS_SOCKETPAIR:
3134	err = __sys_socketpair(family: a0, type: a1, protocol: a[`2`], usockvec: (int __user *)a[`3`]);
3135	break;
3136	case SYS_SEND:
3137	err = __sys_sendto(fd: a0, buff: (void __user *)a1, len: a[`2`], flags: a[`3`],
3138	NULL, addr_len: `0`);
3139	break;
3140	case SYS_SENDTO:
3141	err = __sys_sendto(fd: a0, buff: (void __user *)a1, len: a[`2`], flags: a[`3`],
3142	addr: (struct sockaddr __user *)a[`4`], addr_len: a[`5`]);
3143	break;
3144	case SYS_RECV:
3145	err = __sys_recvfrom(fd: a0, ubuf: (void __user *)a1, size: a[`2`], flags: a[`3`],
3146	NULL, NULL);
3147	break;
3148	case SYS_RECVFROM:
3149	err = __sys_recvfrom(fd: a0, ubuf: (void __user *)a1, size: a[`2`], flags: a[`3`],
3150	addr: (struct sockaddr __user *)a[`4`],
3151	addr_len: (int __user *)a[`5`]);
3152	break;
3153	case SYS_SHUTDOWN:
3154	err = __sys_shutdown(fd: a0, how: a1);
3155	break;
3156	case SYS_SETSOCKOPT:
3157	err = __sys_setsockopt(fd: a0, level: a1, optname: a[`2`], user_optval: (char __user *)a[`3`],
3158	optlen: a[`4`]);
3159	break;
3160	case SYS_GETSOCKOPT:
3161	err =
3162	__sys_getsockopt(fd: a0, level: a1, optname: a[`2`], optval: (char __user *)a[`3`],
3163	optlen: (int __user *)a[`4`]);
3164	break;
3165	case SYS_SENDMSG:
3166	err = __sys_sendmsg(fd: a0, msg: (struct user_msghdr __user *)a1,
3167	flags: a[`2`], forbid_cmsg_compat: true);
3168	break;
3169	case SYS_SENDMMSG:
3170	err = __sys_sendmmsg(fd: a0, mmsg: (struct mmsghdr __user *)a1, vlen: a[`2`],
3171	flags: a[`3`], forbid_cmsg_compat: true);
3172	break;
3173	case SYS_RECVMSG:
3174	err = __sys_recvmsg(fd: a0, msg: (struct user_msghdr __user *)a1,
3175	flags: a[`2`], forbid_cmsg_compat: true);
3176	break;
3177	case SYS_RECVMMSG:
3178	if (IS_ENABLED(CONFIG_64BIT))
3179	err = __sys_recvmmsg(fd: a0, mmsg: (struct mmsghdr __user *)a1,
3180	vlen: a[`2`], flags: a[`3`],
3181	timeout: (struct __kernel_timespec __user *)a[`4`],
3182	NULL);
3183	else
3184	err = __sys_recvmmsg(fd: a0, mmsg: (struct mmsghdr __user *)a1,
3185	vlen: a[`2`], flags: a[`3`], NULL,
3186	timeout32: (struct old_timespec32 __user *)a[`4`]);
3187	break;
3188	case SYS_ACCEPT4:
3189	err = __sys_accept4(fd: a0, upeer_sockaddr: (struct sockaddr __user *)a1,
3190	upeer_addrlen: (int __user *)a[`2`], flags: a[`3`]);
3191	break;
3192	default:
3193	err = -EINVAL;
3194	break;
3195	}
3196	return err;
3197	}
3198
3199	#endif /* __ARCH_WANT_SYS_SOCKETCALL */
3200
3201	/**
3202	* sock_register - add a socket protocol handler
3203	* @ops: description of protocol
3204	*
3205	* This function is called by a protocol handler that wants to
3206	* advertise its address family, and have it linked into the
3207	* socket interface. The value ops->family corresponds to the
3208	* socket system call protocol family.
3209	*/
3210	int sock_register(const struct net_proto_family *ops)
3211	{
3212	int err;
3213
3214	if (ops->family >= NPROTO) {
3215	pr_crit("protocol %d >= NPROTO(%d)\n", ops->family, NPROTO);
3216	return -ENOBUFS;
3217	}
3218
3219	spin_lock(lock: &net_family_lock);
3220	if (rcu_dereference_protected(net_families[ops->family],
3221	lockdep_is_held(&net_family_lock)))
3222	err = -EEXIST;
3223	else {
3224	rcu_assign_pointer(net_families[ops->family], ops);
3225	err = `0`;
3226	}
3227	spin_unlock(lock: &net_family_lock);
3228
3229	pr_info("NET: Registered %s protocol family\n", pf_family_names[ops->family]);
3230	return err;
3231	}
3232	EXPORT_SYMBOL(sock_register);
3233
3234	/**
3235	* sock_unregister - remove a protocol handler
3236	* @family: protocol family to remove
3237	*
3238	* This function is called by a protocol handler that wants to
3239	* remove its address family, and have it unlinked from the
3240	* new socket creation.
3241	*
3242	* If protocol handler is a module, then it can use module reference
3243	* counts to protect against new references. If protocol handler is not
3244	* a module then it needs to provide its own protection in
3245	* the ops->create routine.
3246	*/
3247	void sock_unregister(int family)
3248	{
3249	BUG_ON(family < `0` \|\| family >= NPROTO);
3250
3251	spin_lock(lock: &net_family_lock);
3252	RCU_INIT_POINTER(net_families[family], NULL);
3253	spin_unlock(lock: &net_family_lock);
3254
3255	synchronize_rcu();
3256
3257	pr_info("NET: Unregistered %s protocol family\n", pf_family_names[family]);
3258	}
3259	EXPORT_SYMBOL(sock_unregister);
3260
3261	bool sock_is_registered(int family)
3262	{
3263	return family < NPROTO && rcu_access_pointer(net_families[family]);
3264	}
3265
3266	static int __init sock_init(void)
3267	{
3268	int err;
3269	/*
3270	* Initialize the network sysctl infrastructure.
3271	*/
3272	err = net_sysctl_init();
3273	if (err)
3274	goto out;
3275
3276	/*
3277	* Initialize skbuff SLAB cache
3278	*/
3279	skb_init();
3280
3281	/*
3282	* Initialize the protocols module.
3283	*/
3284
3285	init_inodecache();
3286
3287	err = register_filesystem(&sock_fs_type);
3288	if (err)
3289	goto out;
3290	sock_mnt = kern_mount(&sock_fs_type);
3291	if (IS_ERR(ptr: sock_mnt)) {
3292	err = PTR_ERR(ptr: sock_mnt);
3293	goto out_mount;
3294	}
3295
3296	/ The real protocol initialization is performed in later initcalls.*
3297	*/
3298
3299	#ifdef CONFIG_NETFILTER
3300	err = netfilter_init();
3301	if (err)
3302	goto out;
3303	#endif
3304
3305	ptp_classifier_init();
3306
3307	out:
3308	return err;
3309
3310	out_mount:
3311	unregister_filesystem(&sock_fs_type);
3312	goto out;
3313	}
3314
3315	core_initcall(sock_init); / early initcall /
3316
#ifdef CONFIG_PROC_FS
/*
 * socket_seq_show - print the in-use socket count to a seq_file.
 * @seq: output seq_file; seq->private is passed to sock_inuse_get()
 *       as the network namespace.
 */
void socket_seq_show(struct seq_file *seq)
{
	seq_printf(seq, "sockets: used %d\n",
		   sock_inuse_get(seq->private));
}
#endif				/* CONFIG_PROC_FS */
3324
3325	/ Handle the fact that while struct ifreq has the same layout on*
3326	* 32/64 for everything but ifreq::ifru_ifmap and ifreq::ifru_data,
3327	* which are handled elsewhere, it still has different size due to
3328	* ifreq::ifru_ifmap (which is 16 bytes on 32 bit, 24 bytes on 64-bit,
3329	* resulting in struct ifreq being 32 and 40 bytes respectively).
3330	* As a result, if the struct happens to be at the end of a page and
3331	* the next page isn't readable/writable, we get a fault. To prevent
3332	* that, copy back and forth to the full size.
3333	*/
3334	int get_user_ifreq(struct ifreq ifr, void* __user *ifrdata, void* __user *arg)
3335	{
3336	if (in_compat_syscall()) {
3337	struct compat_ifreq ifr32 = (struct* compat_ifreq *)ifr;
3338
3339	memset(ifr, `0`, sizeof(*ifr));
3340	if (copy_from_user(to: ifr32, from: arg, n: sizeof(*ifr32)))
3341	return -EFAULT;
3342
3343	if (ifrdata)
3344	*ifrdata = compat_ptr(uptr: ifr32->ifr_data);
3345
3346	return `0`;
3347	}
3348
3349	if (copy_from_user(to: ifr, from: arg, n: sizeof(*ifr)))
3350	return -EFAULT;
3351
3352	if (ifrdata)
3353	*ifrdata = ifr->ifr_data;
3354
3355	return `0`;
3356	}
3357	EXPORT_SYMBOL(get_user_ifreq);
3358
3359	int put_user_ifreq(struct ifreq ifr, void* __user *arg)
3360	{
3361	size_t size = sizeof(*ifr);
3362
3363	if (in_compat_syscall())
3364	size = sizeof(struct compat_ifreq);
3365
3366	if (copy_to_user(to: arg, from: ifr, n: size))
3367	return -EFAULT;
3368
3369	return `0`;
3370	}
3371	EXPORT_SYMBOL(put_user_ifreq);
3372
3373	#ifdef CONFIG_COMPAT
3374	static int compat_siocwandev(struct net net, struct* compat_ifreq __user *uifr32)
3375	{
3376	compat_uptr_t uptr32;
3377	struct ifreq ifr;
3378	void __user *saved;
3379	int err;
3380
3381	if (get_user_ifreq(&ifr, NULL, uifr32))
3382	return -EFAULT;
3383
3384	if (get_user(uptr32, &uifr32->ifr_settings.ifs_ifsu))
3385	return -EFAULT;
3386
3387	saved = ifr.ifr_settings.ifs_ifsu.raw_hdlc;
3388	ifr.ifr_settings.ifs_ifsu.raw_hdlc = compat_ptr(uptr: uptr32);
3389
3390	err = dev_ioctl(net, SIOCWANDEV, ifr: &ifr, NULL, NULL);
3391	if (!err) {
3392	ifr.ifr_settings.ifs_ifsu.raw_hdlc = saved;
3393	if (put_user_ifreq(&ifr, uifr32))
3394	err = -EFAULT;
3395	}
3396	return err;
3397	}
3398
3399	/ Handle ioctls that use ifreq::ifr_data and just need struct ifreq converted /
3400	static int compat_ifr_data_ioctl(struct net net, unsigned* int cmd,
3401	struct compat_ifreq __user *u_ifreq32)
3402	{
3403	struct ifreq ifreq;
3404	void __user *data;
3405
3406	if (!is_socket_ioctl_cmd(cmd))
3407	return -ENOTTY;
3408	if (get_user_ifreq(&ifreq, &data, u_ifreq32))
3409	return -EFAULT;
3410	ifreq.ifr_data = data;
3411
3412	return dev_ioctl(net, cmd, ifr: &ifreq, data, NULL);
3413	}
3414
3415	static int compat_sock_ioctl_trans(struct file file, struct* socket *sock,
3416	unsigned int cmd, unsigned long arg)
3417	{
3418	void __user *argp = compat_ptr(uptr: arg);
3419	struct sock *sk = sock->sk;
3420	struct net *net = sock_net(sk);
3421	const struct proto_ops *ops;
3422
3423	if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + `15`))
3424	return sock_ioctl(file, cmd, arg: (unsigned long)argp);
3425
3426	switch (cmd) {
3427	case SIOCWANDEV:
3428	return compat_siocwandev(net, uifr32: argp);
3429	case SIOCGSTAMP_OLD:
3430	case SIOCGSTAMPNS_OLD:
3431	ops = READ_ONCE(sock->ops);
3432	if (!ops->gettstamp)
3433	return -ENOIOCTLCMD;
3434	return ops->gettstamp(sock, argp, cmd == SIOCGSTAMP_OLD,
3435	!COMPAT_USE_64BIT_TIME);
3436
3437	case SIOCETHTOOL:
3438	case SIOCBONDSLAVEINFOQUERY:
3439	case SIOCBONDINFOQUERY:
3440	case SIOCSHWTSTAMP:
3441	case SIOCGHWTSTAMP:
3442	return compat_ifr_data_ioctl(net, cmd, u_ifreq32: argp);
3443
3444	case FIOSETOWN:
3445	case SIOCSPGRP:
3446	case FIOGETOWN:
3447	case SIOCGPGRP:
3448	case SIOCBRADDBR:
3449	case SIOCBRDELBR:
3450	case SIOCGIFVLAN:
3451	case SIOCSIFVLAN:
3452	case SIOCGSKNS:
3453	case SIOCGSTAMP_NEW:
3454	case SIOCGSTAMPNS_NEW:
3455	case SIOCGIFCONF:
3456	case SIOCSIFBR:
3457	case SIOCGIFBR:
3458	return sock_ioctl(file, cmd, arg);
3459
3460	case SIOCGIFFLAGS:
3461	case SIOCSIFFLAGS:
3462	case SIOCGIFMAP:
3463	case SIOCSIFMAP:
3464	case SIOCGIFMETRIC:
3465	case SIOCSIFMETRIC:
3466	case SIOCGIFMTU:
3467	case SIOCSIFMTU:
3468	case SIOCGIFMEM:
3469	case SIOCSIFMEM:
3470	case SIOCGIFHWADDR:
3471	case SIOCSIFHWADDR:
3472	case SIOCADDMULTI:
3473	case SIOCDELMULTI:
3474	case SIOCGIFINDEX:
3475	case SIOCGIFADDR:
3476	case SIOCSIFADDR:
3477	case SIOCSIFHWBROADCAST:
3478	case SIOCDIFADDR:
3479	case SIOCGIFBRDADDR:
3480	case SIOCSIFBRDADDR:
3481	case SIOCGIFDSTADDR:
3482	case SIOCSIFDSTADDR:
3483	case SIOCGIFNETMASK:
3484	case SIOCSIFNETMASK:
3485	case SIOCSIFPFLAGS:
3486	case SIOCGIFPFLAGS:
3487	case SIOCGIFTXQLEN:
3488	case SIOCSIFTXQLEN:
3489	case SIOCBRADDIF:
3490	case SIOCBRDELIF:
3491	case SIOCGIFNAME:
3492	case SIOCSIFNAME:
3493	case SIOCGMIIPHY:
3494	case SIOCGMIIREG:
3495	case SIOCSMIIREG:
3496	case SIOCBONDENSLAVE:
3497	case SIOCBONDRELEASE:
3498	case SIOCBONDSETHWADDR:
3499	case SIOCBONDCHANGEACTIVE:
3500	case SIOCSARP:
3501	case SIOCGARP:
3502	case SIOCDARP:
3503	case SIOCOUTQ:
3504	case SIOCOUTQNSD:
3505	case SIOCATMARK:
3506	return sock_do_ioctl(net, sock, cmd, arg);
3507	}
3508
3509	return -ENOIOCTLCMD;
3510	}
3511
3512	static long compat_sock_ioctl(struct file file, unsigned* int cmd,
3513	unsigned long arg)
3514	{
3515	struct socket *sock = file->private_data;
3516	const struct proto_ops *ops = READ_ONCE(sock->ops);
3517	int ret = -ENOIOCTLCMD;
3518	struct sock *sk;
3519	struct net *net;
3520
3521	sk = sock->sk;
3522	net = sock_net(sk);
3523
3524	if (ops->compat_ioctl)
3525	ret = ops->compat_ioctl(sock, cmd, arg);
3526
3527	if (ret == -ENOIOCTLCMD &&
3528	(cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST))
3529	ret = compat_wext_handle_ioctl(net, cmd, arg);
3530
3531	if (ret == -ENOIOCTLCMD)
3532	ret = compat_sock_ioctl_trans(file, sock, cmd, arg);
3533
3534	return ret;
3535	}
3536	#endif
3537
3538	/**
3539	* kernel_bind - bind an address to a socket (kernel space)
3540	* @sock: socket
3541	* @addr: address
3542	* @addrlen: length of address
3543	*
3544	* Returns 0 or an error.
3545	*/
3546
3547	int kernel_bind(struct socket sock, struct* sockaddr addr, int* addrlen)
3548	{
3549	struct sockaddr_storage address;
3550
3551	memcpy(&address, addr, addrlen);
3552
3553	return READ_ONCE(sock->ops)->bind(sock, (struct sockaddr *)&address,
3554	addrlen);
3555	}
3556	EXPORT_SYMBOL(kernel_bind);
3557
3558	/**
3559	* kernel_listen - move socket to listening state (kernel space)
3560	* @sock: socket
3561	* @backlog: pending connections queue size
3562	*
3563	* Returns 0 or an error.
3564	*/
3565
3566	int kernel_listen(struct socket sock, int* backlog)
3567	{
3568	return READ_ONCE(sock->ops)->listen(sock, backlog);
3569	}
3570	EXPORT_SYMBOL(kernel_listen);
3571
3572	/**
3573	* kernel_accept - accept a connection (kernel space)
3574	* @sock: listening socket
3575	* @newsock: new connected socket
3576	* @flags: flags
3577	*
3578	* @flags must be SOCK_CLOEXEC, SOCK_NONBLOCK or 0.
3579	* If it fails, @newsock is guaranteed to be %NULL.
3580	* Returns 0 or an error.
3581	*/
3582
3583	int kernel_accept(struct socket sock, struct* socket *newsock, int* flags)
3584	{
3585	struct sock *sk = sock->sk;
3586	const struct proto_ops *ops = READ_ONCE(sock->ops);
3587	int err;
3588
3589	err = sock_create_lite(sk->sk_family, sk->sk_type, sk->sk_protocol,
3590	newsock);
3591	if (err < `0`)
3592	goto done;
3593
3594	err = ops->accept(sock, *newsock, flags, true);
3595	if (err < `0`) {
3596	sock_release(*newsock);
3597	*newsock = NULL;
3598	goto done;
3599	}
3600
3601	(*newsock)->ops = ops;
3602	__module_get(module: ops->owner);
3603
3604	done:
3605	return err;
3606	}
3607	EXPORT_SYMBOL(kernel_accept);
3608
3609	/**
3610	* kernel_connect - connect a socket (kernel space)
3611	* @sock: socket
3612	* @addr: address
3613	* @addrlen: address length
3614	* @flags: flags (O_NONBLOCK, ...)
3615	*
3616	* For datagram sockets, @addr is the address to which datagrams are sent
3617	* by default, and the only address from which datagrams are received.
3618	* For stream sockets, attempts to connect to @addr.
3619	* Returns 0 or an error code.
3620	*/
3621
3622	int kernel_connect(struct socket sock, struct* sockaddr addr, int* addrlen,
3623	int flags)
3624	{
3625	struct sockaddr_storage address;
3626
3627	memcpy(&address, addr, addrlen);
3628
3629	return READ_ONCE(sock->ops)->connect(sock, (struct sockaddr *)&address,
3630	addrlen, flags);
3631	}
3632	EXPORT_SYMBOL(kernel_connect);
3633
3634	/**
3635	* kernel_getsockname - get the address which the socket is bound (kernel space)
3636	* @sock: socket
3637	* @addr: address holder
3638	*
3639	* Fills the @addr pointer with the address which the socket is bound.
3640	* Returns the length of the address in bytes or an error code.
3641	*/
3642
3643	int kernel_getsockname(struct socket sock, struct* sockaddr *addr)
3644	{
3645	return READ_ONCE(sock->ops)->getname(sock, addr, `0`);
3646	}
3647	EXPORT_SYMBOL(kernel_getsockname);
3648
3649	/**
3650	* kernel_getpeername - get the address which the socket is connected (kernel space)
3651	* @sock: socket
3652	* @addr: address holder
3653	*
3654	* Fills the @addr pointer with the address which the socket is connected.
3655	* Returns the length of the address in bytes or an error code.
3656	*/
3657
3658	int kernel_getpeername(struct socket sock, struct* sockaddr *addr)
3659	{
3660	return READ_ONCE(sock->ops)->getname(sock, addr, `1`);
3661	}
3662	EXPORT_SYMBOL(kernel_getpeername);
3663
3664	/**
3665	* kernel_sock_shutdown - shut down part of a full-duplex connection (kernel space)
3666	* @sock: socket
3667	* @how: connection part
3668	*
3669	* Returns 0 or an error.
3670	*/
3671
3672	int kernel_sock_shutdown(struct socket sock, enum* sock_shutdown_cmd how)
3673	{
3674	return READ_ONCE(sock->ops)->shutdown(sock, how);
3675	}
3676	EXPORT_SYMBOL(kernel_sock_shutdown);
3677
3678	/**
3679	* kernel_sock_ip_overhead - returns the IP overhead imposed by a socket
3680	* @sk: socket
3681	*
3682	* This routine returns the IP overhead imposed by a socket i.e.
3683	* the length of the underlying IP header, depending on whether
3684	* this is an IPv4 or IPv6 socket and the length from IP options turned
3685	* on at the socket. Assumes that the caller has a lock on the socket.
3686	*/
3687
3688	u32 kernel_sock_ip_overhead(struct sock *sk)
3689	{
3690	struct inet_sock *inet;
3691	struct ip_options_rcu *opt;
3692	u32 overhead = `0`;
3693	#if IS_ENABLED(CONFIG_IPV6)
3694	struct ipv6_pinfo *np;
3695	struct ipv6_txoptions *optv6 = NULL;
3696	#endif /* IS_ENABLED(CONFIG_IPV6) */
3697
3698	if (!sk)
3699	return overhead;
3700
3701	switch (sk->sk_family) {
3702	case AF_INET:
3703	inet = inet_sk(sk);
3704	overhead += sizeof(struct iphdr);
3705	opt = rcu_dereference_protected(inet->inet_opt,
3706	sock_owned_by_user(sk));
3707	if (opt)
3708	overhead += opt->opt.optlen;
3709	return overhead;
3710	#if IS_ENABLED(CONFIG_IPV6)
3711	case AF_INET6:
3712	np = inet6_sk(sk: sk);
3713	overhead += sizeof(struct ipv6hdr);
3714	if (np)
3715	optv6 = rcu_dereference_protected(np->opt,
3716	sock_owned_by_user(sk));
3717	if (optv6)
3718	overhead += (optv6->opt_flen + optv6->opt_nflen);
3719	return overhead;
3720	#endif /* IS_ENABLED(CONFIG_IPV6) */
3721	default: / Returns 0 overhead if the socket is not ipv4 or ipv6 /
3722	return overhead;
3723	}
3724	}
3725	EXPORT_SYMBOL(kernel_sock_ip_overhead);
3726

source code of linux/net/socket.c