1 | // SPDX-License-Identifier: GPL-2.0 |
2 | /* |
3 | * INET An implementation of the TCP/IP protocol suite for the LINUX |
4 | * operating system. INET is implemented using the BSD Socket |
5 | * interface as the means of communication with the user level. |
6 | * |
7 | * The IP to API glue. |
8 | * |
9 | * Authors: see ip.c |
10 | * |
11 | * Fixes: |
12 | * Many : Split from ip.c , see ip.c for history. |
13 | * Martin Mares : TOS setting fixed. |
14 | * Alan Cox : Fixed a couple of oopses in Martin's |
15 | * TOS tweaks. |
16 | * Mike McLagan : Routing by source |
17 | */ |
18 | |
19 | #include <linux/module.h> |
20 | #include <linux/types.h> |
21 | #include <linux/mm.h> |
22 | #include <linux/skbuff.h> |
23 | #include <linux/ip.h> |
24 | #include <linux/icmp.h> |
25 | #include <linux/inetdevice.h> |
26 | #include <linux/netdevice.h> |
27 | #include <linux/slab.h> |
28 | #include <net/sock.h> |
29 | #include <net/ip.h> |
30 | #include <net/icmp.h> |
31 | #include <net/tcp_states.h> |
32 | #include <linux/udp.h> |
33 | #include <linux/igmp.h> |
34 | #include <linux/netfilter.h> |
35 | #include <linux/route.h> |
36 | #include <linux/mroute.h> |
37 | #include <net/inet_ecn.h> |
38 | #include <net/route.h> |
39 | #include <net/xfrm.h> |
40 | #include <net/compat.h> |
41 | #include <net/checksum.h> |
42 | #if IS_ENABLED(CONFIG_IPV6) |
43 | #include <net/transp_v6.h> |
44 | #endif |
45 | #include <net/ip_fib.h> |
46 | |
47 | #include <linux/errqueue.h> |
48 | #include <linux/uaccess.h> |
49 | |
50 | #include <linux/bpfilter.h> |
51 | |
52 | /* |
53 | * SOL_IP control messages. |
54 | */ |
55 | |
56 | static void ip_cmsg_recv_pktinfo(struct msghdr *msg, struct sk_buff *skb) |
57 | { |
58 | struct in_pktinfo info = *PKTINFO_SKB_CB(skb); |
59 | |
60 | info.ipi_addr.s_addr = ip_hdr(skb)->daddr; |
61 | |
62 | put_cmsg(msg, SOL_IP, IP_PKTINFO, sizeof(info), &info); |
63 | } |
64 | |
65 | static void ip_cmsg_recv_ttl(struct msghdr *msg, struct sk_buff *skb) |
66 | { |
67 | int ttl = ip_hdr(skb)->ttl; |
68 | put_cmsg(msg, SOL_IP, IP_TTL, sizeof(int), &ttl); |
69 | } |
70 | |
71 | static void ip_cmsg_recv_tos(struct msghdr *msg, struct sk_buff *skb) |
72 | { |
73 | put_cmsg(msg, SOL_IP, IP_TOS, 1, &ip_hdr(skb)->tos); |
74 | } |
75 | |
76 | static void ip_cmsg_recv_opts(struct msghdr *msg, struct sk_buff *skb) |
77 | { |
78 | if (IPCB(skb)->opt.optlen == 0) |
79 | return; |
80 | |
81 | put_cmsg(msg, SOL_IP, IP_RECVOPTS, IPCB(skb)->opt.optlen, |
82 | ip_hdr(skb) + 1); |
83 | } |
84 | |
85 | |
86 | static void ip_cmsg_recv_retopts(struct net *net, struct msghdr *msg, |
87 | struct sk_buff *skb) |
88 | { |
89 | unsigned char optbuf[sizeof(struct ip_options) + 40]; |
90 | struct ip_options *opt = (struct ip_options *)optbuf; |
91 | |
92 | if (IPCB(skb)->opt.optlen == 0) |
93 | return; |
94 | |
95 | if (ip_options_echo(net, opt, skb)) { |
96 | msg->msg_flags |= MSG_CTRUNC; |
97 | return; |
98 | } |
99 | ip_options_undo(opt); |
100 | |
101 | put_cmsg(msg, SOL_IP, IP_RETOPTS, opt->optlen, opt->__data); |
102 | } |
103 | |
104 | static void ip_cmsg_recv_fragsize(struct msghdr *msg, struct sk_buff *skb) |
105 | { |
106 | int val; |
107 | |
108 | if (IPCB(skb)->frag_max_size == 0) |
109 | return; |
110 | |
111 | val = IPCB(skb)->frag_max_size; |
112 | put_cmsg(msg, SOL_IP, IP_RECVFRAGSIZE, sizeof(val), &val); |
113 | } |
114 | |
115 | static void ip_cmsg_recv_checksum(struct msghdr *msg, struct sk_buff *skb, |
116 | int tlen, int offset) |
117 | { |
118 | __wsum csum = skb->csum; |
119 | |
120 | if (skb->ip_summed != CHECKSUM_COMPLETE) |
121 | return; |
122 | |
123 | if (offset != 0) { |
124 | int tend_off = skb_transport_offset(skb) + tlen; |
125 | csum = csum_sub(csum, skb_checksum(skb, tend_off, offset, 0)); |
126 | } |
127 | |
128 | put_cmsg(msg, SOL_IP, IP_CHECKSUM, sizeof(__wsum), &csum); |
129 | } |
130 | |
131 | static void ip_cmsg_recv_security(struct msghdr *msg, struct sk_buff *skb) |
132 | { |
133 | char *secdata; |
134 | u32 seclen, secid; |
135 | int err; |
136 | |
137 | err = security_socket_getpeersec_dgram(NULL, skb, &secid); |
138 | if (err) |
139 | return; |
140 | |
141 | err = security_secid_to_secctx(secid, &secdata, &seclen); |
142 | if (err) |
143 | return; |
144 | |
145 | put_cmsg(msg, SOL_IP, SCM_SECURITY, seclen, secdata); |
146 | security_release_secctx(secdata, seclen); |
147 | } |
148 | |
149 | static void ip_cmsg_recv_dstaddr(struct msghdr *msg, struct sk_buff *skb) |
150 | { |
151 | __be16 _ports[2], *ports; |
152 | struct sockaddr_in sin; |
153 | |
154 | /* All current transport protocols have the port numbers in the |
155 | * first four bytes of the transport header and this function is |
156 | * written with this assumption in mind. |
157 | */ |
158 | ports = skb_header_pointer(skb, skb_transport_offset(skb), |
159 | sizeof(_ports), &_ports); |
160 | if (!ports) |
161 | return; |
162 | |
163 | sin.sin_family = AF_INET; |
164 | sin.sin_addr.s_addr = ip_hdr(skb)->daddr; |
165 | sin.sin_port = ports[1]; |
166 | memset(sin.sin_zero, 0, sizeof(sin.sin_zero)); |
167 | |
168 | put_cmsg(msg, SOL_IP, IP_ORIGDSTADDR, sizeof(sin), &sin); |
169 | } |
170 | |
171 | void ip_cmsg_recv_offset(struct msghdr *msg, struct sock *sk, |
172 | struct sk_buff *skb, int tlen, int offset) |
173 | { |
174 | struct inet_sock *inet = inet_sk(sk); |
175 | unsigned int flags = inet->cmsg_flags; |
176 | |
177 | /* Ordered by supposed usage frequency */ |
178 | if (flags & IP_CMSG_PKTINFO) { |
179 | ip_cmsg_recv_pktinfo(msg, skb); |
180 | |
181 | flags &= ~IP_CMSG_PKTINFO; |
182 | if (!flags) |
183 | return; |
184 | } |
185 | |
186 | if (flags & IP_CMSG_TTL) { |
187 | ip_cmsg_recv_ttl(msg, skb); |
188 | |
189 | flags &= ~IP_CMSG_TTL; |
190 | if (!flags) |
191 | return; |
192 | } |
193 | |
194 | if (flags & IP_CMSG_TOS) { |
195 | ip_cmsg_recv_tos(msg, skb); |
196 | |
197 | flags &= ~IP_CMSG_TOS; |
198 | if (!flags) |
199 | return; |
200 | } |
201 | |
202 | if (flags & IP_CMSG_RECVOPTS) { |
203 | ip_cmsg_recv_opts(msg, skb); |
204 | |
205 | flags &= ~IP_CMSG_RECVOPTS; |
206 | if (!flags) |
207 | return; |
208 | } |
209 | |
210 | if (flags & IP_CMSG_RETOPTS) { |
211 | ip_cmsg_recv_retopts(sock_net(sk), msg, skb); |
212 | |
213 | flags &= ~IP_CMSG_RETOPTS; |
214 | if (!flags) |
215 | return; |
216 | } |
217 | |
218 | if (flags & IP_CMSG_PASSSEC) { |
219 | ip_cmsg_recv_security(msg, skb); |
220 | |
221 | flags &= ~IP_CMSG_PASSSEC; |
222 | if (!flags) |
223 | return; |
224 | } |
225 | |
226 | if (flags & IP_CMSG_ORIGDSTADDR) { |
227 | ip_cmsg_recv_dstaddr(msg, skb); |
228 | |
229 | flags &= ~IP_CMSG_ORIGDSTADDR; |
230 | if (!flags) |
231 | return; |
232 | } |
233 | |
234 | if (flags & IP_CMSG_CHECKSUM) |
235 | ip_cmsg_recv_checksum(msg, skb, tlen, offset); |
236 | |
237 | if (flags & IP_CMSG_RECVFRAGSIZE) |
238 | ip_cmsg_recv_fragsize(msg, skb); |
239 | } |
240 | EXPORT_SYMBOL(ip_cmsg_recv_offset); |
241 | |
242 | int ip_cmsg_send(struct sock *sk, struct msghdr *msg, struct ipcm_cookie *ipc, |
243 | bool allow_ipv6) |
244 | { |
245 | int err, val; |
246 | struct cmsghdr *cmsg; |
247 | struct net *net = sock_net(sk); |
248 | |
249 | for_each_cmsghdr(cmsg, msg) { |
250 | if (!CMSG_OK(msg, cmsg)) |
251 | return -EINVAL; |
252 | #if IS_ENABLED(CONFIG_IPV6) |
253 | if (allow_ipv6 && |
254 | cmsg->cmsg_level == SOL_IPV6 && |
255 | cmsg->cmsg_type == IPV6_PKTINFO) { |
256 | struct in6_pktinfo *src_info; |
257 | |
258 | if (cmsg->cmsg_len < CMSG_LEN(sizeof(*src_info))) |
259 | return -EINVAL; |
260 | src_info = (struct in6_pktinfo *)CMSG_DATA(cmsg); |
261 | if (!ipv6_addr_v4mapped(&src_info->ipi6_addr)) |
262 | return -EINVAL; |
263 | if (src_info->ipi6_ifindex) |
264 | ipc->oif = src_info->ipi6_ifindex; |
265 | ipc->addr = src_info->ipi6_addr.s6_addr32[3]; |
266 | continue; |
267 | } |
268 | #endif |
269 | if (cmsg->cmsg_level == SOL_SOCKET) { |
270 | err = __sock_cmsg_send(sk, msg, cmsg, &ipc->sockc); |
271 | if (err) |
272 | return err; |
273 | continue; |
274 | } |
275 | |
276 | if (cmsg->cmsg_level != SOL_IP) |
277 | continue; |
278 | switch (cmsg->cmsg_type) { |
279 | case IP_RETOPTS: |
280 | err = cmsg->cmsg_len - sizeof(struct cmsghdr); |
281 | |
282 | /* Our caller is responsible for freeing ipc->opt */ |
283 | err = ip_options_get(net, &ipc->opt, CMSG_DATA(cmsg), |
284 | err < 40 ? err : 40); |
285 | if (err) |
286 | return err; |
287 | break; |
288 | case IP_PKTINFO: |
289 | { |
290 | struct in_pktinfo *info; |
291 | if (cmsg->cmsg_len != CMSG_LEN(sizeof(struct in_pktinfo))) |
292 | return -EINVAL; |
293 | info = (struct in_pktinfo *)CMSG_DATA(cmsg); |
294 | if (info->ipi_ifindex) |
295 | ipc->oif = info->ipi_ifindex; |
296 | ipc->addr = info->ipi_spec_dst.s_addr; |
297 | break; |
298 | } |
299 | case IP_TTL: |
300 | if (cmsg->cmsg_len != CMSG_LEN(sizeof(int))) |
301 | return -EINVAL; |
302 | val = *(int *)CMSG_DATA(cmsg); |
303 | if (val < 1 || val > 255) |
304 | return -EINVAL; |
305 | ipc->ttl = val; |
306 | break; |
307 | case IP_TOS: |
308 | if (cmsg->cmsg_len == CMSG_LEN(sizeof(int))) |
309 | val = *(int *)CMSG_DATA(cmsg); |
310 | else if (cmsg->cmsg_len == CMSG_LEN(sizeof(u8))) |
311 | val = *(u8 *)CMSG_DATA(cmsg); |
312 | else |
313 | return -EINVAL; |
314 | if (val < 0 || val > 255) |
315 | return -EINVAL; |
316 | ipc->tos = val; |
317 | ipc->priority = rt_tos2priority(ipc->tos); |
318 | break; |
319 | |
320 | default: |
321 | return -EINVAL; |
322 | } |
323 | } |
324 | return 0; |
325 | } |
326 | |
327 | static void ip_ra_destroy_rcu(struct rcu_head *head) |
328 | { |
329 | struct ip_ra_chain *ra = container_of(head, struct ip_ra_chain, rcu); |
330 | |
331 | sock_put(ra->saved_sk); |
332 | kfree(ra); |
333 | } |
334 | |
335 | int ip_ra_control(struct sock *sk, unsigned char on, |
336 | void (*destructor)(struct sock *)) |
337 | { |
338 | struct ip_ra_chain *ra, *new_ra; |
339 | struct ip_ra_chain __rcu **rap; |
340 | struct net *net = sock_net(sk); |
341 | |
342 | if (sk->sk_type != SOCK_RAW || inet_sk(sk)->inet_num == IPPROTO_RAW) |
343 | return -EINVAL; |
344 | |
345 | new_ra = on ? kmalloc(sizeof(*new_ra), GFP_KERNEL) : NULL; |
346 | |
347 | mutex_lock(&net->ipv4.ra_mutex); |
348 | for (rap = &net->ipv4.ra_chain; |
349 | (ra = rcu_dereference_protected(*rap, |
350 | lockdep_is_held(&net->ipv4.ra_mutex))) != NULL; |
351 | rap = &ra->next) { |
352 | if (ra->sk == sk) { |
353 | if (on) { |
354 | mutex_unlock(&net->ipv4.ra_mutex); |
355 | kfree(new_ra); |
356 | return -EADDRINUSE; |
357 | } |
358 | /* dont let ip_call_ra_chain() use sk again */ |
359 | ra->sk = NULL; |
360 | RCU_INIT_POINTER(*rap, ra->next); |
361 | mutex_unlock(&net->ipv4.ra_mutex); |
362 | |
363 | if (ra->destructor) |
364 | ra->destructor(sk); |
365 | /* |
366 | * Delay sock_put(sk) and kfree(ra) after one rcu grace |
367 | * period. This guarantee ip_call_ra_chain() dont need |
368 | * to mess with socket refcounts. |
369 | */ |
370 | ra->saved_sk = sk; |
371 | call_rcu(&ra->rcu, ip_ra_destroy_rcu); |
372 | return 0; |
373 | } |
374 | } |
375 | if (!new_ra) { |
376 | mutex_unlock(&net->ipv4.ra_mutex); |
377 | return -ENOBUFS; |
378 | } |
379 | new_ra->sk = sk; |
380 | new_ra->destructor = destructor; |
381 | |
382 | RCU_INIT_POINTER(new_ra->next, ra); |
383 | rcu_assign_pointer(*rap, new_ra); |
384 | sock_hold(sk); |
385 | mutex_unlock(&net->ipv4.ra_mutex); |
386 | |
387 | return 0; |
388 | } |
389 | |
390 | void ip_icmp_error(struct sock *sk, struct sk_buff *skb, int err, |
391 | __be16 port, u32 info, u8 *payload) |
392 | { |
393 | struct sock_exterr_skb *serr; |
394 | |
395 | skb = skb_clone(skb, GFP_ATOMIC); |
396 | if (!skb) |
397 | return; |
398 | |
399 | serr = SKB_EXT_ERR(skb); |
400 | serr->ee.ee_errno = err; |
401 | serr->ee.ee_origin = SO_EE_ORIGIN_ICMP; |
402 | serr->ee.ee_type = icmp_hdr(skb)->type; |
403 | serr->ee.ee_code = icmp_hdr(skb)->code; |
404 | serr->ee.ee_pad = 0; |
405 | serr->ee.ee_info = info; |
406 | serr->ee.ee_data = 0; |
407 | serr->addr_offset = (u8 *)&(((struct iphdr *)(icmp_hdr(skb) + 1))->daddr) - |
408 | skb_network_header(skb); |
409 | serr->port = port; |
410 | |
411 | if (skb_pull(skb, payload - skb->data)) { |
412 | skb_reset_transport_header(skb); |
413 | if (sock_queue_err_skb(sk, skb) == 0) |
414 | return; |
415 | } |
416 | kfree_skb(skb); |
417 | } |
418 | |
419 | void ip_local_error(struct sock *sk, int err, __be32 daddr, __be16 port, u32 info) |
420 | { |
421 | struct inet_sock *inet = inet_sk(sk); |
422 | struct sock_exterr_skb *serr; |
423 | struct iphdr *iph; |
424 | struct sk_buff *skb; |
425 | |
426 | if (!inet->recverr) |
427 | return; |
428 | |
429 | skb = alloc_skb(sizeof(struct iphdr), GFP_ATOMIC); |
430 | if (!skb) |
431 | return; |
432 | |
433 | skb_put(skb, sizeof(struct iphdr)); |
434 | skb_reset_network_header(skb); |
435 | iph = ip_hdr(skb); |
436 | iph->daddr = daddr; |
437 | |
438 | serr = SKB_EXT_ERR(skb); |
439 | serr->ee.ee_errno = err; |
440 | serr->ee.ee_origin = SO_EE_ORIGIN_LOCAL; |
441 | serr->ee.ee_type = 0; |
442 | serr->ee.ee_code = 0; |
443 | serr->ee.ee_pad = 0; |
444 | serr->ee.ee_info = info; |
445 | serr->ee.ee_data = 0; |
446 | serr->addr_offset = (u8 *)&iph->daddr - skb_network_header(skb); |
447 | serr->port = port; |
448 | |
449 | __skb_pull(skb, skb_tail_pointer(skb) - skb->data); |
450 | skb_reset_transport_header(skb); |
451 | |
452 | if (sock_queue_err_skb(sk, skb)) |
453 | kfree_skb(skb); |
454 | } |
455 | |
456 | /* For some errors we have valid addr_offset even with zero payload and |
457 | * zero port. Also, addr_offset should be supported if port is set. |
458 | */ |
459 | static inline bool ipv4_datagram_support_addr(struct sock_exterr_skb *serr) |
460 | { |
461 | return serr->ee.ee_origin == SO_EE_ORIGIN_ICMP || |
462 | serr->ee.ee_origin == SO_EE_ORIGIN_LOCAL || serr->port; |
463 | } |
464 | |
465 | /* IPv4 supports cmsg on all imcp errors and some timestamps |
466 | * |
467 | * Timestamp code paths do not initialize the fields expected by cmsg: |
468 | * the PKTINFO fields in skb->cb[]. Fill those in here. |
469 | */ |
470 | static bool ipv4_datagram_support_cmsg(const struct sock *sk, |
471 | struct sk_buff *skb, |
472 | int ee_origin) |
473 | { |
474 | struct in_pktinfo *info; |
475 | |
476 | if (ee_origin == SO_EE_ORIGIN_ICMP) |
477 | return true; |
478 | |
479 | if (ee_origin == SO_EE_ORIGIN_LOCAL) |
480 | return false; |
481 | |
482 | /* Support IP_PKTINFO on tstamp packets if requested, to correlate |
483 | * timestamp with egress dev. Not possible for packets without iif |
484 | * or without payload (SOF_TIMESTAMPING_OPT_TSONLY). |
485 | */ |
486 | info = PKTINFO_SKB_CB(skb); |
487 | if (!(sk->sk_tsflags & SOF_TIMESTAMPING_OPT_CMSG) || |
488 | !info->ipi_ifindex) |
489 | return false; |
490 | |
491 | info->ipi_spec_dst.s_addr = ip_hdr(skb)->saddr; |
492 | return true; |
493 | } |
494 | |
495 | /* |
496 | * Handle MSG_ERRQUEUE |
497 | */ |
498 | int ip_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len) |
499 | { |
500 | struct sock_exterr_skb *serr; |
501 | struct sk_buff *skb; |
502 | DECLARE_SOCKADDR(struct sockaddr_in *, sin, msg->msg_name); |
503 | struct { |
504 | struct sock_extended_err ee; |
505 | struct sockaddr_in offender; |
506 | } errhdr; |
507 | int err; |
508 | int copied; |
509 | |
510 | err = -EAGAIN; |
511 | skb = sock_dequeue_err_skb(sk); |
512 | if (!skb) |
513 | goto out; |
514 | |
515 | copied = skb->len; |
516 | if (copied > len) { |
517 | msg->msg_flags |= MSG_TRUNC; |
518 | copied = len; |
519 | } |
520 | err = skb_copy_datagram_msg(skb, 0, msg, copied); |
521 | if (unlikely(err)) { |
522 | kfree_skb(skb); |
523 | return err; |
524 | } |
525 | sock_recv_timestamp(msg, sk, skb); |
526 | |
527 | serr = SKB_EXT_ERR(skb); |
528 | |
529 | if (sin && ipv4_datagram_support_addr(serr)) { |
530 | sin->sin_family = AF_INET; |
531 | sin->sin_addr.s_addr = *(__be32 *)(skb_network_header(skb) + |
532 | serr->addr_offset); |
533 | sin->sin_port = serr->port; |
534 | memset(&sin->sin_zero, 0, sizeof(sin->sin_zero)); |
535 | *addr_len = sizeof(*sin); |
536 | } |
537 | |
538 | memcpy(&errhdr.ee, &serr->ee, sizeof(struct sock_extended_err)); |
539 | sin = &errhdr.offender; |
540 | memset(sin, 0, sizeof(*sin)); |
541 | |
542 | if (ipv4_datagram_support_cmsg(sk, skb, serr->ee.ee_origin)) { |
543 | sin->sin_family = AF_INET; |
544 | sin->sin_addr.s_addr = ip_hdr(skb)->saddr; |
545 | if (inet_sk(sk)->cmsg_flags) |
546 | ip_cmsg_recv(msg, skb); |
547 | } |
548 | |
549 | put_cmsg(msg, SOL_IP, IP_RECVERR, sizeof(errhdr), &errhdr); |
550 | |
551 | /* Now we could try to dump offended packet options */ |
552 | |
553 | msg->msg_flags |= MSG_ERRQUEUE; |
554 | err = copied; |
555 | |
556 | consume_skb(skb); |
557 | out: |
558 | return err; |
559 | } |
560 | |
561 | |
562 | /* |
563 | * Socket option code for IP. This is the end of the line after any |
564 | * TCP,UDP etc options on an IP socket. |
565 | */ |
/*
 * Report whether @optname is one of the multicast membership / source
 * filter options, i.e. the options for which the setsockopt path takes
 * the RTNL lock.
 */
static bool setsockopt_needs_rtnl(int optname)
{
	bool takes_rtnl;

	switch (optname) {
	/* IP_* multicast options */
	case IP_ADD_MEMBERSHIP:
	case IP_DROP_MEMBERSHIP:
	case IP_ADD_SOURCE_MEMBERSHIP:
	case IP_DROP_SOURCE_MEMBERSHIP:
	case IP_BLOCK_SOURCE:
	case IP_UNBLOCK_SOURCE:
	case IP_MSFILTER:
	/* protocol-independent MCAST_* options */
	case MCAST_JOIN_GROUP:
	case MCAST_LEAVE_GROUP:
	case MCAST_JOIN_SOURCE_GROUP:
	case MCAST_LEAVE_SOURCE_GROUP:
	case MCAST_BLOCK_SOURCE:
	case MCAST_UNBLOCK_SOURCE:
	case MCAST_MSFILTER:
		takes_rtnl = true;
		break;
	default:
		takes_rtnl = false;
		break;
	}

	return takes_rtnl;
}
587 | |
588 | static int do_ip_setsockopt(struct sock *sk, int level, |
589 | int optname, char __user *optval, unsigned int optlen) |
590 | { |
591 | struct inet_sock *inet = inet_sk(sk); |
592 | struct net *net = sock_net(sk); |
593 | int val = 0, err; |
594 | bool needs_rtnl = setsockopt_needs_rtnl(optname); |
595 | |
596 | switch (optname) { |
597 | case IP_PKTINFO: |
598 | case IP_RECVTTL: |
599 | case IP_RECVOPTS: |
600 | case IP_RECVTOS: |
601 | case IP_RETOPTS: |
602 | case IP_TOS: |
603 | case IP_TTL: |
604 | case IP_HDRINCL: |
605 | case IP_MTU_DISCOVER: |
606 | case IP_RECVERR: |
607 | case IP_ROUTER_ALERT: |
608 | case IP_FREEBIND: |
609 | case IP_PASSSEC: |
610 | case IP_TRANSPARENT: |
611 | case IP_MINTTL: |
612 | case IP_NODEFRAG: |
613 | case IP_BIND_ADDRESS_NO_PORT: |
614 | case IP_UNICAST_IF: |
615 | case IP_MULTICAST_TTL: |
616 | case IP_MULTICAST_ALL: |
617 | case IP_MULTICAST_LOOP: |
618 | case IP_RECVORIGDSTADDR: |
619 | case IP_CHECKSUM: |
620 | case IP_RECVFRAGSIZE: |
621 | if (optlen >= sizeof(int)) { |
622 | if (get_user(val, (int __user *) optval)) |
623 | return -EFAULT; |
624 | } else if (optlen >= sizeof(char)) { |
625 | unsigned char ucval; |
626 | |
627 | if (get_user(ucval, (unsigned char __user *) optval)) |
628 | return -EFAULT; |
629 | val = (int) ucval; |
630 | } |
631 | } |
632 | |
633 | /* If optlen==0, it is equivalent to val == 0 */ |
634 | |
635 | if (optname == IP_ROUTER_ALERT) |
636 | return ip_ra_control(sk, val ? 1 : 0, NULL); |
637 | if (ip_mroute_opt(optname)) |
638 | return ip_mroute_setsockopt(sk, optname, optval, optlen); |
639 | |
640 | err = 0; |
641 | if (needs_rtnl) |
642 | rtnl_lock(); |
643 | lock_sock(sk); |
644 | |
645 | switch (optname) { |
646 | case IP_OPTIONS: |
647 | { |
648 | struct ip_options_rcu *old, *opt = NULL; |
649 | |
650 | if (optlen > 40) |
651 | goto e_inval; |
652 | err = ip_options_get_from_user(sock_net(sk), &opt, |
653 | optval, optlen); |
654 | if (err) |
655 | break; |
656 | old = rcu_dereference_protected(inet->inet_opt, |
657 | lockdep_sock_is_held(sk)); |
658 | if (inet->is_icsk) { |
659 | struct inet_connection_sock *icsk = inet_csk(sk); |
660 | #if IS_ENABLED(CONFIG_IPV6) |
661 | if (sk->sk_family == PF_INET || |
662 | (!((1 << sk->sk_state) & |
663 | (TCPF_LISTEN | TCPF_CLOSE)) && |
664 | inet->inet_daddr != LOOPBACK4_IPV6)) { |
665 | #endif |
666 | if (old) |
667 | icsk->icsk_ext_hdr_len -= old->opt.optlen; |
668 | if (opt) |
669 | icsk->icsk_ext_hdr_len += opt->opt.optlen; |
670 | icsk->icsk_sync_mss(sk, icsk->icsk_pmtu_cookie); |
671 | #if IS_ENABLED(CONFIG_IPV6) |
672 | } |
673 | #endif |
674 | } |
675 | rcu_assign_pointer(inet->inet_opt, opt); |
676 | if (old) |
677 | kfree_rcu(old, rcu); |
678 | break; |
679 | } |
680 | case IP_PKTINFO: |
681 | if (val) |
682 | inet->cmsg_flags |= IP_CMSG_PKTINFO; |
683 | else |
684 | inet->cmsg_flags &= ~IP_CMSG_PKTINFO; |
685 | break; |
686 | case IP_RECVTTL: |
687 | if (val) |
688 | inet->cmsg_flags |= IP_CMSG_TTL; |
689 | else |
690 | inet->cmsg_flags &= ~IP_CMSG_TTL; |
691 | break; |
692 | case IP_RECVTOS: |
693 | if (val) |
694 | inet->cmsg_flags |= IP_CMSG_TOS; |
695 | else |
696 | inet->cmsg_flags &= ~IP_CMSG_TOS; |
697 | break; |
698 | case IP_RECVOPTS: |
699 | if (val) |
700 | inet->cmsg_flags |= IP_CMSG_RECVOPTS; |
701 | else |
702 | inet->cmsg_flags &= ~IP_CMSG_RECVOPTS; |
703 | break; |
704 | case IP_RETOPTS: |
705 | if (val) |
706 | inet->cmsg_flags |= IP_CMSG_RETOPTS; |
707 | else |
708 | inet->cmsg_flags &= ~IP_CMSG_RETOPTS; |
709 | break; |
710 | case IP_PASSSEC: |
711 | if (val) |
712 | inet->cmsg_flags |= IP_CMSG_PASSSEC; |
713 | else |
714 | inet->cmsg_flags &= ~IP_CMSG_PASSSEC; |
715 | break; |
716 | case IP_RECVORIGDSTADDR: |
717 | if (val) |
718 | inet->cmsg_flags |= IP_CMSG_ORIGDSTADDR; |
719 | else |
720 | inet->cmsg_flags &= ~IP_CMSG_ORIGDSTADDR; |
721 | break; |
722 | case IP_CHECKSUM: |
723 | if (val) { |
724 | if (!(inet->cmsg_flags & IP_CMSG_CHECKSUM)) { |
725 | inet_inc_convert_csum(sk); |
726 | inet->cmsg_flags |= IP_CMSG_CHECKSUM; |
727 | } |
728 | } else { |
729 | if (inet->cmsg_flags & IP_CMSG_CHECKSUM) { |
730 | inet_dec_convert_csum(sk); |
731 | inet->cmsg_flags &= ~IP_CMSG_CHECKSUM; |
732 | } |
733 | } |
734 | break; |
735 | case IP_RECVFRAGSIZE: |
736 | if (sk->sk_type != SOCK_RAW && sk->sk_type != SOCK_DGRAM) |
737 | goto e_inval; |
738 | if (val) |
739 | inet->cmsg_flags |= IP_CMSG_RECVFRAGSIZE; |
740 | else |
741 | inet->cmsg_flags &= ~IP_CMSG_RECVFRAGSIZE; |
742 | break; |
743 | case IP_TOS: /* This sets both TOS and Precedence */ |
744 | if (sk->sk_type == SOCK_STREAM) { |
745 | val &= ~INET_ECN_MASK; |
746 | val |= inet->tos & INET_ECN_MASK; |
747 | } |
748 | if (inet->tos != val) { |
749 | inet->tos = val; |
750 | sk->sk_priority = rt_tos2priority(val); |
751 | sk_dst_reset(sk); |
752 | } |
753 | break; |
754 | case IP_TTL: |
755 | if (optlen < 1) |
756 | goto e_inval; |
757 | if (val != -1 && (val < 1 || val > 255)) |
758 | goto e_inval; |
759 | inet->uc_ttl = val; |
760 | break; |
761 | case IP_HDRINCL: |
762 | if (sk->sk_type != SOCK_RAW) { |
763 | err = -ENOPROTOOPT; |
764 | break; |
765 | } |
766 | inet->hdrincl = val ? 1 : 0; |
767 | break; |
768 | case IP_NODEFRAG: |
769 | if (sk->sk_type != SOCK_RAW) { |
770 | err = -ENOPROTOOPT; |
771 | break; |
772 | } |
773 | inet->nodefrag = val ? 1 : 0; |
774 | break; |
775 | case IP_BIND_ADDRESS_NO_PORT: |
776 | inet->bind_address_no_port = val ? 1 : 0; |
777 | break; |
778 | case IP_MTU_DISCOVER: |
779 | if (val < IP_PMTUDISC_DONT || val > IP_PMTUDISC_OMIT) |
780 | goto e_inval; |
781 | inet->pmtudisc = val; |
782 | break; |
783 | case IP_RECVERR: |
784 | inet->recverr = !!val; |
785 | if (!val) |
786 | skb_queue_purge(&sk->sk_error_queue); |
787 | break; |
788 | case IP_MULTICAST_TTL: |
789 | if (sk->sk_type == SOCK_STREAM) |
790 | goto e_inval; |
791 | if (optlen < 1) |
792 | goto e_inval; |
793 | if (val == -1) |
794 | val = 1; |
795 | if (val < 0 || val > 255) |
796 | goto e_inval; |
797 | inet->mc_ttl = val; |
798 | break; |
799 | case IP_MULTICAST_LOOP: |
800 | if (optlen < 1) |
801 | goto e_inval; |
802 | inet->mc_loop = !!val; |
803 | break; |
804 | case IP_UNICAST_IF: |
805 | { |
806 | struct net_device *dev = NULL; |
807 | int ifindex; |
808 | int midx; |
809 | |
810 | if (optlen != sizeof(int)) |
811 | goto e_inval; |
812 | |
813 | ifindex = (__force int)ntohl((__force __be32)val); |
814 | if (ifindex == 0) { |
815 | inet->uc_index = 0; |
816 | err = 0; |
817 | break; |
818 | } |
819 | |
820 | dev = dev_get_by_index(sock_net(sk), ifindex); |
821 | err = -EADDRNOTAVAIL; |
822 | if (!dev) |
823 | break; |
824 | |
825 | midx = l3mdev_master_ifindex(dev); |
826 | dev_put(dev); |
827 | |
828 | err = -EINVAL; |
829 | if (sk->sk_bound_dev_if && |
830 | (!midx || midx != sk->sk_bound_dev_if)) |
831 | break; |
832 | |
833 | inet->uc_index = ifindex; |
834 | err = 0; |
835 | break; |
836 | } |
837 | case IP_MULTICAST_IF: |
838 | { |
839 | struct ip_mreqn mreq; |
840 | struct net_device *dev = NULL; |
841 | int midx; |
842 | |
843 | if (sk->sk_type == SOCK_STREAM) |
844 | goto e_inval; |
845 | /* |
846 | * Check the arguments are allowable |
847 | */ |
848 | |
849 | if (optlen < sizeof(struct in_addr)) |
850 | goto e_inval; |
851 | |
852 | err = -EFAULT; |
853 | if (optlen >= sizeof(struct ip_mreqn)) { |
854 | if (copy_from_user(&mreq, optval, sizeof(mreq))) |
855 | break; |
856 | } else { |
857 | memset(&mreq, 0, sizeof(mreq)); |
858 | if (optlen >= sizeof(struct ip_mreq)) { |
859 | if (copy_from_user(&mreq, optval, |
860 | sizeof(struct ip_mreq))) |
861 | break; |
862 | } else if (optlen >= sizeof(struct in_addr)) { |
863 | if (copy_from_user(&mreq.imr_address, optval, |
864 | sizeof(struct in_addr))) |
865 | break; |
866 | } |
867 | } |
868 | |
869 | if (!mreq.imr_ifindex) { |
870 | if (mreq.imr_address.s_addr == htonl(INADDR_ANY)) { |
871 | inet->mc_index = 0; |
872 | inet->mc_addr = 0; |
873 | err = 0; |
874 | break; |
875 | } |
876 | dev = ip_dev_find(sock_net(sk), mreq.imr_address.s_addr); |
877 | if (dev) |
878 | mreq.imr_ifindex = dev->ifindex; |
879 | } else |
880 | dev = dev_get_by_index(sock_net(sk), mreq.imr_ifindex); |
881 | |
882 | |
883 | err = -EADDRNOTAVAIL; |
884 | if (!dev) |
885 | break; |
886 | |
887 | midx = l3mdev_master_ifindex(dev); |
888 | |
889 | dev_put(dev); |
890 | |
891 | err = -EINVAL; |
892 | if (sk->sk_bound_dev_if && |
893 | mreq.imr_ifindex != sk->sk_bound_dev_if && |
894 | (!midx || midx != sk->sk_bound_dev_if)) |
895 | break; |
896 | |
897 | inet->mc_index = mreq.imr_ifindex; |
898 | inet->mc_addr = mreq.imr_address.s_addr; |
899 | err = 0; |
900 | break; |
901 | } |
902 | |
903 | case IP_ADD_MEMBERSHIP: |
904 | case IP_DROP_MEMBERSHIP: |
905 | { |
906 | struct ip_mreqn mreq; |
907 | |
908 | err = -EPROTO; |
909 | if (inet_sk(sk)->is_icsk) |
910 | break; |
911 | |
912 | if (optlen < sizeof(struct ip_mreq)) |
913 | goto e_inval; |
914 | err = -EFAULT; |
915 | if (optlen >= sizeof(struct ip_mreqn)) { |
916 | if (copy_from_user(&mreq, optval, sizeof(mreq))) |
917 | break; |
918 | } else { |
919 | memset(&mreq, 0, sizeof(mreq)); |
920 | if (copy_from_user(&mreq, optval, sizeof(struct ip_mreq))) |
921 | break; |
922 | } |
923 | |
924 | if (optname == IP_ADD_MEMBERSHIP) |
925 | err = ip_mc_join_group(sk, &mreq); |
926 | else |
927 | err = ip_mc_leave_group(sk, &mreq); |
928 | break; |
929 | } |
930 | case IP_MSFILTER: |
931 | { |
932 | struct ip_msfilter *msf; |
933 | |
934 | if (optlen < IP_MSFILTER_SIZE(0)) |
935 | goto e_inval; |
936 | if (optlen > sysctl_optmem_max) { |
937 | err = -ENOBUFS; |
938 | break; |
939 | } |
940 | msf = memdup_user(optval, optlen); |
941 | if (IS_ERR(msf)) { |
942 | err = PTR_ERR(msf); |
943 | break; |
944 | } |
945 | /* numsrc >= (1G-4) overflow in 32 bits */ |
946 | if (msf->imsf_numsrc >= 0x3ffffffcU || |
947 | msf->imsf_numsrc > net->ipv4.sysctl_igmp_max_msf) { |
948 | kfree(msf); |
949 | err = -ENOBUFS; |
950 | break; |
951 | } |
952 | if (IP_MSFILTER_SIZE(msf->imsf_numsrc) > optlen) { |
953 | kfree(msf); |
954 | err = -EINVAL; |
955 | break; |
956 | } |
957 | err = ip_mc_msfilter(sk, msf, 0); |
958 | kfree(msf); |
959 | break; |
960 | } |
961 | case IP_BLOCK_SOURCE: |
962 | case IP_UNBLOCK_SOURCE: |
963 | case IP_ADD_SOURCE_MEMBERSHIP: |
964 | case IP_DROP_SOURCE_MEMBERSHIP: |
965 | { |
966 | struct ip_mreq_source mreqs; |
967 | int omode, add; |
968 | |
969 | if (optlen != sizeof(struct ip_mreq_source)) |
970 | goto e_inval; |
971 | if (copy_from_user(&mreqs, optval, sizeof(mreqs))) { |
972 | err = -EFAULT; |
973 | break; |
974 | } |
975 | if (optname == IP_BLOCK_SOURCE) { |
976 | omode = MCAST_EXCLUDE; |
977 | add = 1; |
978 | } else if (optname == IP_UNBLOCK_SOURCE) { |
979 | omode = MCAST_EXCLUDE; |
980 | add = 0; |
981 | } else if (optname == IP_ADD_SOURCE_MEMBERSHIP) { |
982 | struct ip_mreqn mreq; |
983 | |
984 | mreq.imr_multiaddr.s_addr = mreqs.imr_multiaddr; |
985 | mreq.imr_address.s_addr = mreqs.imr_interface; |
986 | mreq.imr_ifindex = 0; |
987 | err = ip_mc_join_group_ssm(sk, &mreq, MCAST_INCLUDE); |
988 | if (err && err != -EADDRINUSE) |
989 | break; |
990 | omode = MCAST_INCLUDE; |
991 | add = 1; |
992 | } else /* IP_DROP_SOURCE_MEMBERSHIP */ { |
993 | omode = MCAST_INCLUDE; |
994 | add = 0; |
995 | } |
996 | err = ip_mc_source(add, omode, sk, &mreqs, 0); |
997 | break; |
998 | } |
999 | case MCAST_JOIN_GROUP: |
1000 | case MCAST_LEAVE_GROUP: |
1001 | { |
1002 | struct group_req greq; |
1003 | struct sockaddr_in *psin; |
1004 | struct ip_mreqn mreq; |
1005 | |
1006 | if (optlen < sizeof(struct group_req)) |
1007 | goto e_inval; |
1008 | err = -EFAULT; |
1009 | if (copy_from_user(&greq, optval, sizeof(greq))) |
1010 | break; |
1011 | psin = (struct sockaddr_in *)&greq.gr_group; |
1012 | if (psin->sin_family != AF_INET) |
1013 | goto e_inval; |
1014 | memset(&mreq, 0, sizeof(mreq)); |
1015 | mreq.imr_multiaddr = psin->sin_addr; |
1016 | mreq.imr_ifindex = greq.gr_interface; |
1017 | |
1018 | if (optname == MCAST_JOIN_GROUP) |
1019 | err = ip_mc_join_group(sk, &mreq); |
1020 | else |
1021 | err = ip_mc_leave_group(sk, &mreq); |
1022 | break; |
1023 | } |
1024 | case MCAST_JOIN_SOURCE_GROUP: |
1025 | case MCAST_LEAVE_SOURCE_GROUP: |
1026 | case MCAST_BLOCK_SOURCE: |
1027 | case MCAST_UNBLOCK_SOURCE: |
1028 | { |
1029 | struct group_source_req greqs; |
1030 | struct ip_mreq_source mreqs; |
1031 | struct sockaddr_in *psin; |
1032 | int omode, add; |
1033 | |
1034 | if (optlen != sizeof(struct group_source_req)) |
1035 | goto e_inval; |
1036 | if (copy_from_user(&greqs, optval, sizeof(greqs))) { |
1037 | err = -EFAULT; |
1038 | break; |
1039 | } |
1040 | if (greqs.gsr_group.ss_family != AF_INET || |
1041 | greqs.gsr_source.ss_family != AF_INET) { |
1042 | err = -EADDRNOTAVAIL; |
1043 | break; |
1044 | } |
1045 | psin = (struct sockaddr_in *)&greqs.gsr_group; |
1046 | mreqs.imr_multiaddr = psin->sin_addr.s_addr; |
1047 | psin = (struct sockaddr_in *)&greqs.gsr_source; |
1048 | mreqs.imr_sourceaddr = psin->sin_addr.s_addr; |
1049 | mreqs.imr_interface = 0; /* use index for mc_source */ |
1050 | |
1051 | if (optname == MCAST_BLOCK_SOURCE) { |
1052 | omode = MCAST_EXCLUDE; |
1053 | add = 1; |
1054 | } else if (optname == MCAST_UNBLOCK_SOURCE) { |
1055 | omode = MCAST_EXCLUDE; |
1056 | add = 0; |
1057 | } else if (optname == MCAST_JOIN_SOURCE_GROUP) { |
1058 | struct ip_mreqn mreq; |
1059 | |
1060 | psin = (struct sockaddr_in *)&greqs.gsr_group; |
1061 | mreq.imr_multiaddr = psin->sin_addr; |
1062 | mreq.imr_address.s_addr = 0; |
1063 | mreq.imr_ifindex = greqs.gsr_interface; |
1064 | err = ip_mc_join_group_ssm(sk, &mreq, MCAST_INCLUDE); |
1065 | if (err && err != -EADDRINUSE) |
1066 | break; |
1067 | greqs.gsr_interface = mreq.imr_ifindex; |
1068 | omode = MCAST_INCLUDE; |
1069 | add = 1; |
1070 | } else /* MCAST_LEAVE_SOURCE_GROUP */ { |
1071 | omode = MCAST_INCLUDE; |
1072 | add = 0; |
1073 | } |
1074 | err = ip_mc_source(add, omode, sk, &mreqs, |
1075 | greqs.gsr_interface); |
1076 | break; |
1077 | } |
1078 | case MCAST_MSFILTER: |
1079 | { |
1080 | struct sockaddr_in *psin; |
1081 | struct ip_msfilter *msf = NULL; |
1082 | struct group_filter *gsf = NULL; |
1083 | int msize, i, ifindex; |
1084 | |
1085 | if (optlen < GROUP_FILTER_SIZE(0)) |
1086 | goto e_inval; |
1087 | if (optlen > sysctl_optmem_max) { |
1088 | err = -ENOBUFS; |
1089 | break; |
1090 | } |
1091 | gsf = memdup_user(optval, optlen); |
1092 | if (IS_ERR(gsf)) { |
1093 | err = PTR_ERR(gsf); |
1094 | break; |
1095 | } |
1096 | |
1097 | /* numsrc >= (4G-140)/128 overflow in 32 bits */ |
1098 | if (gsf->gf_numsrc >= 0x1ffffff || |
1099 | gsf->gf_numsrc > net->ipv4.sysctl_igmp_max_msf) { |
1100 | err = -ENOBUFS; |
1101 | goto mc_msf_out; |
1102 | } |
1103 | if (GROUP_FILTER_SIZE(gsf->gf_numsrc) > optlen) { |
1104 | err = -EINVAL; |
1105 | goto mc_msf_out; |
1106 | } |
1107 | msize = IP_MSFILTER_SIZE(gsf->gf_numsrc); |
1108 | msf = kmalloc(msize, GFP_KERNEL); |
1109 | if (!msf) { |
1110 | err = -ENOBUFS; |
1111 | goto mc_msf_out; |
1112 | } |
1113 | ifindex = gsf->gf_interface; |
1114 | psin = (struct sockaddr_in *)&gsf->gf_group; |
1115 | if (psin->sin_family != AF_INET) { |
1116 | err = -EADDRNOTAVAIL; |
1117 | goto mc_msf_out; |
1118 | } |
1119 | msf->imsf_multiaddr = psin->sin_addr.s_addr; |
1120 | msf->imsf_interface = 0; |
1121 | msf->imsf_fmode = gsf->gf_fmode; |
1122 | msf->imsf_numsrc = gsf->gf_numsrc; |
1123 | err = -EADDRNOTAVAIL; |
1124 | for (i = 0; i < gsf->gf_numsrc; ++i) { |
1125 | psin = (struct sockaddr_in *)&gsf->gf_slist[i]; |
1126 | |
1127 | if (psin->sin_family != AF_INET) |
1128 | goto mc_msf_out; |
1129 | msf->imsf_slist[i] = psin->sin_addr.s_addr; |
1130 | } |
1131 | kfree(gsf); |
1132 | gsf = NULL; |
1133 | |
1134 | err = ip_mc_msfilter(sk, msf, ifindex); |
1135 | mc_msf_out: |
1136 | kfree(msf); |
1137 | kfree(gsf); |
1138 | break; |
1139 | } |
1140 | case IP_MULTICAST_ALL: |
1141 | if (optlen < 1) |
1142 | goto e_inval; |
1143 | if (val != 0 && val != 1) |
1144 | goto e_inval; |
1145 | inet->mc_all = val; |
1146 | break; |
1147 | |
1148 | case IP_FREEBIND: |
1149 | if (optlen < 1) |
1150 | goto e_inval; |
1151 | inet->freebind = !!val; |
1152 | break; |
1153 | |
1154 | case IP_IPSEC_POLICY: |
1155 | case IP_XFRM_POLICY: |
1156 | err = -EPERM; |
1157 | if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) |
1158 | break; |
1159 | err = xfrm_user_policy(sk, optname, optval, optlen); |
1160 | break; |
1161 | |
1162 | case IP_TRANSPARENT: |
1163 | if (!!val && !ns_capable(sock_net(sk)->user_ns, CAP_NET_RAW) && |
1164 | !ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) { |
1165 | err = -EPERM; |
1166 | break; |
1167 | } |
1168 | if (optlen < 1) |
1169 | goto e_inval; |
1170 | inet->transparent = !!val; |
1171 | break; |
1172 | |
1173 | case IP_MINTTL: |
1174 | if (optlen < 1) |
1175 | goto e_inval; |
1176 | if (val < 0 || val > 255) |
1177 | goto e_inval; |
1178 | inet->min_ttl = val; |
1179 | break; |
1180 | |
1181 | default: |
1182 | err = -ENOPROTOOPT; |
1183 | break; |
1184 | } |
1185 | release_sock(sk); |
1186 | if (needs_rtnl) |
1187 | rtnl_unlock(); |
1188 | return err; |
1189 | |
1190 | e_inval: |
1191 | release_sock(sk); |
1192 | if (needs_rtnl) |
1193 | rtnl_unlock(); |
1194 | return -EINVAL; |
1195 | } |
1196 | |
1197 | /** |
1198 | * ipv4_pktinfo_prepare - transfer some info from rtable to skb |
1199 | * @sk: socket |
1200 | * @skb: buffer |
1201 | * |
1202 | * To support IP_CMSG_PKTINFO option, we store rt_iif and specific |
1203 | * destination in skb->cb[] before dst drop. |
1204 | * This way, receiver doesn't make cache line misses to read rtable. |
1205 | */ |
1206 | void ipv4_pktinfo_prepare(const struct sock *sk, struct sk_buff *skb) |
1207 | { |
1208 | struct in_pktinfo *pktinfo = PKTINFO_SKB_CB(skb); |
1209 | bool prepare = (inet_sk(sk)->cmsg_flags & IP_CMSG_PKTINFO) || |
1210 | ipv6_sk_rxinfo(sk); |
1211 | |
1212 | if (prepare && skb_rtable(skb)) { |
1213 | /* skb->cb is overloaded: prior to this point it is IP{6}CB |
1214 | * which has interface index (iif) as the first member of the |
1215 | * underlying inet{6}_skb_parm struct. This code then overlays |
1216 | * PKTINFO_SKB_CB and in_pktinfo also has iif as the first |
1217 | * element so the iif is picked up from the prior IPCB. If iif |
1218 | * is the loopback interface, then return the sending interface |
1219 | * (e.g., process binds socket to eth0 for Tx which is |
1220 | * redirected to loopback in the rtable/dst). |
1221 | */ |
1222 | struct rtable *rt = skb_rtable(skb); |
1223 | bool l3slave = ipv4_l3mdev_skb(IPCB(skb)->flags); |
1224 | |
1225 | if (pktinfo->ipi_ifindex == LOOPBACK_IFINDEX) |
1226 | pktinfo->ipi_ifindex = inet_iif(skb); |
1227 | else if (l3slave && rt && rt->rt_iif) |
1228 | pktinfo->ipi_ifindex = rt->rt_iif; |
1229 | |
1230 | pktinfo->ipi_spec_dst.s_addr = fib_compute_spec_dst(skb); |
1231 | } else { |
1232 | pktinfo->ipi_ifindex = 0; |
1233 | pktinfo->ipi_spec_dst.s_addr = 0; |
1234 | } |
1235 | skb_dst_drop(skb); |
1236 | } |
1237 | |
1238 | int ip_setsockopt(struct sock *sk, int level, |
1239 | int optname, char __user *optval, unsigned int optlen) |
1240 | { |
1241 | int err; |
1242 | |
1243 | if (level != SOL_IP) |
1244 | return -ENOPROTOOPT; |
1245 | |
1246 | err = do_ip_setsockopt(sk, level, optname, optval, optlen); |
1247 | #if IS_ENABLED(CONFIG_BPFILTER_UMH) |
1248 | if (optname >= BPFILTER_IPT_SO_SET_REPLACE && |
1249 | optname < BPFILTER_IPT_SET_MAX) |
1250 | err = bpfilter_ip_set_sockopt(sk, optname, optval, optlen); |
1251 | #endif |
1252 | #ifdef CONFIG_NETFILTER |
1253 | /* we need to exclude all possible ENOPROTOOPTs except default case */ |
1254 | if (err == -ENOPROTOOPT && optname != IP_HDRINCL && |
1255 | optname != IP_IPSEC_POLICY && |
1256 | optname != IP_XFRM_POLICY && |
1257 | !ip_mroute_opt(optname)) |
1258 | err = nf_setsockopt(sk, PF_INET, optname, optval, optlen); |
1259 | #endif |
1260 | return err; |
1261 | } |
1262 | EXPORT_SYMBOL(ip_setsockopt); |
1263 | |
#ifdef CONFIG_COMPAT
/*
 * compat_ip_setsockopt - SOL_IP setsockopt() entry point for compat callers.
 *
 * Levels other than SOL_IP are refused with -ENOPROTOOPT.  Option names in
 * the MCAST_JOIN_GROUP..MCAST_MSFILTER range are handed to
 * compat_mc_setsockopt(), which receives ip_setsockopt() as a callback.
 * Everything else goes through do_ip_setsockopt(); with CONFIG_NETFILTER a
 * resulting "unknown option" -ENOPROTOOPT is retried through
 * compat_nf_setsockopt().
 *
 * Returns 0 or a negative errno.
 */
int compat_ip_setsockopt(struct sock *sk, int level, int optname,
			 char __user *optval, unsigned int optlen)
{
	int ret;

	if (level != SOL_IP)
		return -ENOPROTOOPT;

	if (optname >= MCAST_JOIN_GROUP && optname <= MCAST_MSFILTER)
		return compat_mc_setsockopt(sk, level, optname, optval, optlen,
					    ip_setsockopt);

	ret = do_ip_setsockopt(sk, level, optname, optval, optlen);
#ifdef CONFIG_NETFILTER
	/* Only the "unknown option" flavour of ENOPROTOOPT goes to netfilter */
	if (ret != -ENOPROTOOPT)
		return ret;

	switch (optname) {
	case IP_HDRINCL:
	case IP_IPSEC_POLICY:
	case IP_XFRM_POLICY:
		return ret;
	}

	if (!ip_mroute_opt(optname))
		ret = compat_nf_setsockopt(sk, PF_INET, optname, optval,
					   optlen);
#endif
	return ret;
}
EXPORT_SYMBOL(compat_ip_setsockopt);
#endif
1291 | |
1292 | /* |
1293 | * Get the options. Note for future reference. The GET of IP options gets |
1294 | * the _received_ ones. The set sets the _sent_ ones. |
1295 | */ |
1296 | |
/*
 * Tell whether reading @optname has to be done with the RTNL lock held.
 * Only IP_MSFILTER and MCAST_MSFILTER qualify.
 */
static bool getsockopt_needs_rtnl(int optname)
{
	return optname == IP_MSFILTER || optname == MCAST_MSFILTER;
}
1306 | |
1307 | static int do_ip_getsockopt(struct sock *sk, int level, int optname, |
1308 | char __user *optval, int __user *optlen, unsigned int flags) |
1309 | { |
1310 | struct inet_sock *inet = inet_sk(sk); |
1311 | bool needs_rtnl = getsockopt_needs_rtnl(optname); |
1312 | int val, err = 0; |
1313 | int len; |
1314 | |
1315 | if (level != SOL_IP) |
1316 | return -EOPNOTSUPP; |
1317 | |
1318 | if (ip_mroute_opt(optname)) |
1319 | return ip_mroute_getsockopt(sk, optname, optval, optlen); |
1320 | |
1321 | if (get_user(len, optlen)) |
1322 | return -EFAULT; |
1323 | if (len < 0) |
1324 | return -EINVAL; |
1325 | |
1326 | if (needs_rtnl) |
1327 | rtnl_lock(); |
1328 | lock_sock(sk); |
1329 | |
1330 | switch (optname) { |
1331 | case IP_OPTIONS: |
1332 | { |
1333 | unsigned char optbuf[sizeof(struct ip_options)+40]; |
1334 | struct ip_options *opt = (struct ip_options *)optbuf; |
1335 | struct ip_options_rcu *inet_opt; |
1336 | |
1337 | inet_opt = rcu_dereference_protected(inet->inet_opt, |
1338 | lockdep_sock_is_held(sk)); |
1339 | opt->optlen = 0; |
1340 | if (inet_opt) |
1341 | memcpy(optbuf, &inet_opt->opt, |
1342 | sizeof(struct ip_options) + |
1343 | inet_opt->opt.optlen); |
1344 | release_sock(sk); |
1345 | |
1346 | if (opt->optlen == 0) |
1347 | return put_user(0, optlen); |
1348 | |
1349 | ip_options_undo(opt); |
1350 | |
1351 | len = min_t(unsigned int, len, opt->optlen); |
1352 | if (put_user(len, optlen)) |
1353 | return -EFAULT; |
1354 | if (copy_to_user(optval, opt->__data, len)) |
1355 | return -EFAULT; |
1356 | return 0; |
1357 | } |
1358 | case IP_PKTINFO: |
1359 | val = (inet->cmsg_flags & IP_CMSG_PKTINFO) != 0; |
1360 | break; |
1361 | case IP_RECVTTL: |
1362 | val = (inet->cmsg_flags & IP_CMSG_TTL) != 0; |
1363 | break; |
1364 | case IP_RECVTOS: |
1365 | val = (inet->cmsg_flags & IP_CMSG_TOS) != 0; |
1366 | break; |
1367 | case IP_RECVOPTS: |
1368 | val = (inet->cmsg_flags & IP_CMSG_RECVOPTS) != 0; |
1369 | break; |
1370 | case IP_RETOPTS: |
1371 | val = (inet->cmsg_flags & IP_CMSG_RETOPTS) != 0; |
1372 | break; |
1373 | case IP_PASSSEC: |
1374 | val = (inet->cmsg_flags & IP_CMSG_PASSSEC) != 0; |
1375 | break; |
1376 | case IP_RECVORIGDSTADDR: |
1377 | val = (inet->cmsg_flags & IP_CMSG_ORIGDSTADDR) != 0; |
1378 | break; |
1379 | case IP_CHECKSUM: |
1380 | val = (inet->cmsg_flags & IP_CMSG_CHECKSUM) != 0; |
1381 | break; |
1382 | case IP_RECVFRAGSIZE: |
1383 | val = (inet->cmsg_flags & IP_CMSG_RECVFRAGSIZE) != 0; |
1384 | break; |
1385 | case IP_TOS: |
1386 | val = inet->tos; |
1387 | break; |
1388 | case IP_TTL: |
1389 | { |
1390 | struct net *net = sock_net(sk); |
1391 | val = (inet->uc_ttl == -1 ? |
1392 | net->ipv4.sysctl_ip_default_ttl : |
1393 | inet->uc_ttl); |
1394 | break; |
1395 | } |
1396 | case IP_HDRINCL: |
1397 | val = inet->hdrincl; |
1398 | break; |
1399 | case IP_NODEFRAG: |
1400 | val = inet->nodefrag; |
1401 | break; |
1402 | case IP_BIND_ADDRESS_NO_PORT: |
1403 | val = inet->bind_address_no_port; |
1404 | break; |
1405 | case IP_MTU_DISCOVER: |
1406 | val = inet->pmtudisc; |
1407 | break; |
1408 | case IP_MTU: |
1409 | { |
1410 | struct dst_entry *dst; |
1411 | val = 0; |
1412 | dst = sk_dst_get(sk); |
1413 | if (dst) { |
1414 | val = dst_mtu(dst); |
1415 | dst_release(dst); |
1416 | } |
1417 | if (!val) { |
1418 | release_sock(sk); |
1419 | return -ENOTCONN; |
1420 | } |
1421 | break; |
1422 | } |
1423 | case IP_RECVERR: |
1424 | val = inet->recverr; |
1425 | break; |
1426 | case IP_MULTICAST_TTL: |
1427 | val = inet->mc_ttl; |
1428 | break; |
1429 | case IP_MULTICAST_LOOP: |
1430 | val = inet->mc_loop; |
1431 | break; |
1432 | case IP_UNICAST_IF: |
1433 | val = (__force int)htonl((__u32) inet->uc_index); |
1434 | break; |
1435 | case IP_MULTICAST_IF: |
1436 | { |
1437 | struct in_addr addr; |
1438 | len = min_t(unsigned int, len, sizeof(struct in_addr)); |
1439 | addr.s_addr = inet->mc_addr; |
1440 | release_sock(sk); |
1441 | |
1442 | if (put_user(len, optlen)) |
1443 | return -EFAULT; |
1444 | if (copy_to_user(optval, &addr, len)) |
1445 | return -EFAULT; |
1446 | return 0; |
1447 | } |
1448 | case IP_MSFILTER: |
1449 | { |
1450 | struct ip_msfilter msf; |
1451 | |
1452 | if (len < IP_MSFILTER_SIZE(0)) { |
1453 | err = -EINVAL; |
1454 | goto out; |
1455 | } |
1456 | if (copy_from_user(&msf, optval, IP_MSFILTER_SIZE(0))) { |
1457 | err = -EFAULT; |
1458 | goto out; |
1459 | } |
1460 | err = ip_mc_msfget(sk, &msf, |
1461 | (struct ip_msfilter __user *)optval, optlen); |
1462 | goto out; |
1463 | } |
1464 | case MCAST_MSFILTER: |
1465 | { |
1466 | struct group_filter gsf; |
1467 | |
1468 | if (len < GROUP_FILTER_SIZE(0)) { |
1469 | err = -EINVAL; |
1470 | goto out; |
1471 | } |
1472 | if (copy_from_user(&gsf, optval, GROUP_FILTER_SIZE(0))) { |
1473 | err = -EFAULT; |
1474 | goto out; |
1475 | } |
1476 | err = ip_mc_gsfget(sk, &gsf, |
1477 | (struct group_filter __user *)optval, |
1478 | optlen); |
1479 | goto out; |
1480 | } |
1481 | case IP_MULTICAST_ALL: |
1482 | val = inet->mc_all; |
1483 | break; |
1484 | case IP_PKTOPTIONS: |
1485 | { |
1486 | struct msghdr msg; |
1487 | |
1488 | release_sock(sk); |
1489 | |
1490 | if (sk->sk_type != SOCK_STREAM) |
1491 | return -ENOPROTOOPT; |
1492 | |
1493 | msg.msg_control = (__force void *) optval; |
1494 | msg.msg_controllen = len; |
1495 | msg.msg_flags = flags; |
1496 | |
1497 | if (inet->cmsg_flags & IP_CMSG_PKTINFO) { |
1498 | struct in_pktinfo info; |
1499 | |
1500 | info.ipi_addr.s_addr = inet->inet_rcv_saddr; |
1501 | info.ipi_spec_dst.s_addr = inet->inet_rcv_saddr; |
1502 | info.ipi_ifindex = inet->mc_index; |
1503 | put_cmsg(&msg, SOL_IP, IP_PKTINFO, sizeof(info), &info); |
1504 | } |
1505 | if (inet->cmsg_flags & IP_CMSG_TTL) { |
1506 | int hlim = inet->mc_ttl; |
1507 | put_cmsg(&msg, SOL_IP, IP_TTL, sizeof(hlim), &hlim); |
1508 | } |
1509 | if (inet->cmsg_flags & IP_CMSG_TOS) { |
1510 | int tos = inet->rcv_tos; |
1511 | put_cmsg(&msg, SOL_IP, IP_TOS, sizeof(tos), &tos); |
1512 | } |
1513 | len -= msg.msg_controllen; |
1514 | return put_user(len, optlen); |
1515 | } |
1516 | case IP_FREEBIND: |
1517 | val = inet->freebind; |
1518 | break; |
1519 | case IP_TRANSPARENT: |
1520 | val = inet->transparent; |
1521 | break; |
1522 | case IP_MINTTL: |
1523 | val = inet->min_ttl; |
1524 | break; |
1525 | default: |
1526 | release_sock(sk); |
1527 | return -ENOPROTOOPT; |
1528 | } |
1529 | release_sock(sk); |
1530 | |
1531 | if (len < sizeof(int) && len > 0 && val >= 0 && val <= 255) { |
1532 | unsigned char ucval = (unsigned char)val; |
1533 | len = 1; |
1534 | if (put_user(len, optlen)) |
1535 | return -EFAULT; |
1536 | if (copy_to_user(optval, &ucval, 1)) |
1537 | return -EFAULT; |
1538 | } else { |
1539 | len = min_t(unsigned int, sizeof(int), len); |
1540 | if (put_user(len, optlen)) |
1541 | return -EFAULT; |
1542 | if (copy_to_user(optval, &val, len)) |
1543 | return -EFAULT; |
1544 | } |
1545 | return 0; |
1546 | |
1547 | out: |
1548 | release_sock(sk); |
1549 | if (needs_rtnl) |
1550 | rtnl_unlock(); |
1551 | return err; |
1552 | } |
1553 | |
1554 | int ip_getsockopt(struct sock *sk, int level, |
1555 | int optname, char __user *optval, int __user *optlen) |
1556 | { |
1557 | int err; |
1558 | |
1559 | err = do_ip_getsockopt(sk, level, optname, optval, optlen, 0); |
1560 | #if IS_ENABLED(CONFIG_BPFILTER_UMH) |
1561 | if (optname >= BPFILTER_IPT_SO_GET_INFO && |
1562 | optname < BPFILTER_IPT_GET_MAX) |
1563 | err = bpfilter_ip_get_sockopt(sk, optname, optval, optlen); |
1564 | #endif |
1565 | #ifdef CONFIG_NETFILTER |
1566 | /* we need to exclude all possible ENOPROTOOPTs except default case */ |
1567 | if (err == -ENOPROTOOPT && optname != IP_PKTOPTIONS && |
1568 | !ip_mroute_opt(optname)) { |
1569 | int len; |
1570 | |
1571 | if (get_user(len, optlen)) |
1572 | return -EFAULT; |
1573 | |
1574 | err = nf_getsockopt(sk, PF_INET, optname, optval, &len); |
1575 | if (err >= 0) |
1576 | err = put_user(len, optlen); |
1577 | return err; |
1578 | } |
1579 | #endif |
1580 | return err; |
1581 | } |
1582 | EXPORT_SYMBOL(ip_getsockopt); |
1583 | |
#ifdef CONFIG_COMPAT
/*
 * compat_ip_getsockopt - SOL_IP getsockopt() entry point for compat callers.
 *
 * MCAST_MSFILTER is handed to compat_mc_getsockopt(), which receives
 * ip_getsockopt() as a callback.  Every other option goes through
 * do_ip_getsockopt() with MSG_CMSG_COMPAT as flags, followed by the same
 * bpfilter hook as the native path (CONFIG_BPFILTER_UMH).  With
 * CONFIG_NETFILTER, an "unknown option" -ENOPROTOOPT is retried through
 * compat_nf_getsockopt() and, on success, the resulting length is stored
 * in @optlen.
 *
 * Returns 0 or a negative errno.
 */
int compat_ip_getsockopt(struct sock *sk, int level, int optname,
			 char __user *optval, int __user *optlen)
{
	int ret;

	if (optname == MCAST_MSFILTER)
		return compat_mc_getsockopt(sk, level, optname, optval, optlen,
					    ip_getsockopt);

	ret = do_ip_getsockopt(sk, level, optname, optval, optlen,
			       MSG_CMSG_COMPAT);

#if IS_ENABLED(CONFIG_BPFILTER_UMH)
	if (optname >= BPFILTER_IPT_SO_GET_INFO &&
	    optname < BPFILTER_IPT_GET_MAX)
		ret = bpfilter_ip_get_sockopt(sk, optname, optval, optlen);
#endif
#ifdef CONFIG_NETFILTER
	/* Only the "unknown option" flavour of ENOPROTOOPT goes to netfilter */
	if (ret == -ENOPROTOOPT && optname != IP_PKTOPTIONS &&
	    !ip_mroute_opt(optname)) {
		int nf_len;

		if (get_user(nf_len, optlen))
			return -EFAULT;

		ret = compat_nf_getsockopt(sk, PF_INET, optname, optval,
					   &nf_len);
		return ret < 0 ? ret : put_user(nf_len, optlen);
	}
#endif
	return ret;
}
EXPORT_SYMBOL(compat_ip_getsockopt);
#endif
1621 | |