// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		RAW - implementation of IP "raw" sockets.
 *
 * Authors:	Ross Biro
 *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
 *
 * Fixes:
 *		Alan Cox	:	verify_area() fixed up
 *		Alan Cox	:	ICMP error handling
 *		Alan Cox	:	EMSGSIZE if you send too big a packet
 *		Alan Cox	:	Now uses generic datagrams and shared
 *					skbuff library. No more peek crashes,
 *					no more backlogs
 *		Alan Cox	:	Checks sk->broadcast.
 *		Alan Cox	:	Uses skb_free_datagram/skb_copy_datagram
 *		Alan Cox	:	Raw passes ip options too
 *		Alan Cox	:	Setsocketopt added
 *		Alan Cox	:	Fixed error return for broadcasts
 *		Alan Cox	:	Removed wake_up calls
 *		Alan Cox	:	Use ttl/tos
 *		Alan Cox	:	Cleaned up old debugging
 *		Alan Cox	:	Use new kernel side addresses
 *	Arnt Gulbrandsen	:	Fixed MSG_DONTROUTE in raw sockets.
 *		Alan Cox	:	BSD style RAW socket demultiplexing.
 *		Alan Cox	:	Beginnings of mrouted support.
 *		Alan Cox	:	Added IP_HDRINCL option.
 *		Alan Cox	:	Skip broadcast check if BSDism set.
 *	David S. Miller	:	New socket lookup architecture.
 */
35 | |
#include <linux/types.h>
#include <linux/atomic.h>
#include <asm/byteorder.h>
#include <asm/current.h>
#include <linux/uaccess.h>
#include <asm/ioctls.h>
#include <linux/stddef.h>
#include <linux/slab.h>
#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/export.h>
#include <linux/spinlock.h>
#include <linux/sockios.h>
#include <linux/socket.h>
#include <linux/in.h>
#include <linux/mroute.h>
#include <linux/netdevice.h>
#include <linux/if_ether.h>
#include <linux/in_route.h>
#include <linux/route.h>
#include <linux/skbuff.h>
#include <linux/igmp.h>
#include <net/net_namespace.h>
#include <net/dst.h>
#include <net/sock.h>
#include <linux/ip.h>
#include <linux/net.h>
#include <net/ip.h>
#include <net/icmp.h>
#include <net/udp.h>
#include <net/raw.h>
#include <net/snmp.h>
#include <net/tcp_states.h>
#include <net/inet_common.h>
#include <net/checksum.h>
#include <net/xfrm.h>
#include <linux/rtnetlink.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/netfilter.h>
#include <linux/netfilter_ipv4.h>
#include <linux/compat.h>
#include <linux/uio.h>
78 | |
79 | struct raw_frag_vec { |
80 | struct msghdr *msg; |
81 | union { |
82 | struct icmphdr icmph; |
83 | char c[1]; |
84 | } hdr; |
85 | int hlen; |
86 | }; |
87 | |
88 | struct raw_hashinfo raw_v4_hashinfo; |
89 | EXPORT_SYMBOL_GPL(raw_v4_hashinfo); |
90 | |
91 | int raw_hash_sk(struct sock *sk) |
92 | { |
93 | struct raw_hashinfo *h = sk->sk_prot->h.raw_hash; |
94 | struct hlist_head *hlist; |
95 | |
96 | hlist = &h->ht[raw_hashfunc(net: sock_net(sk), inet_sk(sk)->inet_num)]; |
97 | |
98 | spin_lock(lock: &h->lock); |
99 | sk_add_node_rcu(sk, list: hlist); |
100 | sock_set_flag(sk, flag: SOCK_RCU_FREE); |
101 | spin_unlock(lock: &h->lock); |
102 | sock_prot_inuse_add(net: sock_net(sk), prot: sk->sk_prot, val: 1); |
103 | |
104 | return 0; |
105 | } |
106 | EXPORT_SYMBOL_GPL(raw_hash_sk); |
107 | |
108 | void raw_unhash_sk(struct sock *sk) |
109 | { |
110 | struct raw_hashinfo *h = sk->sk_prot->h.raw_hash; |
111 | |
112 | spin_lock(lock: &h->lock); |
113 | if (sk_del_node_init_rcu(sk)) |
114 | sock_prot_inuse_add(net: sock_net(sk), prot: sk->sk_prot, val: -1); |
115 | spin_unlock(lock: &h->lock); |
116 | } |
117 | EXPORT_SYMBOL_GPL(raw_unhash_sk); |
118 | |
119 | bool raw_v4_match(struct net *net, const struct sock *sk, unsigned short num, |
120 | __be32 raddr, __be32 laddr, int dif, int sdif) |
121 | { |
122 | const struct inet_sock *inet = inet_sk(sk); |
123 | |
124 | if (net_eq(net1: sock_net(sk), net2: net) && inet->inet_num == num && |
125 | !(inet->inet_daddr && inet->inet_daddr != raddr) && |
126 | !(inet->inet_rcv_saddr && inet->inet_rcv_saddr != laddr) && |
127 | raw_sk_bound_dev_eq(net, bound_dev_if: sk->sk_bound_dev_if, dif, sdif)) |
128 | return true; |
129 | return false; |
130 | } |
131 | EXPORT_SYMBOL_GPL(raw_v4_match); |
132 | |
133 | /* |
134 | * 0 - deliver |
135 | * 1 - block |
136 | */ |
137 | static int icmp_filter(const struct sock *sk, const struct sk_buff *skb) |
138 | { |
139 | struct icmphdr _hdr; |
140 | const struct icmphdr *hdr; |
141 | |
142 | hdr = skb_header_pointer(skb, offset: skb_transport_offset(skb), |
143 | len: sizeof(_hdr), buffer: &_hdr); |
144 | if (!hdr) |
145 | return 1; |
146 | |
147 | if (hdr->type < 32) { |
148 | __u32 data = raw_sk(sk)->filter.data; |
149 | |
150 | return ((1U << hdr->type) & data) != 0; |
151 | } |
152 | |
153 | /* Do not block unknown ICMP types */ |
154 | return 0; |
155 | } |
156 | |
157 | /* IP input processing comes here for RAW socket delivery. |
158 | * Caller owns SKB, so we must make clones. |
159 | * |
160 | * RFC 1122: SHOULD pass TOS value up to the transport layer. |
161 | * -> It does. And not only TOS, but all IP header. |
162 | */ |
163 | static int raw_v4_input(struct net *net, struct sk_buff *skb, |
164 | const struct iphdr *iph, int hash) |
165 | { |
166 | int sdif = inet_sdif(skb); |
167 | struct hlist_head *hlist; |
168 | int dif = inet_iif(skb); |
169 | int delivered = 0; |
170 | struct sock *sk; |
171 | |
172 | hlist = &raw_v4_hashinfo.ht[hash]; |
173 | rcu_read_lock(); |
174 | sk_for_each_rcu(sk, hlist) { |
175 | if (!raw_v4_match(net, sk, iph->protocol, |
176 | iph->saddr, iph->daddr, dif, sdif)) |
177 | continue; |
178 | delivered = 1; |
179 | if ((iph->protocol != IPPROTO_ICMP || !icmp_filter(sk, skb)) && |
180 | ip_mc_sf_allow(sk, local: iph->daddr, rmt: iph->saddr, |
181 | dif: skb->dev->ifindex, sdif)) { |
182 | struct sk_buff *clone = skb_clone(skb, GFP_ATOMIC); |
183 | |
184 | /* Not releasing hash table! */ |
185 | if (clone) |
186 | raw_rcv(sk, clone); |
187 | } |
188 | } |
189 | rcu_read_unlock(); |
190 | return delivered; |
191 | } |
192 | |
193 | int raw_local_deliver(struct sk_buff *skb, int protocol) |
194 | { |
195 | struct net *net = dev_net(dev: skb->dev); |
196 | |
197 | return raw_v4_input(net, skb, iph: ip_hdr(skb), |
198 | hash: raw_hashfunc(net, proto: protocol)); |
199 | } |
200 | |
201 | static void raw_err(struct sock *sk, struct sk_buff *skb, u32 info) |
202 | { |
203 | struct inet_sock *inet = inet_sk(sk); |
204 | const int type = icmp_hdr(skb)->type; |
205 | const int code = icmp_hdr(skb)->code; |
206 | int harderr = 0; |
207 | bool recverr; |
208 | int err = 0; |
209 | |
210 | if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) |
211 | ipv4_sk_update_pmtu(skb, sk, mtu: info); |
212 | else if (type == ICMP_REDIRECT) { |
213 | ipv4_sk_redirect(skb, sk); |
214 | return; |
215 | } |
216 | |
217 | /* Report error on raw socket, if: |
218 | 1. User requested ip_recverr. |
219 | 2. Socket is connected (otherwise the error indication |
220 | is useless without ip_recverr and error is hard. |
221 | */ |
222 | recverr = inet_test_bit(RECVERR, sk); |
223 | if (!recverr && sk->sk_state != TCP_ESTABLISHED) |
224 | return; |
225 | |
226 | switch (type) { |
227 | default: |
228 | case ICMP_TIME_EXCEEDED: |
229 | err = EHOSTUNREACH; |
230 | break; |
231 | case ICMP_SOURCE_QUENCH: |
232 | return; |
233 | case ICMP_PARAMETERPROB: |
234 | err = EPROTO; |
235 | harderr = 1; |
236 | break; |
237 | case ICMP_DEST_UNREACH: |
238 | err = EHOSTUNREACH; |
239 | if (code > NR_ICMP_UNREACH) |
240 | break; |
241 | if (code == ICMP_FRAG_NEEDED) { |
242 | harderr = READ_ONCE(inet->pmtudisc) != IP_PMTUDISC_DONT; |
243 | err = EMSGSIZE; |
244 | } else { |
245 | err = icmp_err_convert[code].errno; |
246 | harderr = icmp_err_convert[code].fatal; |
247 | } |
248 | } |
249 | |
250 | if (recverr) { |
251 | const struct iphdr *iph = (const struct iphdr *)skb->data; |
252 | u8 *payload = skb->data + (iph->ihl << 2); |
253 | |
254 | if (inet_test_bit(HDRINCL, sk)) |
255 | payload = skb->data; |
256 | ip_icmp_error(sk, skb, err, port: 0, info, payload); |
257 | } |
258 | |
259 | if (recverr || harderr) { |
260 | sk->sk_err = err; |
261 | sk_error_report(sk); |
262 | } |
263 | } |
264 | |
265 | void raw_icmp_error(struct sk_buff *skb, int protocol, u32 info) |
266 | { |
267 | struct net *net = dev_net(dev: skb->dev); |
268 | int dif = skb->dev->ifindex; |
269 | int sdif = inet_sdif(skb); |
270 | struct hlist_head *hlist; |
271 | const struct iphdr *iph; |
272 | struct sock *sk; |
273 | int hash; |
274 | |
275 | hash = raw_hashfunc(net, proto: protocol); |
276 | hlist = &raw_v4_hashinfo.ht[hash]; |
277 | |
278 | rcu_read_lock(); |
279 | sk_for_each_rcu(sk, hlist) { |
280 | iph = (const struct iphdr *)skb->data; |
281 | if (!raw_v4_match(net, sk, iph->protocol, |
282 | iph->daddr, iph->saddr, dif, sdif)) |
283 | continue; |
284 | raw_err(sk, skb, info); |
285 | } |
286 | rcu_read_unlock(); |
287 | } |
288 | |
289 | static int raw_rcv_skb(struct sock *sk, struct sk_buff *skb) |
290 | { |
291 | enum skb_drop_reason reason; |
292 | |
293 | /* Charge it to the socket. */ |
294 | |
295 | ipv4_pktinfo_prepare(sk, skb); |
296 | if (sock_queue_rcv_skb_reason(sk, skb, reason: &reason) < 0) { |
297 | kfree_skb_reason(skb, reason); |
298 | return NET_RX_DROP; |
299 | } |
300 | |
301 | return NET_RX_SUCCESS; |
302 | } |
303 | |
304 | int raw_rcv(struct sock *sk, struct sk_buff *skb) |
305 | { |
306 | if (!xfrm4_policy_check(sk, dir: XFRM_POLICY_IN, skb)) { |
307 | atomic_inc(v: &sk->sk_drops); |
308 | kfree_skb_reason(skb, reason: SKB_DROP_REASON_XFRM_POLICY); |
309 | return NET_RX_DROP; |
310 | } |
311 | nf_reset_ct(skb); |
312 | |
313 | skb_push(skb, len: skb->data - skb_network_header(skb)); |
314 | |
315 | raw_rcv_skb(sk, skb); |
316 | return 0; |
317 | } |
318 | |
319 | static int raw_send_hdrinc(struct sock *sk, struct flowi4 *fl4, |
320 | struct msghdr *msg, size_t length, |
321 | struct rtable **rtp, unsigned int flags, |
322 | const struct sockcm_cookie *sockc) |
323 | { |
324 | struct inet_sock *inet = inet_sk(sk); |
325 | struct net *net = sock_net(sk); |
326 | struct iphdr *iph; |
327 | struct sk_buff *skb; |
328 | unsigned int iphlen; |
329 | int err; |
330 | struct rtable *rt = *rtp; |
331 | int hlen, tlen; |
332 | |
333 | if (length > rt->dst.dev->mtu) { |
334 | ip_local_error(sk, EMSGSIZE, daddr: fl4->daddr, dport: inet->inet_dport, |
335 | info: rt->dst.dev->mtu); |
336 | return -EMSGSIZE; |
337 | } |
338 | if (length < sizeof(struct iphdr)) |
339 | return -EINVAL; |
340 | |
341 | if (flags&MSG_PROBE) |
342 | goto out; |
343 | |
344 | hlen = LL_RESERVED_SPACE(rt->dst.dev); |
345 | tlen = rt->dst.dev->needed_tailroom; |
346 | skb = sock_alloc_send_skb(sk, |
347 | size: length + hlen + tlen + 15, |
348 | noblock: flags & MSG_DONTWAIT, errcode: &err); |
349 | if (!skb) |
350 | goto error; |
351 | skb_reserve(skb, len: hlen); |
352 | |
353 | skb->priority = READ_ONCE(sk->sk_priority); |
354 | skb->mark = sockc->mark; |
355 | skb->tstamp = sockc->transmit_time; |
356 | skb_dst_set(skb, dst: &rt->dst); |
357 | *rtp = NULL; |
358 | |
359 | skb_reset_network_header(skb); |
360 | iph = ip_hdr(skb); |
361 | skb_put(skb, len: length); |
362 | |
363 | skb->ip_summed = CHECKSUM_NONE; |
364 | |
365 | skb_setup_tx_timestamp(skb, tsflags: sockc->tsflags); |
366 | |
367 | if (flags & MSG_CONFIRM) |
368 | skb_set_dst_pending_confirm(skb, val: 1); |
369 | |
370 | skb->transport_header = skb->network_header; |
371 | err = -EFAULT; |
372 | if (memcpy_from_msg(data: iph, msg, len: length)) |
373 | goto error_free; |
374 | |
375 | iphlen = iph->ihl * 4; |
376 | |
377 | /* |
378 | * We don't want to modify the ip header, but we do need to |
379 | * be sure that it won't cause problems later along the network |
380 | * stack. Specifically we want to make sure that iph->ihl is a |
381 | * sane value. If ihl points beyond the length of the buffer passed |
382 | * in, reject the frame as invalid |
383 | */ |
384 | err = -EINVAL; |
385 | if (iphlen > length) |
386 | goto error_free; |
387 | |
388 | if (iphlen >= sizeof(*iph)) { |
389 | if (!iph->saddr) |
390 | iph->saddr = fl4->saddr; |
391 | iph->check = 0; |
392 | iph->tot_len = htons(length); |
393 | if (!iph->id) |
394 | ip_select_ident(net, skb, NULL); |
395 | |
396 | iph->check = ip_fast_csum(iph: (unsigned char *)iph, ihl: iph->ihl); |
397 | skb->transport_header += iphlen; |
398 | if (iph->protocol == IPPROTO_ICMP && |
399 | length >= iphlen + sizeof(struct icmphdr)) |
400 | icmp_out_count(net, type: ((struct icmphdr *) |
401 | skb_transport_header(skb))->type); |
402 | } |
403 | |
404 | err = NF_HOOK(pf: NFPROTO_IPV4, hook: NF_INET_LOCAL_OUT, |
405 | net, sk, skb, NULL, out: rt->dst.dev, |
406 | okfn: dst_output); |
407 | if (err > 0) |
408 | err = net_xmit_errno(err); |
409 | if (err) |
410 | goto error; |
411 | out: |
412 | return 0; |
413 | |
414 | error_free: |
415 | kfree_skb(skb); |
416 | error: |
417 | IP_INC_STATS(net, IPSTATS_MIB_OUTDISCARDS); |
418 | if (err == -ENOBUFS && !inet_test_bit(RECVERR, sk)) |
419 | err = 0; |
420 | return err; |
421 | } |
422 | |
423 | static int raw_probe_proto_opt(struct raw_frag_vec *rfv, struct flowi4 *fl4) |
424 | { |
425 | int err; |
426 | |
427 | if (fl4->flowi4_proto != IPPROTO_ICMP) |
428 | return 0; |
429 | |
430 | /* We only need the first two bytes. */ |
431 | rfv->hlen = 2; |
432 | |
433 | err = memcpy_from_msg(data: rfv->hdr.c, msg: rfv->msg, len: rfv->hlen); |
434 | if (err) |
435 | return err; |
436 | |
437 | fl4->fl4_icmp_type = rfv->hdr.icmph.type; |
438 | fl4->fl4_icmp_code = rfv->hdr.icmph.code; |
439 | |
440 | return 0; |
441 | } |
442 | |
443 | static int raw_getfrag(void *from, char *to, int offset, int len, int odd, |
444 | struct sk_buff *skb) |
445 | { |
446 | struct raw_frag_vec *rfv = from; |
447 | |
448 | if (offset < rfv->hlen) { |
449 | int copy = min(rfv->hlen - offset, len); |
450 | |
451 | if (skb->ip_summed == CHECKSUM_PARTIAL) |
452 | memcpy(to, rfv->hdr.c + offset, copy); |
453 | else |
454 | skb->csum = csum_block_add( |
455 | csum: skb->csum, |
456 | csum2: csum_partial_copy_nocheck(src: rfv->hdr.c + offset, |
457 | dst: to, len: copy), |
458 | offset: odd); |
459 | |
460 | odd = 0; |
461 | offset += copy; |
462 | to += copy; |
463 | len -= copy; |
464 | |
465 | if (!len) |
466 | return 0; |
467 | } |
468 | |
469 | offset -= rfv->hlen; |
470 | |
471 | return ip_generic_getfrag(from: rfv->msg, to, offset, len, odd, skb); |
472 | } |
473 | |
474 | static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) |
475 | { |
476 | struct inet_sock *inet = inet_sk(sk); |
477 | struct net *net = sock_net(sk); |
478 | struct ipcm_cookie ipc; |
479 | struct rtable *rt = NULL; |
480 | struct flowi4 fl4; |
481 | u8 tos, scope; |
482 | int free = 0; |
483 | __be32 daddr; |
484 | __be32 saddr; |
485 | int uc_index, err; |
486 | struct ip_options_data opt_copy; |
487 | struct raw_frag_vec rfv; |
488 | int hdrincl; |
489 | |
490 | err = -EMSGSIZE; |
491 | if (len > 0xFFFF) |
492 | goto out; |
493 | |
494 | hdrincl = inet_test_bit(HDRINCL, sk); |
495 | |
496 | /* |
497 | * Check the flags. |
498 | */ |
499 | |
500 | err = -EOPNOTSUPP; |
501 | if (msg->msg_flags & MSG_OOB) /* Mirror BSD error message */ |
502 | goto out; /* compatibility */ |
503 | |
504 | /* |
505 | * Get and verify the address. |
506 | */ |
507 | |
508 | if (msg->msg_namelen) { |
509 | DECLARE_SOCKADDR(struct sockaddr_in *, usin, msg->msg_name); |
510 | err = -EINVAL; |
511 | if (msg->msg_namelen < sizeof(*usin)) |
512 | goto out; |
513 | if (usin->sin_family != AF_INET) { |
514 | pr_info_once("%s: %s forgot to set AF_INET. Fix it!\n" , |
515 | __func__, current->comm); |
516 | err = -EAFNOSUPPORT; |
517 | if (usin->sin_family) |
518 | goto out; |
519 | } |
520 | daddr = usin->sin_addr.s_addr; |
521 | /* ANK: I did not forget to get protocol from port field. |
522 | * I just do not know, who uses this weirdness. |
523 | * IP_HDRINCL is much more convenient. |
524 | */ |
525 | } else { |
526 | err = -EDESTADDRREQ; |
527 | if (sk->sk_state != TCP_ESTABLISHED) |
528 | goto out; |
529 | daddr = inet->inet_daddr; |
530 | } |
531 | |
532 | ipcm_init_sk(ipcm: &ipc, inet); |
533 | /* Keep backward compat */ |
534 | if (hdrincl) |
535 | ipc.protocol = IPPROTO_RAW; |
536 | |
537 | if (msg->msg_controllen) { |
538 | err = ip_cmsg_send(sk, msg, ipc: &ipc, allow_ipv6: false); |
539 | if (unlikely(err)) { |
540 | kfree(objp: ipc.opt); |
541 | goto out; |
542 | } |
543 | if (ipc.opt) |
544 | free = 1; |
545 | } |
546 | |
547 | saddr = ipc.addr; |
548 | ipc.addr = daddr; |
549 | |
550 | if (!ipc.opt) { |
551 | struct ip_options_rcu *inet_opt; |
552 | |
553 | rcu_read_lock(); |
554 | inet_opt = rcu_dereference(inet->inet_opt); |
555 | if (inet_opt) { |
556 | memcpy(&opt_copy, inet_opt, |
557 | sizeof(*inet_opt) + inet_opt->opt.optlen); |
558 | ipc.opt = &opt_copy.opt; |
559 | } |
560 | rcu_read_unlock(); |
561 | } |
562 | |
563 | if (ipc.opt) { |
564 | err = -EINVAL; |
565 | /* Linux does not mangle headers on raw sockets, |
566 | * so that IP options + IP_HDRINCL is non-sense. |
567 | */ |
568 | if (hdrincl) |
569 | goto done; |
570 | if (ipc.opt->opt.srr) { |
571 | if (!daddr) |
572 | goto done; |
573 | daddr = ipc.opt->opt.faddr; |
574 | } |
575 | } |
576 | tos = get_rttos(ipc: &ipc, inet); |
577 | scope = ip_sendmsg_scope(inet, ipc: &ipc, msg); |
578 | |
579 | uc_index = READ_ONCE(inet->uc_index); |
580 | if (ipv4_is_multicast(addr: daddr)) { |
581 | if (!ipc.oif || netif_index_is_l3_master(net: sock_net(sk), ifindex: ipc.oif)) |
582 | ipc.oif = READ_ONCE(inet->mc_index); |
583 | if (!saddr) |
584 | saddr = READ_ONCE(inet->mc_addr); |
585 | } else if (!ipc.oif) { |
586 | ipc.oif = uc_index; |
587 | } else if (ipv4_is_lbcast(addr: daddr) && uc_index) { |
588 | /* oif is set, packet is to local broadcast |
589 | * and uc_index is set. oif is most likely set |
590 | * by sk_bound_dev_if. If uc_index != oif check if the |
591 | * oif is an L3 master and uc_index is an L3 slave. |
592 | * If so, we want to allow the send using the uc_index. |
593 | */ |
594 | if (ipc.oif != uc_index && |
595 | ipc.oif == l3mdev_master_ifindex_by_index(net: sock_net(sk), |
596 | ifindex: uc_index)) { |
597 | ipc.oif = uc_index; |
598 | } |
599 | } |
600 | |
601 | flowi4_init_output(fl4: &fl4, oif: ipc.oif, mark: ipc.sockc.mark, tos, scope, |
602 | proto: hdrincl ? ipc.protocol : sk->sk_protocol, |
603 | flags: inet_sk_flowi_flags(sk) | |
604 | (hdrincl ? FLOWI_FLAG_KNOWN_NH : 0), |
605 | daddr, saddr, dport: 0, sport: 0, uid: sk->sk_uid); |
606 | |
607 | if (!hdrincl) { |
608 | rfv.msg = msg; |
609 | rfv.hlen = 0; |
610 | |
611 | err = raw_probe_proto_opt(rfv: &rfv, fl4: &fl4); |
612 | if (err) |
613 | goto done; |
614 | } |
615 | |
616 | security_sk_classify_flow(sk, flic: flowi4_to_flowi_common(fl4: &fl4)); |
617 | rt = ip_route_output_flow(net, flp: &fl4, sk); |
618 | if (IS_ERR(ptr: rt)) { |
619 | err = PTR_ERR(ptr: rt); |
620 | rt = NULL; |
621 | goto done; |
622 | } |
623 | |
624 | err = -EACCES; |
625 | if (rt->rt_flags & RTCF_BROADCAST && !sock_flag(sk, flag: SOCK_BROADCAST)) |
626 | goto done; |
627 | |
628 | if (msg->msg_flags & MSG_CONFIRM) |
629 | goto do_confirm; |
630 | back_from_confirm: |
631 | |
632 | if (hdrincl) |
633 | err = raw_send_hdrinc(sk, fl4: &fl4, msg, length: len, |
634 | rtp: &rt, flags: msg->msg_flags, sockc: &ipc.sockc); |
635 | |
636 | else { |
637 | if (!ipc.addr) |
638 | ipc.addr = fl4.daddr; |
639 | lock_sock(sk); |
640 | err = ip_append_data(sk, fl4: &fl4, getfrag: raw_getfrag, |
641 | from: &rfv, len, protolen: 0, |
642 | ipc: &ipc, rt: &rt, flags: msg->msg_flags); |
643 | if (err) |
644 | ip_flush_pending_frames(sk); |
645 | else if (!(msg->msg_flags & MSG_MORE)) { |
646 | err = ip_push_pending_frames(sk, fl4: &fl4); |
647 | if (err == -ENOBUFS && !inet_test_bit(RECVERR, sk)) |
648 | err = 0; |
649 | } |
650 | release_sock(sk); |
651 | } |
652 | done: |
653 | if (free) |
654 | kfree(objp: ipc.opt); |
655 | ip_rt_put(rt); |
656 | |
657 | out: |
658 | if (err < 0) |
659 | return err; |
660 | return len; |
661 | |
662 | do_confirm: |
663 | if (msg->msg_flags & MSG_PROBE) |
664 | dst_confirm_neigh(dst: &rt->dst, daddr: &fl4.daddr); |
665 | if (!(msg->msg_flags & MSG_PROBE) || len) |
666 | goto back_from_confirm; |
667 | err = 0; |
668 | goto done; |
669 | } |
670 | |
671 | static void raw_close(struct sock *sk, long timeout) |
672 | { |
673 | /* |
674 | * Raw sockets may have direct kernel references. Kill them. |
675 | */ |
676 | ip_ra_control(sk, on: 0, NULL); |
677 | |
678 | sk_common_release(sk); |
679 | } |
680 | |
/* Destroy hook: flush any frames still pending, under the socket lock. */
static void raw_destroy(struct sock *sk)
{
	lock_sock(sk);
	ip_flush_pending_frames(sk);
	release_sock(sk);
}
687 | |
688 | /* This gets rid of all the nasties in af_inet. -DaveM */ |
689 | static int raw_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len) |
690 | { |
691 | struct inet_sock *inet = inet_sk(sk); |
692 | struct sockaddr_in *addr = (struct sockaddr_in *) uaddr; |
693 | struct net *net = sock_net(sk); |
694 | u32 tb_id = RT_TABLE_LOCAL; |
695 | int ret = -EINVAL; |
696 | int chk_addr_ret; |
697 | |
698 | lock_sock(sk); |
699 | if (sk->sk_state != TCP_CLOSE || addr_len < sizeof(struct sockaddr_in)) |
700 | goto out; |
701 | |
702 | if (sk->sk_bound_dev_if) |
703 | tb_id = l3mdev_fib_table_by_index(net, |
704 | ifindex: sk->sk_bound_dev_if) ? : tb_id; |
705 | |
706 | chk_addr_ret = inet_addr_type_table(net, addr: addr->sin_addr.s_addr, tb_id); |
707 | |
708 | ret = -EADDRNOTAVAIL; |
709 | if (!inet_addr_valid_or_nonlocal(net, inet, addr: addr->sin_addr.s_addr, |
710 | addr_type: chk_addr_ret)) |
711 | goto out; |
712 | |
713 | inet->inet_rcv_saddr = inet->inet_saddr = addr->sin_addr.s_addr; |
714 | if (chk_addr_ret == RTN_MULTICAST || chk_addr_ret == RTN_BROADCAST) |
715 | inet->inet_saddr = 0; /* Use device */ |
716 | sk_dst_reset(sk); |
717 | ret = 0; |
718 | out: |
719 | release_sock(sk); |
720 | return ret; |
721 | } |
722 | |
723 | /* |
724 | * This should be easy, if there is something there |
725 | * we return it, otherwise we block. |
726 | */ |
727 | |
728 | static int raw_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, |
729 | int flags, int *addr_len) |
730 | { |
731 | struct inet_sock *inet = inet_sk(sk); |
732 | size_t copied = 0; |
733 | int err = -EOPNOTSUPP; |
734 | DECLARE_SOCKADDR(struct sockaddr_in *, sin, msg->msg_name); |
735 | struct sk_buff *skb; |
736 | |
737 | if (flags & MSG_OOB) |
738 | goto out; |
739 | |
740 | if (flags & MSG_ERRQUEUE) { |
741 | err = ip_recv_error(sk, msg, len, addr_len); |
742 | goto out; |
743 | } |
744 | |
745 | skb = skb_recv_datagram(sk, flags, err: &err); |
746 | if (!skb) |
747 | goto out; |
748 | |
749 | copied = skb->len; |
750 | if (len < copied) { |
751 | msg->msg_flags |= MSG_TRUNC; |
752 | copied = len; |
753 | } |
754 | |
755 | err = skb_copy_datagram_msg(from: skb, offset: 0, msg, size: copied); |
756 | if (err) |
757 | goto done; |
758 | |
759 | sock_recv_cmsgs(msg, sk, skb); |
760 | |
761 | /* Copy the address. */ |
762 | if (sin) { |
763 | sin->sin_family = AF_INET; |
764 | sin->sin_addr.s_addr = ip_hdr(skb)->saddr; |
765 | sin->sin_port = 0; |
766 | memset(&sin->sin_zero, 0, sizeof(sin->sin_zero)); |
767 | *addr_len = sizeof(*sin); |
768 | } |
769 | if (inet_cmsg_flags(inet)) |
770 | ip_cmsg_recv(msg, skb); |
771 | if (flags & MSG_TRUNC) |
772 | copied = skb->len; |
773 | done: |
774 | skb_free_datagram(sk, skb); |
775 | out: |
776 | if (err) |
777 | return err; |
778 | return copied; |
779 | } |
780 | |
781 | static int raw_sk_init(struct sock *sk) |
782 | { |
783 | struct raw_sock *rp = raw_sk(sk); |
784 | |
785 | if (inet_sk(sk)->inet_num == IPPROTO_ICMP) |
786 | memset(&rp->filter, 0, sizeof(rp->filter)); |
787 | return 0; |
788 | } |
789 | |
790 | static int raw_seticmpfilter(struct sock *sk, sockptr_t optval, int optlen) |
791 | { |
792 | if (optlen > sizeof(struct icmp_filter)) |
793 | optlen = sizeof(struct icmp_filter); |
794 | if (copy_from_sockptr(dst: &raw_sk(sk)->filter, src: optval, size: optlen)) |
795 | return -EFAULT; |
796 | return 0; |
797 | } |
798 | |
799 | static int raw_geticmpfilter(struct sock *sk, char __user *optval, int __user *optlen) |
800 | { |
801 | int len, ret = -EFAULT; |
802 | |
803 | if (get_user(len, optlen)) |
804 | goto out; |
805 | ret = -EINVAL; |
806 | if (len < 0) |
807 | goto out; |
808 | if (len > sizeof(struct icmp_filter)) |
809 | len = sizeof(struct icmp_filter); |
810 | ret = -EFAULT; |
811 | if (put_user(len, optlen) || |
812 | copy_to_user(to: optval, from: &raw_sk(sk)->filter, n: len)) |
813 | goto out; |
814 | ret = 0; |
815 | out: return ret; |
816 | } |
817 | |
818 | static int do_raw_setsockopt(struct sock *sk, int level, int optname, |
819 | sockptr_t optval, unsigned int optlen) |
820 | { |
821 | if (optname == ICMP_FILTER) { |
822 | if (inet_sk(sk)->inet_num != IPPROTO_ICMP) |
823 | return -EOPNOTSUPP; |
824 | else |
825 | return raw_seticmpfilter(sk, optval, optlen); |
826 | } |
827 | return -ENOPROTOOPT; |
828 | } |
829 | |
830 | static int raw_setsockopt(struct sock *sk, int level, int optname, |
831 | sockptr_t optval, unsigned int optlen) |
832 | { |
833 | if (level != SOL_RAW) |
834 | return ip_setsockopt(sk, level, optname, optval, optlen); |
835 | return do_raw_setsockopt(sk, level, optname, optval, optlen); |
836 | } |
837 | |
838 | static int do_raw_getsockopt(struct sock *sk, int level, int optname, |
839 | char __user *optval, int __user *optlen) |
840 | { |
841 | if (optname == ICMP_FILTER) { |
842 | if (inet_sk(sk)->inet_num != IPPROTO_ICMP) |
843 | return -EOPNOTSUPP; |
844 | else |
845 | return raw_geticmpfilter(sk, optval, optlen); |
846 | } |
847 | return -ENOPROTOOPT; |
848 | } |
849 | |
850 | static int raw_getsockopt(struct sock *sk, int level, int optname, |
851 | char __user *optval, int __user *optlen) |
852 | { |
853 | if (level != SOL_RAW) |
854 | return ip_getsockopt(sk, level, optname, optval, optlen); |
855 | return do_raw_getsockopt(sk, level, optname, optval, optlen); |
856 | } |
857 | |
858 | static int raw_ioctl(struct sock *sk, int cmd, int *karg) |
859 | { |
860 | switch (cmd) { |
861 | case SIOCOUTQ: { |
862 | *karg = sk_wmem_alloc_get(sk); |
863 | return 0; |
864 | } |
865 | case SIOCINQ: { |
866 | struct sk_buff *skb; |
867 | |
868 | spin_lock_bh(lock: &sk->sk_receive_queue.lock); |
869 | skb = skb_peek(list_: &sk->sk_receive_queue); |
870 | if (skb) |
871 | *karg = skb->len; |
872 | else |
873 | *karg = 0; |
874 | spin_unlock_bh(lock: &sk->sk_receive_queue.lock); |
875 | return 0; |
876 | } |
877 | |
878 | default: |
879 | #ifdef CONFIG_IP_MROUTE |
880 | return ipmr_ioctl(sk, cmd, arg: karg); |
881 | #else |
882 | return -ENOIOCTLCMD; |
883 | #endif |
884 | } |
885 | } |
886 | |
#ifdef CONFIG_COMPAT
/*
 * Compat ioctl handler.  SIOCOUTQ and SIOCINQ return -ENOIOCTLCMD here;
 * everything else goes to ipmr_compat_ioctl() when CONFIG_IP_MROUTE is
 * enabled and returns -ENOIOCTLCMD otherwise.
 */
static int compat_raw_ioctl(struct sock *sk, unsigned int cmd, unsigned long arg)
{
	if (cmd == SIOCOUTQ || cmd == SIOCINQ)
		return -ENOIOCTLCMD;

#ifdef CONFIG_IP_MROUTE
	return ipmr_compat_ioctl(sk, cmd, compat_ptr(arg));
#else
	return -ENOIOCTLCMD;
#endif
}
#endif
903 | |
904 | int raw_abort(struct sock *sk, int err) |
905 | { |
906 | lock_sock(sk); |
907 | |
908 | sk->sk_err = err; |
909 | sk_error_report(sk); |
910 | __udp_disconnect(sk, flags: 0); |
911 | |
912 | release_sock(sk); |
913 | |
914 | return 0; |
915 | } |
916 | EXPORT_SYMBOL_GPL(raw_abort); |
917 | |
918 | struct proto raw_prot = { |
919 | .name = "RAW" , |
920 | .owner = THIS_MODULE, |
921 | .close = raw_close, |
922 | .destroy = raw_destroy, |
923 | .connect = ip4_datagram_connect, |
924 | .disconnect = __udp_disconnect, |
925 | .ioctl = raw_ioctl, |
926 | .init = raw_sk_init, |
927 | .setsockopt = raw_setsockopt, |
928 | .getsockopt = raw_getsockopt, |
929 | .sendmsg = raw_sendmsg, |
930 | .recvmsg = raw_recvmsg, |
931 | .bind = raw_bind, |
932 | .backlog_rcv = raw_rcv_skb, |
933 | .release_cb = ip4_datagram_release_cb, |
934 | .hash = raw_hash_sk, |
935 | .unhash = raw_unhash_sk, |
936 | .obj_size = sizeof(struct raw_sock), |
937 | .useroffset = offsetof(struct raw_sock, filter), |
938 | .usersize = sizeof_field(struct raw_sock, filter), |
939 | .h.raw_hash = &raw_v4_hashinfo, |
940 | #ifdef CONFIG_COMPAT |
941 | .compat_ioctl = compat_raw_ioctl, |
942 | #endif |
943 | .diag_destroy = raw_abort, |
944 | }; |
945 | |
946 | #ifdef CONFIG_PROC_FS |
947 | static struct sock *raw_get_first(struct seq_file *seq, int bucket) |
948 | { |
949 | struct raw_hashinfo *h = pde_data(inode: file_inode(f: seq->file)); |
950 | struct raw_iter_state *state = raw_seq_private(seq); |
951 | struct hlist_head *hlist; |
952 | struct sock *sk; |
953 | |
954 | for (state->bucket = bucket; state->bucket < RAW_HTABLE_SIZE; |
955 | ++state->bucket) { |
956 | hlist = &h->ht[state->bucket]; |
957 | sk_for_each(sk, hlist) { |
958 | if (sock_net(sk) == seq_file_net(seq)) |
959 | return sk; |
960 | } |
961 | } |
962 | return NULL; |
963 | } |
964 | |
965 | static struct sock *raw_get_next(struct seq_file *seq, struct sock *sk) |
966 | { |
967 | struct raw_iter_state *state = raw_seq_private(seq); |
968 | |
969 | do { |
970 | sk = sk_next(sk); |
971 | } while (sk && sock_net(sk) != seq_file_net(seq)); |
972 | |
973 | if (!sk) |
974 | return raw_get_first(seq, bucket: state->bucket + 1); |
975 | return sk; |
976 | } |
977 | |
978 | static struct sock *raw_get_idx(struct seq_file *seq, loff_t pos) |
979 | { |
980 | struct sock *sk = raw_get_first(seq, bucket: 0); |
981 | |
982 | if (sk) |
983 | while (pos && (sk = raw_get_next(seq, sk)) != NULL) |
984 | --pos; |
985 | return pos ? NULL : sk; |
986 | } |
987 | |
988 | void *raw_seq_start(struct seq_file *seq, loff_t *pos) |
989 | __acquires(&h->lock) |
990 | { |
991 | struct raw_hashinfo *h = pde_data(inode: file_inode(f: seq->file)); |
992 | |
993 | spin_lock(lock: &h->lock); |
994 | |
995 | return *pos ? raw_get_idx(seq, pos: *pos - 1) : SEQ_START_TOKEN; |
996 | } |
997 | EXPORT_SYMBOL_GPL(raw_seq_start); |
998 | |
999 | void *raw_seq_next(struct seq_file *seq, void *v, loff_t *pos) |
1000 | { |
1001 | struct sock *sk; |
1002 | |
1003 | if (v == SEQ_START_TOKEN) |
1004 | sk = raw_get_first(seq, bucket: 0); |
1005 | else |
1006 | sk = raw_get_next(seq, sk: v); |
1007 | ++*pos; |
1008 | return sk; |
1009 | } |
1010 | EXPORT_SYMBOL_GPL(raw_seq_next); |
1011 | |
1012 | void raw_seq_stop(struct seq_file *seq, void *v) |
1013 | __releases(&h->lock) |
1014 | { |
1015 | struct raw_hashinfo *h = pde_data(inode: file_inode(f: seq->file)); |
1016 | |
1017 | spin_unlock(lock: &h->lock); |
1018 | } |
1019 | EXPORT_SYMBOL_GPL(raw_seq_stop); |
1020 | |
1021 | static void raw_sock_seq_show(struct seq_file *seq, struct sock *sp, int i) |
1022 | { |
1023 | struct inet_sock *inet = inet_sk(sp); |
1024 | __be32 dest = inet->inet_daddr, |
1025 | src = inet->inet_rcv_saddr; |
1026 | __u16 destp = 0, |
1027 | srcp = inet->inet_num; |
1028 | |
1029 | seq_printf(m: seq, fmt: "%4d: %08X:%04X %08X:%04X" |
1030 | " %02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %u\n" , |
1031 | i, src, srcp, dest, destp, sp->sk_state, |
1032 | sk_wmem_alloc_get(sk: sp), |
1033 | sk_rmem_alloc_get(sk: sp), |
1034 | 0, 0L, 0, |
1035 | from_kuid_munged(to: seq_user_ns(seq), uid: sock_i_uid(sk: sp)), |
1036 | 0, sock_i_ino(sk: sp), |
1037 | refcount_read(r: &sp->sk_refcnt), sp, atomic_read(v: &sp->sk_drops)); |
1038 | } |
1039 | |
1040 | static int raw_seq_show(struct seq_file *seq, void *v) |
1041 | { |
1042 | if (v == SEQ_START_TOKEN) |
1043 | seq_printf(m: seq, fmt: " sl local_address rem_address st tx_queue " |
1044 | "rx_queue tr tm->when retrnsmt uid timeout " |
1045 | "inode ref pointer drops\n" ); |
1046 | else |
1047 | raw_sock_seq_show(seq, sp: v, i: raw_seq_private(seq)->bucket); |
1048 | return 0; |
1049 | } |
1050 | |
1051 | static const struct seq_operations raw_seq_ops = { |
1052 | .start = raw_seq_start, |
1053 | .next = raw_seq_next, |
1054 | .stop = raw_seq_stop, |
1055 | .show = raw_seq_show, |
1056 | }; |
1057 | |
1058 | static __net_init int raw_init_net(struct net *net) |
1059 | { |
1060 | if (!proc_create_net_data(name: "raw" , mode: 0444, parent: net->proc_net, ops: &raw_seq_ops, |
1061 | state_size: sizeof(struct raw_iter_state), data: &raw_v4_hashinfo)) |
1062 | return -ENOMEM; |
1063 | |
1064 | return 0; |
1065 | } |
1066 | |
1067 | static __net_exit void raw_exit_net(struct net *net) |
1068 | { |
1069 | remove_proc_entry("raw" , net->proc_net); |
1070 | } |
1071 | |
1072 | static __net_initdata struct pernet_operations raw_net_ops = { |
1073 | .init = raw_init_net, |
1074 | .exit = raw_exit_net, |
1075 | }; |
1076 | |
1077 | int __init raw_proc_init(void) |
1078 | { |
1079 | |
1080 | return register_pernet_subsys(&raw_net_ops); |
1081 | } |
1082 | |
1083 | void __init raw_proc_exit(void) |
1084 | { |
1085 | unregister_pernet_subsys(&raw_net_ops); |
1086 | } |
1087 | #endif /* CONFIG_PROC_FS */ |
1088 | |
/*
 * Set the per-namespace raw sysctl defaults.  With
 * CONFIG_NET_L3_MASTER_DEV, sysctl_raw_l3mdev_accept starts at 1; without
 * it this function does nothing.
 */
static void raw_sysctl_init_net(struct net *net)
{
#ifdef CONFIG_NET_L3_MASTER_DEV
	net->ipv4.sysctl_raw_l3mdev_accept = 1;
#endif
}
1095 | |
1096 | static int __net_init raw_sysctl_init(struct net *net) |
1097 | { |
1098 | raw_sysctl_init_net(net); |
1099 | return 0; |
1100 | } |
1101 | |
1102 | static struct pernet_operations __net_initdata raw_sysctl_ops = { |
1103 | .init = raw_sysctl_init, |
1104 | }; |
1105 | |
1106 | void __init raw_init(void) |
1107 | { |
1108 | raw_sysctl_init_net(net: &init_net); |
1109 | if (register_pernet_subsys(&raw_sysctl_ops)) |
1110 | panic(fmt: "RAW: failed to init sysctl parameters.\n" ); |
1111 | } |
1112 | |