1 | // SPDX-License-Identifier: GPL-2.0 |
2 | /* Multipath TCP |
3 | * |
4 | * Copyright (c) 2021, Red Hat. |
5 | */ |
6 | |
7 | #define pr_fmt(fmt) "MPTCP: " fmt |
8 | |
9 | #include <linux/kernel.h> |
10 | #include <linux/module.h> |
11 | #include <net/sock.h> |
12 | #include <net/protocol.h> |
13 | #include <net/tcp.h> |
14 | #include <net/mptcp.h> |
15 | #include "protocol.h" |
16 | |
/* Smallest optlen accepted by mptcp_get_subflow_data(); a build-time check
 * there requires it to equal sizeof(struct mptcp_subflow_data).
 */
#define MIN_INFO_OPTLEN_SIZE		16
/* NOTE(review): presumably the minimum optlen for the "full info" getsockopt;
 * its user is not visible in this part of the file - confirm.
 */
#define MIN_FULL_INFO_OPTLEN_SIZE	40
19 | |
20 | static struct sock *__mptcp_tcp_fallback(struct mptcp_sock *msk) |
21 | { |
22 | msk_owned_by_me(msk); |
23 | |
24 | if (likely(!__mptcp_check_fallback(msk))) |
25 | return NULL; |
26 | |
27 | return msk->first; |
28 | } |
29 | |
30 | static u32 sockopt_seq_reset(const struct sock *sk) |
31 | { |
32 | sock_owned_by_me(sk); |
33 | |
34 | /* Highbits contain state. Allows to distinguish sockopt_seq |
35 | * of listener and established: |
36 | * s0 = new_listener() |
37 | * sockopt(s0) - seq is 1 |
38 | * s1 = accept(s0) - s1 inherits seq 1 if listener sk (s0) |
39 | * sockopt(s0) - seq increments to 2 on s0 |
40 | * sockopt(s1) // seq increments to 2 on s1 (different option) |
41 | * new ssk completes join, inherits options from s0 // seq 2 |
42 | * Needs sync from mptcp join logic, but ssk->seq == msk->seq |
43 | * |
44 | * Set High order bits to sk_state so ssk->seq == msk->seq test |
45 | * will fail. |
46 | */ |
47 | |
48 | return (u32)sk->sk_state << 24u; |
49 | } |
50 | |
51 | static void sockopt_seq_inc(struct mptcp_sock *msk) |
52 | { |
53 | u32 seq = (msk->setsockopt_seq + 1) & 0x00ffffff; |
54 | |
55 | msk->setsockopt_seq = sockopt_seq_reset(sk: (struct sock *)msk) + seq; |
56 | } |
57 | |
58 | static int mptcp_get_int_option(struct mptcp_sock *msk, sockptr_t optval, |
59 | unsigned int optlen, int *val) |
60 | { |
61 | if (optlen < sizeof(int)) |
62 | return -EINVAL; |
63 | |
64 | if (copy_from_sockptr(dst: val, src: optval, size: sizeof(*val))) |
65 | return -EFAULT; |
66 | |
67 | return 0; |
68 | } |
69 | |
70 | static void mptcp_sol_socket_sync_intval(struct mptcp_sock *msk, int optname, int val) |
71 | { |
72 | struct mptcp_subflow_context *subflow; |
73 | struct sock *sk = (struct sock *)msk; |
74 | |
75 | lock_sock(sk); |
76 | sockopt_seq_inc(msk); |
77 | |
78 | mptcp_for_each_subflow(msk, subflow) { |
79 | struct sock *ssk = mptcp_subflow_tcp_sock(subflow); |
80 | bool slow = lock_sock_fast(sk: ssk); |
81 | |
82 | switch (optname) { |
83 | case SO_DEBUG: |
84 | sock_valbool_flag(sk: ssk, bit: SOCK_DBG, valbool: !!val); |
85 | break; |
86 | case SO_KEEPALIVE: |
87 | if (ssk->sk_prot->keepalive) |
88 | ssk->sk_prot->keepalive(ssk, !!val); |
89 | sock_valbool_flag(sk: ssk, bit: SOCK_KEEPOPEN, valbool: !!val); |
90 | break; |
91 | case SO_PRIORITY: |
92 | WRITE_ONCE(ssk->sk_priority, val); |
93 | break; |
94 | case SO_SNDBUF: |
95 | case SO_SNDBUFFORCE: |
96 | ssk->sk_userlocks |= SOCK_SNDBUF_LOCK; |
97 | WRITE_ONCE(ssk->sk_sndbuf, sk->sk_sndbuf); |
98 | mptcp_subflow_ctx(sk: ssk)->cached_sndbuf = sk->sk_sndbuf; |
99 | break; |
100 | case SO_RCVBUF: |
101 | case SO_RCVBUFFORCE: |
102 | ssk->sk_userlocks |= SOCK_RCVBUF_LOCK; |
103 | WRITE_ONCE(ssk->sk_rcvbuf, sk->sk_rcvbuf); |
104 | break; |
105 | case SO_MARK: |
106 | if (READ_ONCE(ssk->sk_mark) != sk->sk_mark) { |
107 | WRITE_ONCE(ssk->sk_mark, sk->sk_mark); |
108 | sk_dst_reset(sk: ssk); |
109 | } |
110 | break; |
111 | case SO_INCOMING_CPU: |
112 | WRITE_ONCE(ssk->sk_incoming_cpu, val); |
113 | break; |
114 | } |
115 | |
116 | subflow->setsockopt_seq = msk->setsockopt_seq; |
117 | unlock_sock_fast(sk: ssk, slow); |
118 | } |
119 | |
120 | release_sock(sk); |
121 | } |
122 | |
123 | static int mptcp_sol_socket_intval(struct mptcp_sock *msk, int optname, int val) |
124 | { |
125 | sockptr_t optval = KERNEL_SOCKPTR(p: &val); |
126 | struct sock *sk = (struct sock *)msk; |
127 | int ret; |
128 | |
129 | ret = sock_setsockopt(sock: sk->sk_socket, SOL_SOCKET, op: optname, |
130 | optval, optlen: sizeof(val)); |
131 | if (ret) |
132 | return ret; |
133 | |
134 | mptcp_sol_socket_sync_intval(msk, optname, val); |
135 | return 0; |
136 | } |
137 | |
138 | static void mptcp_so_incoming_cpu(struct mptcp_sock *msk, int val) |
139 | { |
140 | struct sock *sk = (struct sock *)msk; |
141 | |
142 | WRITE_ONCE(sk->sk_incoming_cpu, val); |
143 | |
144 | mptcp_sol_socket_sync_intval(msk, SO_INCOMING_CPU, val); |
145 | } |
146 | |
147 | static int mptcp_setsockopt_sol_socket_tstamp(struct mptcp_sock *msk, int optname, int val) |
148 | { |
149 | sockptr_t optval = KERNEL_SOCKPTR(p: &val); |
150 | struct mptcp_subflow_context *subflow; |
151 | struct sock *sk = (struct sock *)msk; |
152 | int ret; |
153 | |
154 | ret = sock_setsockopt(sock: sk->sk_socket, SOL_SOCKET, op: optname, |
155 | optval, optlen: sizeof(val)); |
156 | if (ret) |
157 | return ret; |
158 | |
159 | lock_sock(sk); |
160 | mptcp_for_each_subflow(msk, subflow) { |
161 | struct sock *ssk = mptcp_subflow_tcp_sock(subflow); |
162 | bool slow = lock_sock_fast(sk: ssk); |
163 | |
164 | sock_set_timestamp(sk, optname, valbool: !!val); |
165 | unlock_sock_fast(sk: ssk, slow); |
166 | } |
167 | |
168 | release_sock(sk); |
169 | return 0; |
170 | } |
171 | |
172 | static int mptcp_setsockopt_sol_socket_int(struct mptcp_sock *msk, int optname, |
173 | sockptr_t optval, |
174 | unsigned int optlen) |
175 | { |
176 | int val, ret; |
177 | |
178 | ret = mptcp_get_int_option(msk, optval, optlen, val: &val); |
179 | if (ret) |
180 | return ret; |
181 | |
182 | switch (optname) { |
183 | case SO_KEEPALIVE: |
184 | mptcp_sol_socket_sync_intval(msk, optname, val); |
185 | return 0; |
186 | case SO_DEBUG: |
187 | case SO_MARK: |
188 | case SO_PRIORITY: |
189 | case SO_SNDBUF: |
190 | case SO_SNDBUFFORCE: |
191 | case SO_RCVBUF: |
192 | case SO_RCVBUFFORCE: |
193 | return mptcp_sol_socket_intval(msk, optname, val); |
194 | case SO_INCOMING_CPU: |
195 | mptcp_so_incoming_cpu(msk, val); |
196 | return 0; |
197 | case SO_TIMESTAMP_OLD: |
198 | case SO_TIMESTAMP_NEW: |
199 | case SO_TIMESTAMPNS_OLD: |
200 | case SO_TIMESTAMPNS_NEW: |
201 | return mptcp_setsockopt_sol_socket_tstamp(msk, optname, val); |
202 | } |
203 | |
204 | return -ENOPROTOOPT; |
205 | } |
206 | |
207 | static int mptcp_setsockopt_sol_socket_timestamping(struct mptcp_sock *msk, |
208 | int optname, |
209 | sockptr_t optval, |
210 | unsigned int optlen) |
211 | { |
212 | struct mptcp_subflow_context *subflow; |
213 | struct sock *sk = (struct sock *)msk; |
214 | struct so_timestamping timestamping; |
215 | int ret; |
216 | |
217 | if (optlen == sizeof(timestamping)) { |
218 | if (copy_from_sockptr(dst: ×tamping, src: optval, |
219 | size: sizeof(timestamping))) |
220 | return -EFAULT; |
221 | } else if (optlen == sizeof(int)) { |
222 | memset(×tamping, 0, sizeof(timestamping)); |
223 | |
224 | if (copy_from_sockptr(dst: ×tamping.flags, src: optval, size: sizeof(int))) |
225 | return -EFAULT; |
226 | } else { |
227 | return -EINVAL; |
228 | } |
229 | |
230 | ret = sock_setsockopt(sock: sk->sk_socket, SOL_SOCKET, op: optname, |
231 | optval: KERNEL_SOCKPTR(p: ×tamping), |
232 | optlen: sizeof(timestamping)); |
233 | if (ret) |
234 | return ret; |
235 | |
236 | lock_sock(sk); |
237 | |
238 | mptcp_for_each_subflow(msk, subflow) { |
239 | struct sock *ssk = mptcp_subflow_tcp_sock(subflow); |
240 | bool slow = lock_sock_fast(sk: ssk); |
241 | |
242 | sock_set_timestamping(sk, optname, timestamping); |
243 | unlock_sock_fast(sk: ssk, slow); |
244 | } |
245 | |
246 | release_sock(sk); |
247 | |
248 | return 0; |
249 | } |
250 | |
251 | static int mptcp_setsockopt_sol_socket_linger(struct mptcp_sock *msk, sockptr_t optval, |
252 | unsigned int optlen) |
253 | { |
254 | struct mptcp_subflow_context *subflow; |
255 | struct sock *sk = (struct sock *)msk; |
256 | struct linger ling; |
257 | sockptr_t kopt; |
258 | int ret; |
259 | |
260 | if (optlen < sizeof(ling)) |
261 | return -EINVAL; |
262 | |
263 | if (copy_from_sockptr(dst: &ling, src: optval, size: sizeof(ling))) |
264 | return -EFAULT; |
265 | |
266 | kopt = KERNEL_SOCKPTR(p: &ling); |
267 | ret = sock_setsockopt(sock: sk->sk_socket, SOL_SOCKET, SO_LINGER, optval: kopt, optlen: sizeof(ling)); |
268 | if (ret) |
269 | return ret; |
270 | |
271 | lock_sock(sk); |
272 | sockopt_seq_inc(msk); |
273 | mptcp_for_each_subflow(msk, subflow) { |
274 | struct sock *ssk = mptcp_subflow_tcp_sock(subflow); |
275 | bool slow = lock_sock_fast(sk: ssk); |
276 | |
277 | if (!ling.l_onoff) { |
278 | sock_reset_flag(sk: ssk, flag: SOCK_LINGER); |
279 | } else { |
280 | ssk->sk_lingertime = sk->sk_lingertime; |
281 | sock_set_flag(sk: ssk, flag: SOCK_LINGER); |
282 | } |
283 | |
284 | subflow->setsockopt_seq = msk->setsockopt_seq; |
285 | unlock_sock_fast(sk: ssk, slow); |
286 | } |
287 | |
288 | release_sock(sk); |
289 | return 0; |
290 | } |
291 | |
292 | static int mptcp_setsockopt_sol_socket(struct mptcp_sock *msk, int optname, |
293 | sockptr_t optval, unsigned int optlen) |
294 | { |
295 | struct sock *sk = (struct sock *)msk; |
296 | struct sock *ssk; |
297 | int ret; |
298 | |
299 | switch (optname) { |
300 | case SO_REUSEPORT: |
301 | case SO_REUSEADDR: |
302 | case SO_BINDTODEVICE: |
303 | case SO_BINDTOIFINDEX: |
304 | lock_sock(sk); |
305 | ssk = __mptcp_nmpc_sk(msk); |
306 | if (IS_ERR(ptr: ssk)) { |
307 | release_sock(sk); |
308 | return PTR_ERR(ptr: ssk); |
309 | } |
310 | |
311 | ret = sk_setsockopt(sk: ssk, SOL_SOCKET, optname, optval, optlen); |
312 | if (ret == 0) { |
313 | if (optname == SO_REUSEPORT) |
314 | sk->sk_reuseport = ssk->sk_reuseport; |
315 | else if (optname == SO_REUSEADDR) |
316 | sk->sk_reuse = ssk->sk_reuse; |
317 | else if (optname == SO_BINDTODEVICE) |
318 | sk->sk_bound_dev_if = ssk->sk_bound_dev_if; |
319 | else if (optname == SO_BINDTOIFINDEX) |
320 | sk->sk_bound_dev_if = ssk->sk_bound_dev_if; |
321 | } |
322 | release_sock(sk); |
323 | return ret; |
324 | case SO_KEEPALIVE: |
325 | case SO_PRIORITY: |
326 | case SO_SNDBUF: |
327 | case SO_SNDBUFFORCE: |
328 | case SO_RCVBUF: |
329 | case SO_RCVBUFFORCE: |
330 | case SO_MARK: |
331 | case SO_INCOMING_CPU: |
332 | case SO_DEBUG: |
333 | case SO_TIMESTAMP_OLD: |
334 | case SO_TIMESTAMP_NEW: |
335 | case SO_TIMESTAMPNS_OLD: |
336 | case SO_TIMESTAMPNS_NEW: |
337 | return mptcp_setsockopt_sol_socket_int(msk, optname, optval, |
338 | optlen); |
339 | case SO_TIMESTAMPING_OLD: |
340 | case SO_TIMESTAMPING_NEW: |
341 | return mptcp_setsockopt_sol_socket_timestamping(msk, optname, |
342 | optval, optlen); |
343 | case SO_LINGER: |
344 | return mptcp_setsockopt_sol_socket_linger(msk, optval, optlen); |
345 | case SO_RCVLOWAT: |
346 | case SO_RCVTIMEO_OLD: |
347 | case SO_RCVTIMEO_NEW: |
348 | case SO_SNDTIMEO_OLD: |
349 | case SO_SNDTIMEO_NEW: |
350 | case SO_BUSY_POLL: |
351 | case SO_PREFER_BUSY_POLL: |
352 | case SO_BUSY_POLL_BUDGET: |
353 | /* No need to copy: only relevant for msk */ |
354 | return sock_setsockopt(sock: sk->sk_socket, SOL_SOCKET, op: optname, optval, optlen); |
355 | case SO_NO_CHECK: |
356 | case SO_DONTROUTE: |
357 | case SO_BROADCAST: |
358 | case SO_BSDCOMPAT: |
359 | case SO_PASSCRED: |
360 | case SO_PASSPIDFD: |
361 | case SO_PASSSEC: |
362 | case SO_RXQ_OVFL: |
363 | case SO_WIFI_STATUS: |
364 | case SO_NOFCS: |
365 | case SO_SELECT_ERR_QUEUE: |
366 | return 0; |
367 | } |
368 | |
369 | /* SO_OOBINLINE is not supported, let's avoid the related mess |
370 | * SO_ATTACH_FILTER, SO_ATTACH_BPF, SO_ATTACH_REUSEPORT_CBPF, |
371 | * SO_DETACH_REUSEPORT_BPF, SO_DETACH_FILTER, SO_LOCK_FILTER, |
372 | * we must be careful with subflows |
373 | * |
374 | * SO_ATTACH_REUSEPORT_EBPF is not supported, at it checks |
375 | * explicitly the sk_protocol field |
376 | * |
377 | * SO_PEEK_OFF is unsupported, as it is for plain TCP |
378 | * SO_MAX_PACING_RATE is unsupported, we must be careful with subflows |
379 | * SO_CNX_ADVICE is currently unsupported, could possibly be relevant, |
380 | * but likely needs careful design |
381 | * |
382 | * SO_ZEROCOPY is currently unsupported, TODO in sndmsg |
383 | * SO_TXTIME is currently unsupported |
384 | */ |
385 | |
386 | return -EOPNOTSUPP; |
387 | } |
388 | |
389 | static int mptcp_setsockopt_v6(struct mptcp_sock *msk, int optname, |
390 | sockptr_t optval, unsigned int optlen) |
391 | { |
392 | struct sock *sk = (struct sock *)msk; |
393 | int ret = -EOPNOTSUPP; |
394 | struct sock *ssk; |
395 | |
396 | switch (optname) { |
397 | case IPV6_V6ONLY: |
398 | case IPV6_TRANSPARENT: |
399 | case IPV6_FREEBIND: |
400 | lock_sock(sk); |
401 | ssk = __mptcp_nmpc_sk(msk); |
402 | if (IS_ERR(ptr: ssk)) { |
403 | release_sock(sk); |
404 | return PTR_ERR(ptr: ssk); |
405 | } |
406 | |
407 | ret = tcp_setsockopt(sk: ssk, SOL_IPV6, optname, optval, optlen); |
408 | if (ret != 0) { |
409 | release_sock(sk); |
410 | return ret; |
411 | } |
412 | |
413 | sockopt_seq_inc(msk); |
414 | |
415 | switch (optname) { |
416 | case IPV6_V6ONLY: |
417 | sk->sk_ipv6only = ssk->sk_ipv6only; |
418 | break; |
419 | case IPV6_TRANSPARENT: |
420 | inet_assign_bit(TRANSPARENT, sk, |
421 | inet_test_bit(TRANSPARENT, ssk)); |
422 | break; |
423 | case IPV6_FREEBIND: |
424 | inet_assign_bit(FREEBIND, sk, |
425 | inet_test_bit(FREEBIND, ssk)); |
426 | break; |
427 | } |
428 | |
429 | release_sock(sk); |
430 | break; |
431 | } |
432 | |
433 | return ret; |
434 | } |
435 | |
436 | static bool mptcp_supported_sockopt(int level, int optname) |
437 | { |
438 | if (level == SOL_IP) { |
439 | switch (optname) { |
440 | /* should work fine */ |
441 | case IP_FREEBIND: |
442 | case IP_TRANSPARENT: |
443 | case IP_BIND_ADDRESS_NO_PORT: |
444 | case IP_LOCAL_PORT_RANGE: |
445 | |
446 | /* the following are control cmsg related */ |
447 | case IP_PKTINFO: |
448 | case IP_RECVTTL: |
449 | case IP_RECVTOS: |
450 | case IP_RECVOPTS: |
451 | case IP_RETOPTS: |
452 | case IP_PASSSEC: |
453 | case IP_RECVORIGDSTADDR: |
454 | case IP_CHECKSUM: |
455 | case IP_RECVFRAGSIZE: |
456 | |
457 | /* common stuff that need some love */ |
458 | case IP_TOS: |
459 | case IP_TTL: |
460 | case IP_MTU_DISCOVER: |
461 | case IP_RECVERR: |
462 | |
463 | /* possibly less common may deserve some love */ |
464 | case IP_MINTTL: |
465 | |
466 | /* the following is apparently a no-op for plain TCP */ |
467 | case IP_RECVERR_RFC4884: |
468 | return true; |
469 | } |
470 | |
471 | /* IP_OPTIONS is not supported, needs subflow care */ |
472 | /* IP_HDRINCL, IP_NODEFRAG are not supported, RAW specific */ |
473 | /* IP_MULTICAST_TTL, IP_MULTICAST_LOOP, IP_UNICAST_IF, |
474 | * IP_ADD_MEMBERSHIP, IP_ADD_SOURCE_MEMBERSHIP, IP_DROP_MEMBERSHIP, |
475 | * IP_DROP_SOURCE_MEMBERSHIP, IP_BLOCK_SOURCE, IP_UNBLOCK_SOURCE, |
476 | * MCAST_JOIN_GROUP, MCAST_LEAVE_GROUP MCAST_JOIN_SOURCE_GROUP, |
477 | * MCAST_LEAVE_SOURCE_GROUP, MCAST_BLOCK_SOURCE, MCAST_UNBLOCK_SOURCE, |
478 | * MCAST_MSFILTER, IP_MULTICAST_ALL are not supported, better not deal |
479 | * with mcast stuff |
480 | */ |
481 | /* IP_IPSEC_POLICY, IP_XFRM_POLICY are nut supported, unrelated here */ |
482 | return false; |
483 | } |
484 | if (level == SOL_IPV6) { |
485 | switch (optname) { |
486 | case IPV6_V6ONLY: |
487 | |
488 | /* the following are control cmsg related */ |
489 | case IPV6_RECVPKTINFO: |
490 | case IPV6_2292PKTINFO: |
491 | case IPV6_RECVHOPLIMIT: |
492 | case IPV6_2292HOPLIMIT: |
493 | case IPV6_RECVRTHDR: |
494 | case IPV6_2292RTHDR: |
495 | case IPV6_RECVHOPOPTS: |
496 | case IPV6_2292HOPOPTS: |
497 | case IPV6_RECVDSTOPTS: |
498 | case IPV6_2292DSTOPTS: |
499 | case IPV6_RECVTCLASS: |
500 | case IPV6_FLOWINFO: |
501 | case IPV6_RECVPATHMTU: |
502 | case IPV6_RECVORIGDSTADDR: |
503 | case IPV6_RECVFRAGSIZE: |
504 | |
505 | /* the following ones need some love but are quite common */ |
506 | case IPV6_TCLASS: |
507 | case IPV6_TRANSPARENT: |
508 | case IPV6_FREEBIND: |
509 | case IPV6_PKTINFO: |
510 | case IPV6_2292PKTOPTIONS: |
511 | case IPV6_UNICAST_HOPS: |
512 | case IPV6_MTU_DISCOVER: |
513 | case IPV6_MTU: |
514 | case IPV6_RECVERR: |
515 | case IPV6_FLOWINFO_SEND: |
516 | case IPV6_FLOWLABEL_MGR: |
517 | case IPV6_MINHOPCOUNT: |
518 | case IPV6_DONTFRAG: |
519 | case IPV6_AUTOFLOWLABEL: |
520 | |
521 | /* the following one is a no-op for plain TCP */ |
522 | case IPV6_RECVERR_RFC4884: |
523 | return true; |
524 | } |
525 | |
526 | /* IPV6_HOPOPTS, IPV6_RTHDRDSTOPTS, IPV6_RTHDR, IPV6_DSTOPTS are |
527 | * not supported |
528 | */ |
529 | /* IPV6_MULTICAST_HOPS, IPV6_MULTICAST_LOOP, IPV6_UNICAST_IF, |
530 | * IPV6_MULTICAST_IF, IPV6_ADDRFORM, |
531 | * IPV6_ADD_MEMBERSHIP, IPV6_DROP_MEMBERSHIP, IPV6_JOIN_ANYCAST, |
532 | * IPV6_LEAVE_ANYCAST, IPV6_MULTICAST_ALL, MCAST_JOIN_GROUP, MCAST_LEAVE_GROUP, |
533 | * MCAST_JOIN_SOURCE_GROUP, MCAST_LEAVE_SOURCE_GROUP, |
534 | * MCAST_BLOCK_SOURCE, MCAST_UNBLOCK_SOURCE, MCAST_MSFILTER |
535 | * are not supported better not deal with mcast |
536 | */ |
537 | /* IPV6_ROUTER_ALERT, IPV6_ROUTER_ALERT_ISOLATE are not supported, since are evil */ |
538 | |
539 | /* IPV6_IPSEC_POLICY, IPV6_XFRM_POLICY are not supported */ |
540 | /* IPV6_ADDR_PREFERENCES is not supported, we must be careful with subflows */ |
541 | return false; |
542 | } |
543 | if (level == SOL_TCP) { |
544 | switch (optname) { |
545 | /* the following are no-op or should work just fine */ |
546 | case TCP_THIN_DUPACK: |
547 | case TCP_DEFER_ACCEPT: |
548 | |
549 | /* the following need some love */ |
550 | case TCP_MAXSEG: |
551 | case TCP_NODELAY: |
552 | case TCP_THIN_LINEAR_TIMEOUTS: |
553 | case TCP_CONGESTION: |
554 | case TCP_CORK: |
555 | case TCP_KEEPIDLE: |
556 | case TCP_KEEPINTVL: |
557 | case TCP_KEEPCNT: |
558 | case TCP_SYNCNT: |
559 | case TCP_SAVE_SYN: |
560 | case TCP_LINGER2: |
561 | case TCP_WINDOW_CLAMP: |
562 | case TCP_QUICKACK: |
563 | case TCP_USER_TIMEOUT: |
564 | case TCP_TIMESTAMP: |
565 | case TCP_NOTSENT_LOWAT: |
566 | case TCP_TX_DELAY: |
567 | case TCP_INQ: |
568 | case TCP_FASTOPEN: |
569 | case TCP_FASTOPEN_CONNECT: |
570 | case TCP_FASTOPEN_KEY: |
571 | case TCP_FASTOPEN_NO_COOKIE: |
572 | return true; |
573 | } |
574 | |
575 | /* TCP_MD5SIG, TCP_MD5SIG_EXT are not supported, MD5 is not compatible with MPTCP */ |
576 | |
577 | /* TCP_REPAIR, TCP_REPAIR_QUEUE, TCP_QUEUE_SEQ, TCP_REPAIR_OPTIONS, |
578 | * TCP_REPAIR_WINDOW are not supported, better avoid this mess |
579 | */ |
580 | } |
581 | return false; |
582 | } |
583 | |
584 | static int mptcp_setsockopt_sol_tcp_congestion(struct mptcp_sock *msk, sockptr_t optval, |
585 | unsigned int optlen) |
586 | { |
587 | struct mptcp_subflow_context *subflow; |
588 | struct sock *sk = (struct sock *)msk; |
589 | char name[TCP_CA_NAME_MAX]; |
590 | bool cap_net_admin; |
591 | int ret; |
592 | |
593 | if (optlen < 1) |
594 | return -EINVAL; |
595 | |
596 | ret = strncpy_from_sockptr(dst: name, src: optval, |
597 | min_t(long, TCP_CA_NAME_MAX - 1, optlen)); |
598 | if (ret < 0) |
599 | return -EFAULT; |
600 | |
601 | name[ret] = 0; |
602 | |
603 | cap_net_admin = ns_capable(ns: sock_net(sk)->user_ns, CAP_NET_ADMIN); |
604 | |
605 | ret = 0; |
606 | lock_sock(sk); |
607 | sockopt_seq_inc(msk); |
608 | mptcp_for_each_subflow(msk, subflow) { |
609 | struct sock *ssk = mptcp_subflow_tcp_sock(subflow); |
610 | int err; |
611 | |
612 | lock_sock(sk: ssk); |
613 | err = tcp_set_congestion_control(sk: ssk, name, load: true, cap_net_admin); |
614 | if (err < 0 && ret == 0) |
615 | ret = err; |
616 | subflow->setsockopt_seq = msk->setsockopt_seq; |
617 | release_sock(sk: ssk); |
618 | } |
619 | |
620 | if (ret == 0) |
621 | strcpy(p: msk->ca_name, q: name); |
622 | |
623 | release_sock(sk); |
624 | return ret; |
625 | } |
626 | |
627 | static int __mptcp_setsockopt_sol_tcp_cork(struct mptcp_sock *msk, int val) |
628 | { |
629 | struct mptcp_subflow_context *subflow; |
630 | struct sock *sk = (struct sock *)msk; |
631 | |
632 | sockopt_seq_inc(msk); |
633 | msk->cork = !!val; |
634 | mptcp_for_each_subflow(msk, subflow) { |
635 | struct sock *ssk = mptcp_subflow_tcp_sock(subflow); |
636 | |
637 | lock_sock(sk: ssk); |
638 | __tcp_sock_set_cork(sk: ssk, on: !!val); |
639 | release_sock(sk: ssk); |
640 | } |
641 | if (!val) |
642 | mptcp_check_and_set_pending(sk); |
643 | |
644 | return 0; |
645 | } |
646 | |
647 | static int __mptcp_setsockopt_sol_tcp_nodelay(struct mptcp_sock *msk, int val) |
648 | { |
649 | struct mptcp_subflow_context *subflow; |
650 | struct sock *sk = (struct sock *)msk; |
651 | |
652 | sockopt_seq_inc(msk); |
653 | msk->nodelay = !!val; |
654 | mptcp_for_each_subflow(msk, subflow) { |
655 | struct sock *ssk = mptcp_subflow_tcp_sock(subflow); |
656 | |
657 | lock_sock(sk: ssk); |
658 | __tcp_sock_set_nodelay(sk: ssk, on: !!val); |
659 | release_sock(sk: ssk); |
660 | } |
661 | if (val) |
662 | mptcp_check_and_set_pending(sk); |
663 | return 0; |
664 | } |
665 | |
666 | static int mptcp_setsockopt_sol_ip_set(struct mptcp_sock *msk, int optname, |
667 | sockptr_t optval, unsigned int optlen) |
668 | { |
669 | struct sock *sk = (struct sock *)msk; |
670 | struct sock *ssk; |
671 | int err; |
672 | |
673 | err = ip_setsockopt(sk, SOL_IP, optname, optval, optlen); |
674 | if (err != 0) |
675 | return err; |
676 | |
677 | lock_sock(sk); |
678 | |
679 | ssk = __mptcp_nmpc_sk(msk); |
680 | if (IS_ERR(ptr: ssk)) { |
681 | release_sock(sk); |
682 | return PTR_ERR(ptr: ssk); |
683 | } |
684 | |
685 | switch (optname) { |
686 | case IP_FREEBIND: |
687 | inet_assign_bit(FREEBIND, ssk, inet_test_bit(FREEBIND, sk)); |
688 | break; |
689 | case IP_TRANSPARENT: |
690 | inet_assign_bit(TRANSPARENT, ssk, |
691 | inet_test_bit(TRANSPARENT, sk)); |
692 | break; |
693 | case IP_BIND_ADDRESS_NO_PORT: |
694 | inet_assign_bit(BIND_ADDRESS_NO_PORT, ssk, |
695 | inet_test_bit(BIND_ADDRESS_NO_PORT, sk)); |
696 | break; |
697 | case IP_LOCAL_PORT_RANGE: |
698 | WRITE_ONCE(inet_sk(ssk)->local_port_range, |
699 | READ_ONCE(inet_sk(sk)->local_port_range)); |
700 | break; |
701 | default: |
702 | release_sock(sk); |
703 | WARN_ON_ONCE(1); |
704 | return -EOPNOTSUPP; |
705 | } |
706 | |
707 | sockopt_seq_inc(msk); |
708 | release_sock(sk); |
709 | return 0; |
710 | } |
711 | |
712 | static int mptcp_setsockopt_v4_set_tos(struct mptcp_sock *msk, int optname, |
713 | sockptr_t optval, unsigned int optlen) |
714 | { |
715 | struct mptcp_subflow_context *subflow; |
716 | struct sock *sk = (struct sock *)msk; |
717 | int err, val; |
718 | |
719 | err = ip_setsockopt(sk, SOL_IP, optname, optval, optlen); |
720 | |
721 | if (err != 0) |
722 | return err; |
723 | |
724 | lock_sock(sk); |
725 | sockopt_seq_inc(msk); |
726 | val = READ_ONCE(inet_sk(sk)->tos); |
727 | mptcp_for_each_subflow(msk, subflow) { |
728 | struct sock *ssk = mptcp_subflow_tcp_sock(subflow); |
729 | bool slow; |
730 | |
731 | slow = lock_sock_fast(sk: ssk); |
732 | __ip_sock_set_tos(sk: ssk, val); |
733 | unlock_sock_fast(sk: ssk, slow); |
734 | } |
735 | release_sock(sk); |
736 | |
737 | return 0; |
738 | } |
739 | |
740 | static int mptcp_setsockopt_v4(struct mptcp_sock *msk, int optname, |
741 | sockptr_t optval, unsigned int optlen) |
742 | { |
743 | switch (optname) { |
744 | case IP_FREEBIND: |
745 | case IP_TRANSPARENT: |
746 | case IP_BIND_ADDRESS_NO_PORT: |
747 | case IP_LOCAL_PORT_RANGE: |
748 | return mptcp_setsockopt_sol_ip_set(msk, optname, optval, optlen); |
749 | case IP_TOS: |
750 | return mptcp_setsockopt_v4_set_tos(msk, optname, optval, optlen); |
751 | } |
752 | |
753 | return -EOPNOTSUPP; |
754 | } |
755 | |
756 | static int mptcp_setsockopt_first_sf_only(struct mptcp_sock *msk, int level, int optname, |
757 | sockptr_t optval, unsigned int optlen) |
758 | { |
759 | struct sock *sk = (struct sock *)msk; |
760 | struct sock *ssk; |
761 | int ret; |
762 | |
763 | /* Limit to first subflow, before the connection establishment */ |
764 | lock_sock(sk); |
765 | ssk = __mptcp_nmpc_sk(msk); |
766 | if (IS_ERR(ptr: ssk)) { |
767 | ret = PTR_ERR(ptr: ssk); |
768 | goto unlock; |
769 | } |
770 | |
771 | ret = tcp_setsockopt(sk: ssk, level, optname, optval, optlen); |
772 | |
773 | unlock: |
774 | release_sock(sk); |
775 | return ret; |
776 | } |
777 | |
778 | static int mptcp_setsockopt_sol_tcp(struct mptcp_sock *msk, int optname, |
779 | sockptr_t optval, unsigned int optlen) |
780 | { |
781 | struct sock *sk = (void *)msk; |
782 | int ret, val; |
783 | |
784 | switch (optname) { |
785 | case TCP_ULP: |
786 | return -EOPNOTSUPP; |
787 | case TCP_CONGESTION: |
788 | return mptcp_setsockopt_sol_tcp_congestion(msk, optval, optlen); |
789 | case TCP_DEFER_ACCEPT: |
790 | /* See tcp.c: TCP_DEFER_ACCEPT does not fail */ |
791 | mptcp_setsockopt_first_sf_only(msk, SOL_TCP, optname, optval, optlen); |
792 | return 0; |
793 | case TCP_FASTOPEN: |
794 | case TCP_FASTOPEN_CONNECT: |
795 | case TCP_FASTOPEN_KEY: |
796 | case TCP_FASTOPEN_NO_COOKIE: |
797 | return mptcp_setsockopt_first_sf_only(msk, SOL_TCP, optname, |
798 | optval, optlen); |
799 | } |
800 | |
801 | ret = mptcp_get_int_option(msk, optval, optlen, val: &val); |
802 | if (ret) |
803 | return ret; |
804 | |
805 | lock_sock(sk); |
806 | switch (optname) { |
807 | case TCP_INQ: |
808 | if (val < 0 || val > 1) |
809 | ret = -EINVAL; |
810 | else |
811 | msk->recvmsg_inq = !!val; |
812 | break; |
813 | case TCP_NOTSENT_LOWAT: |
814 | WRITE_ONCE(msk->notsent_lowat, val); |
815 | mptcp_write_space(sk); |
816 | break; |
817 | case TCP_CORK: |
818 | ret = __mptcp_setsockopt_sol_tcp_cork(msk, val); |
819 | break; |
820 | case TCP_NODELAY: |
821 | ret = __mptcp_setsockopt_sol_tcp_nodelay(msk, val); |
822 | break; |
823 | default: |
824 | ret = -ENOPROTOOPT; |
825 | } |
826 | |
827 | release_sock(sk); |
828 | return ret; |
829 | } |
830 | |
831 | int mptcp_setsockopt(struct sock *sk, int level, int optname, |
832 | sockptr_t optval, unsigned int optlen) |
833 | { |
834 | struct mptcp_sock *msk = mptcp_sk(sk); |
835 | struct sock *ssk; |
836 | |
837 | pr_debug("msk=%p" , msk); |
838 | |
839 | if (level == SOL_SOCKET) |
840 | return mptcp_setsockopt_sol_socket(msk, optname, optval, optlen); |
841 | |
842 | if (!mptcp_supported_sockopt(level, optname)) |
843 | return -ENOPROTOOPT; |
844 | |
845 | /* @@ the meaning of setsockopt() when the socket is connected and |
846 | * there are multiple subflows is not yet defined. It is up to the |
847 | * MPTCP-level socket to configure the subflows until the subflow |
848 | * is in TCP fallback, when TCP socket options are passed through |
849 | * to the one remaining subflow. |
850 | */ |
851 | lock_sock(sk); |
852 | ssk = __mptcp_tcp_fallback(msk); |
853 | release_sock(sk); |
854 | if (ssk) |
855 | return tcp_setsockopt(sk: ssk, level, optname, optval, optlen); |
856 | |
857 | if (level == SOL_IP) |
858 | return mptcp_setsockopt_v4(msk, optname, optval, optlen); |
859 | |
860 | if (level == SOL_IPV6) |
861 | return mptcp_setsockopt_v6(msk, optname, optval, optlen); |
862 | |
863 | if (level == SOL_TCP) |
864 | return mptcp_setsockopt_sol_tcp(msk, optname, optval, optlen); |
865 | |
866 | return -EOPNOTSUPP; |
867 | } |
868 | |
869 | static int mptcp_getsockopt_first_sf_only(struct mptcp_sock *msk, int level, int optname, |
870 | char __user *optval, int __user *optlen) |
871 | { |
872 | struct sock *sk = (struct sock *)msk; |
873 | struct sock *ssk; |
874 | int ret; |
875 | |
876 | lock_sock(sk); |
877 | ssk = msk->first; |
878 | if (ssk) { |
879 | ret = tcp_getsockopt(sk: ssk, level, optname, optval, optlen); |
880 | goto out; |
881 | } |
882 | |
883 | ssk = __mptcp_nmpc_sk(msk); |
884 | if (IS_ERR(ptr: ssk)) { |
885 | ret = PTR_ERR(ptr: ssk); |
886 | goto out; |
887 | } |
888 | |
889 | ret = tcp_getsockopt(sk: ssk, level, optname, optval, optlen); |
890 | |
891 | out: |
892 | release_sock(sk); |
893 | return ret; |
894 | } |
895 | |
896 | void mptcp_diag_fill_info(struct mptcp_sock *msk, struct mptcp_info *info) |
897 | { |
898 | struct sock *sk = (struct sock *)msk; |
899 | u32 flags = 0; |
900 | bool slow; |
901 | |
902 | memset(info, 0, sizeof(*info)); |
903 | |
904 | info->mptcpi_subflows = READ_ONCE(msk->pm.subflows); |
905 | info->mptcpi_add_addr_signal = READ_ONCE(msk->pm.add_addr_signaled); |
906 | info->mptcpi_add_addr_accepted = READ_ONCE(msk->pm.add_addr_accepted); |
907 | info->mptcpi_local_addr_used = READ_ONCE(msk->pm.local_addr_used); |
908 | |
909 | if (inet_sk_state_load(sk) == TCP_LISTEN) |
910 | return; |
911 | |
912 | /* The following limits only make sense for the in-kernel PM */ |
913 | if (mptcp_pm_is_kernel(msk)) { |
914 | info->mptcpi_subflows_max = |
915 | mptcp_pm_get_subflows_max(msk); |
916 | info->mptcpi_add_addr_signal_max = |
917 | mptcp_pm_get_add_addr_signal_max(msk); |
918 | info->mptcpi_add_addr_accepted_max = |
919 | mptcp_pm_get_add_addr_accept_max(msk); |
920 | info->mptcpi_local_addr_max = |
921 | mptcp_pm_get_local_addr_max(msk); |
922 | } |
923 | |
924 | if (__mptcp_check_fallback(msk)) |
925 | flags |= MPTCP_INFO_FLAG_FALLBACK; |
926 | if (READ_ONCE(msk->can_ack)) |
927 | flags |= MPTCP_INFO_FLAG_REMOTE_KEY_RECEIVED; |
928 | info->mptcpi_flags = flags; |
929 | mptcp_data_lock(sk); |
930 | info->mptcpi_snd_una = msk->snd_una; |
931 | info->mptcpi_rcv_nxt = msk->ack_seq; |
932 | info->mptcpi_bytes_acked = msk->bytes_acked; |
933 | mptcp_data_unlock(sk); |
934 | |
935 | slow = lock_sock_fast(sk); |
936 | info->mptcpi_csum_enabled = READ_ONCE(msk->csum_enabled); |
937 | info->mptcpi_token = msk->token; |
938 | info->mptcpi_write_seq = msk->write_seq; |
939 | info->mptcpi_retransmits = inet_csk(sk)->icsk_retransmits; |
940 | info->mptcpi_bytes_sent = msk->bytes_sent; |
941 | info->mptcpi_bytes_received = msk->bytes_received; |
942 | info->mptcpi_bytes_retrans = msk->bytes_retrans; |
943 | info->mptcpi_subflows_total = info->mptcpi_subflows + |
944 | __mptcp_has_initial_subflow(msk); |
945 | unlock_sock_fast(sk, slow); |
946 | } |
947 | EXPORT_SYMBOL_GPL(mptcp_diag_fill_info); |
948 | |
949 | static int mptcp_getsockopt_info(struct mptcp_sock *msk, char __user *optval, int __user *optlen) |
950 | { |
951 | struct mptcp_info m_info; |
952 | int len; |
953 | |
954 | if (get_user(len, optlen)) |
955 | return -EFAULT; |
956 | |
957 | len = min_t(unsigned int, len, sizeof(struct mptcp_info)); |
958 | |
959 | mptcp_diag_fill_info(msk, &m_info); |
960 | |
961 | if (put_user(len, optlen)) |
962 | return -EFAULT; |
963 | |
964 | if (copy_to_user(to: optval, from: &m_info, n: len)) |
965 | return -EFAULT; |
966 | |
967 | return 0; |
968 | } |
969 | |
970 | static int mptcp_put_subflow_data(struct mptcp_subflow_data *sfd, |
971 | char __user *optval, |
972 | u32 copied, |
973 | int __user *optlen) |
974 | { |
975 | u32 copylen = min_t(u32, sfd->size_subflow_data, sizeof(*sfd)); |
976 | |
977 | if (copied) |
978 | copied += sfd->size_subflow_data; |
979 | else |
980 | copied = copylen; |
981 | |
982 | if (put_user(copied, optlen)) |
983 | return -EFAULT; |
984 | |
985 | if (copy_to_user(to: optval, from: sfd, n: copylen)) |
986 | return -EFAULT; |
987 | |
988 | return 0; |
989 | } |
990 | |
991 | static int mptcp_get_subflow_data(struct mptcp_subflow_data *sfd, |
992 | char __user *optval, |
993 | int __user *optlen) |
994 | { |
995 | int len, copylen; |
996 | |
997 | if (get_user(len, optlen)) |
998 | return -EFAULT; |
999 | |
1000 | /* if mptcp_subflow_data size is changed, need to adjust |
1001 | * this function to deal with programs using old version. |
1002 | */ |
1003 | BUILD_BUG_ON(sizeof(*sfd) != MIN_INFO_OPTLEN_SIZE); |
1004 | |
1005 | if (len < MIN_INFO_OPTLEN_SIZE) |
1006 | return -EINVAL; |
1007 | |
1008 | memset(sfd, 0, sizeof(*sfd)); |
1009 | |
1010 | copylen = min_t(unsigned int, len, sizeof(*sfd)); |
1011 | if (copy_from_user(to: sfd, from: optval, n: copylen)) |
1012 | return -EFAULT; |
1013 | |
1014 | /* size_subflow_data is u32, but len is signed */ |
1015 | if (sfd->size_subflow_data > INT_MAX || |
1016 | sfd->size_user > INT_MAX) |
1017 | return -EINVAL; |
1018 | |
1019 | if (sfd->size_subflow_data < MIN_INFO_OPTLEN_SIZE || |
1020 | sfd->size_subflow_data > len) |
1021 | return -EINVAL; |
1022 | |
1023 | if (sfd->num_subflows || sfd->size_kernel) |
1024 | return -EINVAL; |
1025 | |
1026 | return len - sfd->size_subflow_data; |
1027 | } |
1028 | |
1029 | static int mptcp_getsockopt_tcpinfo(struct mptcp_sock *msk, char __user *optval, |
1030 | int __user *optlen) |
1031 | { |
1032 | struct mptcp_subflow_context *subflow; |
1033 | struct sock *sk = (struct sock *)msk; |
1034 | unsigned int sfcount = 0, copied = 0; |
1035 | struct mptcp_subflow_data sfd; |
1036 | char __user *infoptr; |
1037 | int len; |
1038 | |
1039 | len = mptcp_get_subflow_data(sfd: &sfd, optval, optlen); |
1040 | if (len < 0) |
1041 | return len; |
1042 | |
1043 | sfd.size_kernel = sizeof(struct tcp_info); |
1044 | sfd.size_user = min_t(unsigned int, sfd.size_user, |
1045 | sizeof(struct tcp_info)); |
1046 | |
1047 | infoptr = optval + sfd.size_subflow_data; |
1048 | |
1049 | lock_sock(sk); |
1050 | |
1051 | mptcp_for_each_subflow(msk, subflow) { |
1052 | struct sock *ssk = mptcp_subflow_tcp_sock(subflow); |
1053 | |
1054 | ++sfcount; |
1055 | |
1056 | if (len && len >= sfd.size_user) { |
1057 | struct tcp_info info; |
1058 | |
1059 | tcp_get_info(ssk, &info); |
1060 | |
1061 | if (copy_to_user(to: infoptr, from: &info, n: sfd.size_user)) { |
1062 | release_sock(sk); |
1063 | return -EFAULT; |
1064 | } |
1065 | |
1066 | infoptr += sfd.size_user; |
1067 | copied += sfd.size_user; |
1068 | len -= sfd.size_user; |
1069 | } |
1070 | } |
1071 | |
1072 | release_sock(sk); |
1073 | |
1074 | sfd.num_subflows = sfcount; |
1075 | |
1076 | if (mptcp_put_subflow_data(sfd: &sfd, optval, copied, optlen)) |
1077 | return -EFAULT; |
1078 | |
1079 | return 0; |
1080 | } |
1081 | |
1082 | static void mptcp_get_sub_addrs(const struct sock *sk, struct mptcp_subflow_addrs *a) |
1083 | { |
1084 | const struct inet_sock *inet = inet_sk(sk); |
1085 | |
1086 | memset(a, 0, sizeof(*a)); |
1087 | |
1088 | if (sk->sk_family == AF_INET) { |
1089 | a->sin_local.sin_family = AF_INET; |
1090 | a->sin_local.sin_port = inet->inet_sport; |
1091 | a->sin_local.sin_addr.s_addr = inet->inet_rcv_saddr; |
1092 | |
1093 | if (!a->sin_local.sin_addr.s_addr) |
1094 | a->sin_local.sin_addr.s_addr = inet->inet_saddr; |
1095 | |
1096 | a->sin_remote.sin_family = AF_INET; |
1097 | a->sin_remote.sin_port = inet->inet_dport; |
1098 | a->sin_remote.sin_addr.s_addr = inet->inet_daddr; |
1099 | #if IS_ENABLED(CONFIG_IPV6) |
1100 | } else if (sk->sk_family == AF_INET6) { |
1101 | const struct ipv6_pinfo *np = inet6_sk(sk: sk); |
1102 | |
1103 | if (WARN_ON_ONCE(!np)) |
1104 | return; |
1105 | |
1106 | a->sin6_local.sin6_family = AF_INET6; |
1107 | a->sin6_local.sin6_port = inet->inet_sport; |
1108 | |
1109 | if (ipv6_addr_any(a: &sk->sk_v6_rcv_saddr)) |
1110 | a->sin6_local.sin6_addr = np->saddr; |
1111 | else |
1112 | a->sin6_local.sin6_addr = sk->sk_v6_rcv_saddr; |
1113 | |
1114 | a->sin6_remote.sin6_family = AF_INET6; |
1115 | a->sin6_remote.sin6_port = inet->inet_dport; |
1116 | a->sin6_remote.sin6_addr = sk->sk_v6_daddr; |
1117 | #endif |
1118 | } |
1119 | } |
1120 | |
1121 | static int mptcp_getsockopt_subflow_addrs(struct mptcp_sock *msk, char __user *optval, |
1122 | int __user *optlen) |
1123 | { |
1124 | struct mptcp_subflow_context *subflow; |
1125 | struct sock *sk = (struct sock *)msk; |
1126 | unsigned int sfcount = 0, copied = 0; |
1127 | struct mptcp_subflow_data sfd; |
1128 | char __user *addrptr; |
1129 | int len; |
1130 | |
1131 | len = mptcp_get_subflow_data(sfd: &sfd, optval, optlen); |
1132 | if (len < 0) |
1133 | return len; |
1134 | |
1135 | sfd.size_kernel = sizeof(struct mptcp_subflow_addrs); |
1136 | sfd.size_user = min_t(unsigned int, sfd.size_user, |
1137 | sizeof(struct mptcp_subflow_addrs)); |
1138 | |
1139 | addrptr = optval + sfd.size_subflow_data; |
1140 | |
1141 | lock_sock(sk); |
1142 | |
1143 | mptcp_for_each_subflow(msk, subflow) { |
1144 | struct sock *ssk = mptcp_subflow_tcp_sock(subflow); |
1145 | |
1146 | ++sfcount; |
1147 | |
1148 | if (len && len >= sfd.size_user) { |
1149 | struct mptcp_subflow_addrs a; |
1150 | |
1151 | mptcp_get_sub_addrs(sk: ssk, a: &a); |
1152 | |
1153 | if (copy_to_user(to: addrptr, from: &a, n: sfd.size_user)) { |
1154 | release_sock(sk); |
1155 | return -EFAULT; |
1156 | } |
1157 | |
1158 | addrptr += sfd.size_user; |
1159 | copied += sfd.size_user; |
1160 | len -= sfd.size_user; |
1161 | } |
1162 | } |
1163 | |
1164 | release_sock(sk); |
1165 | |
1166 | sfd.num_subflows = sfcount; |
1167 | |
1168 | if (mptcp_put_subflow_data(sfd: &sfd, optval, copied, optlen)) |
1169 | return -EFAULT; |
1170 | |
1171 | return 0; |
1172 | } |
1173 | |
1174 | static int mptcp_get_full_info(struct mptcp_full_info *mfi, |
1175 | char __user *optval, |
1176 | int __user *optlen) |
1177 | { |
1178 | int len; |
1179 | |
1180 | BUILD_BUG_ON(offsetof(struct mptcp_full_info, mptcp_info) != |
1181 | MIN_FULL_INFO_OPTLEN_SIZE); |
1182 | |
1183 | if (get_user(len, optlen)) |
1184 | return -EFAULT; |
1185 | |
1186 | if (len < MIN_FULL_INFO_OPTLEN_SIZE) |
1187 | return -EINVAL; |
1188 | |
1189 | memset(mfi, 0, sizeof(*mfi)); |
1190 | if (copy_from_user(to: mfi, from: optval, MIN_FULL_INFO_OPTLEN_SIZE)) |
1191 | return -EFAULT; |
1192 | |
1193 | if (mfi->size_tcpinfo_kernel || |
1194 | mfi->size_sfinfo_kernel || |
1195 | mfi->num_subflows) |
1196 | return -EINVAL; |
1197 | |
1198 | if (mfi->size_sfinfo_user > INT_MAX || |
1199 | mfi->size_tcpinfo_user > INT_MAX) |
1200 | return -EINVAL; |
1201 | |
1202 | return len - MIN_FULL_INFO_OPTLEN_SIZE; |
1203 | } |
1204 | |
1205 | static int mptcp_put_full_info(struct mptcp_full_info *mfi, |
1206 | char __user *optval, |
1207 | u32 copylen, |
1208 | int __user *optlen) |
1209 | { |
1210 | copylen += MIN_FULL_INFO_OPTLEN_SIZE; |
1211 | if (put_user(copylen, optlen)) |
1212 | return -EFAULT; |
1213 | |
1214 | if (copy_to_user(to: optval, from: mfi, n: copylen)) |
1215 | return -EFAULT; |
1216 | return 0; |
1217 | } |
1218 | |
1219 | static int mptcp_getsockopt_full_info(struct mptcp_sock *msk, char __user *optval, |
1220 | int __user *optlen) |
1221 | { |
1222 | unsigned int sfcount = 0, copylen = 0; |
1223 | struct mptcp_subflow_context *subflow; |
1224 | struct sock *sk = (struct sock *)msk; |
1225 | void __user *tcpinfoptr, *sfinfoptr; |
1226 | struct mptcp_full_info mfi; |
1227 | int len; |
1228 | |
1229 | len = mptcp_get_full_info(mfi: &mfi, optval, optlen); |
1230 | if (len < 0) |
1231 | return len; |
1232 | |
1233 | /* don't bother filling the mptcp info if there is not enough |
1234 | * user-space-provided storage |
1235 | */ |
1236 | if (len > 0) { |
1237 | mptcp_diag_fill_info(msk, &mfi.mptcp_info); |
1238 | copylen += min_t(unsigned int, len, sizeof(struct mptcp_info)); |
1239 | } |
1240 | |
1241 | mfi.size_tcpinfo_kernel = sizeof(struct tcp_info); |
1242 | mfi.size_tcpinfo_user = min_t(unsigned int, mfi.size_tcpinfo_user, |
1243 | sizeof(struct tcp_info)); |
1244 | sfinfoptr = u64_to_user_ptr(mfi.subflow_info); |
1245 | mfi.size_sfinfo_kernel = sizeof(struct mptcp_subflow_info); |
1246 | mfi.size_sfinfo_user = min_t(unsigned int, mfi.size_sfinfo_user, |
1247 | sizeof(struct mptcp_subflow_info)); |
1248 | tcpinfoptr = u64_to_user_ptr(mfi.tcp_info); |
1249 | |
1250 | lock_sock(sk); |
1251 | mptcp_for_each_subflow(msk, subflow) { |
1252 | struct sock *ssk = mptcp_subflow_tcp_sock(subflow); |
1253 | struct mptcp_subflow_info sfinfo; |
1254 | struct tcp_info tcp_info; |
1255 | |
1256 | if (sfcount++ >= mfi.size_arrays_user) |
1257 | continue; |
1258 | |
1259 | /* fetch addr/tcp_info only if the user space buffers |
1260 | * are wide enough |
1261 | */ |
1262 | memset(&sfinfo, 0, sizeof(sfinfo)); |
1263 | sfinfo.id = subflow->subflow_id; |
1264 | if (mfi.size_sfinfo_user > |
1265 | offsetof(struct mptcp_subflow_info, addrs)) |
1266 | mptcp_get_sub_addrs(sk: ssk, a: &sfinfo.addrs); |
1267 | if (copy_to_user(to: sfinfoptr, from: &sfinfo, n: mfi.size_sfinfo_user)) |
1268 | goto fail_release; |
1269 | |
1270 | if (mfi.size_tcpinfo_user) { |
1271 | tcp_get_info(ssk, &tcp_info); |
1272 | if (copy_to_user(to: tcpinfoptr, from: &tcp_info, |
1273 | n: mfi.size_tcpinfo_user)) |
1274 | goto fail_release; |
1275 | } |
1276 | |
1277 | tcpinfoptr += mfi.size_tcpinfo_user; |
1278 | sfinfoptr += mfi.size_sfinfo_user; |
1279 | } |
1280 | release_sock(sk); |
1281 | |
1282 | mfi.num_subflows = sfcount; |
1283 | if (mptcp_put_full_info(mfi: &mfi, optval, copylen, optlen)) |
1284 | return -EFAULT; |
1285 | |
1286 | return 0; |
1287 | |
1288 | fail_release: |
1289 | release_sock(sk); |
1290 | return -EFAULT; |
1291 | } |
1292 | |
1293 | static int mptcp_put_int_option(struct mptcp_sock *msk, char __user *optval, |
1294 | int __user *optlen, int val) |
1295 | { |
1296 | int len; |
1297 | |
1298 | if (get_user(len, optlen)) |
1299 | return -EFAULT; |
1300 | if (len < 0) |
1301 | return -EINVAL; |
1302 | |
1303 | if (len < sizeof(int) && len > 0 && val >= 0 && val <= 255) { |
1304 | unsigned char ucval = (unsigned char)val; |
1305 | |
1306 | len = 1; |
1307 | if (put_user(len, optlen)) |
1308 | return -EFAULT; |
1309 | if (copy_to_user(to: optval, from: &ucval, n: 1)) |
1310 | return -EFAULT; |
1311 | } else { |
1312 | len = min_t(unsigned int, len, sizeof(int)); |
1313 | if (put_user(len, optlen)) |
1314 | return -EFAULT; |
1315 | if (copy_to_user(to: optval, from: &val, n: len)) |
1316 | return -EFAULT; |
1317 | } |
1318 | |
1319 | return 0; |
1320 | } |
1321 | |
1322 | static int mptcp_getsockopt_sol_tcp(struct mptcp_sock *msk, int optname, |
1323 | char __user *optval, int __user *optlen) |
1324 | { |
1325 | switch (optname) { |
1326 | case TCP_ULP: |
1327 | case TCP_CONGESTION: |
1328 | case TCP_INFO: |
1329 | case TCP_CC_INFO: |
1330 | case TCP_DEFER_ACCEPT: |
1331 | case TCP_FASTOPEN: |
1332 | case TCP_FASTOPEN_CONNECT: |
1333 | case TCP_FASTOPEN_KEY: |
1334 | case TCP_FASTOPEN_NO_COOKIE: |
1335 | return mptcp_getsockopt_first_sf_only(msk, SOL_TCP, optname, |
1336 | optval, optlen); |
1337 | case TCP_INQ: |
1338 | return mptcp_put_int_option(msk, optval, optlen, val: msk->recvmsg_inq); |
1339 | case TCP_CORK: |
1340 | return mptcp_put_int_option(msk, optval, optlen, val: msk->cork); |
1341 | case TCP_NODELAY: |
1342 | return mptcp_put_int_option(msk, optval, optlen, val: msk->nodelay); |
1343 | case TCP_NOTSENT_LOWAT: |
1344 | return mptcp_put_int_option(msk, optval, optlen, val: msk->notsent_lowat); |
1345 | } |
1346 | return -EOPNOTSUPP; |
1347 | } |
1348 | |
1349 | static int mptcp_getsockopt_v4(struct mptcp_sock *msk, int optname, |
1350 | char __user *optval, int __user *optlen) |
1351 | { |
1352 | struct sock *sk = (void *)msk; |
1353 | |
1354 | switch (optname) { |
1355 | case IP_TOS: |
1356 | return mptcp_put_int_option(msk, optval, optlen, READ_ONCE(inet_sk(sk)->tos)); |
1357 | case IP_BIND_ADDRESS_NO_PORT: |
1358 | return mptcp_put_int_option(msk, optval, optlen, |
1359 | inet_test_bit(BIND_ADDRESS_NO_PORT, sk)); |
1360 | case IP_LOCAL_PORT_RANGE: |
1361 | return mptcp_put_int_option(msk, optval, optlen, |
1362 | READ_ONCE(inet_sk(sk)->local_port_range)); |
1363 | } |
1364 | |
1365 | return -EOPNOTSUPP; |
1366 | } |
1367 | |
1368 | static int mptcp_getsockopt_sol_mptcp(struct mptcp_sock *msk, int optname, |
1369 | char __user *optval, int __user *optlen) |
1370 | { |
1371 | switch (optname) { |
1372 | case MPTCP_INFO: |
1373 | return mptcp_getsockopt_info(msk, optval, optlen); |
1374 | case MPTCP_FULL_INFO: |
1375 | return mptcp_getsockopt_full_info(msk, optval, optlen); |
1376 | case MPTCP_TCPINFO: |
1377 | return mptcp_getsockopt_tcpinfo(msk, optval, optlen); |
1378 | case MPTCP_SUBFLOW_ADDRS: |
1379 | return mptcp_getsockopt_subflow_addrs(msk, optval, optlen); |
1380 | } |
1381 | |
1382 | return -EOPNOTSUPP; |
1383 | } |
1384 | |
1385 | int mptcp_getsockopt(struct sock *sk, int level, int optname, |
1386 | char __user *optval, int __user *option) |
1387 | { |
1388 | struct mptcp_sock *msk = mptcp_sk(sk); |
1389 | struct sock *ssk; |
1390 | |
1391 | pr_debug("msk=%p" , msk); |
1392 | |
1393 | /* @@ the meaning of setsockopt() when the socket is connected and |
1394 | * there are multiple subflows is not yet defined. It is up to the |
1395 | * MPTCP-level socket to configure the subflows until the subflow |
1396 | * is in TCP fallback, when socket options are passed through |
1397 | * to the one remaining subflow. |
1398 | */ |
1399 | lock_sock(sk); |
1400 | ssk = __mptcp_tcp_fallback(msk); |
1401 | release_sock(sk); |
1402 | if (ssk) |
1403 | return tcp_getsockopt(sk: ssk, level, optname, optval, optlen: option); |
1404 | |
1405 | if (level == SOL_IP) |
1406 | return mptcp_getsockopt_v4(msk, optname, optval, optlen: option); |
1407 | if (level == SOL_TCP) |
1408 | return mptcp_getsockopt_sol_tcp(msk, optname, optval, optlen: option); |
1409 | if (level == SOL_MPTCP) |
1410 | return mptcp_getsockopt_sol_mptcp(msk, optname, optval, optlen: option); |
1411 | return -EOPNOTSUPP; |
1412 | } |
1413 | |
1414 | static void sync_socket_options(struct mptcp_sock *msk, struct sock *ssk) |
1415 | { |
1416 | static const unsigned int tx_rx_locks = SOCK_RCVBUF_LOCK | SOCK_SNDBUF_LOCK; |
1417 | struct sock *sk = (struct sock *)msk; |
1418 | |
1419 | if (ssk->sk_prot->keepalive) { |
1420 | if (sock_flag(sk, flag: SOCK_KEEPOPEN)) |
1421 | ssk->sk_prot->keepalive(ssk, 1); |
1422 | else |
1423 | ssk->sk_prot->keepalive(ssk, 0); |
1424 | } |
1425 | |
1426 | ssk->sk_priority = sk->sk_priority; |
1427 | ssk->sk_bound_dev_if = sk->sk_bound_dev_if; |
1428 | ssk->sk_incoming_cpu = sk->sk_incoming_cpu; |
1429 | ssk->sk_ipv6only = sk->sk_ipv6only; |
1430 | __ip_sock_set_tos(sk: ssk, inet_sk(sk)->tos); |
1431 | |
1432 | if (sk->sk_userlocks & tx_rx_locks) { |
1433 | ssk->sk_userlocks |= sk->sk_userlocks & tx_rx_locks; |
1434 | if (sk->sk_userlocks & SOCK_SNDBUF_LOCK) { |
1435 | WRITE_ONCE(ssk->sk_sndbuf, sk->sk_sndbuf); |
1436 | mptcp_subflow_ctx(sk: ssk)->cached_sndbuf = sk->sk_sndbuf; |
1437 | } |
1438 | if (sk->sk_userlocks & SOCK_RCVBUF_LOCK) |
1439 | WRITE_ONCE(ssk->sk_rcvbuf, sk->sk_rcvbuf); |
1440 | } |
1441 | |
1442 | if (sock_flag(sk, flag: SOCK_LINGER)) { |
1443 | ssk->sk_lingertime = sk->sk_lingertime; |
1444 | sock_set_flag(sk: ssk, flag: SOCK_LINGER); |
1445 | } else { |
1446 | sock_reset_flag(sk: ssk, flag: SOCK_LINGER); |
1447 | } |
1448 | |
1449 | if (sk->sk_mark != ssk->sk_mark) { |
1450 | ssk->sk_mark = sk->sk_mark; |
1451 | sk_dst_reset(sk: ssk); |
1452 | } |
1453 | |
1454 | sock_valbool_flag(sk: ssk, bit: SOCK_DBG, valbool: sock_flag(sk, flag: SOCK_DBG)); |
1455 | |
1456 | if (inet_csk(sk)->icsk_ca_ops != inet_csk(sk: ssk)->icsk_ca_ops) |
1457 | tcp_set_congestion_control(sk: ssk, name: msk->ca_name, load: false, cap_net_admin: true); |
1458 | __tcp_sock_set_cork(sk: ssk, on: !!msk->cork); |
1459 | __tcp_sock_set_nodelay(sk: ssk, on: !!msk->nodelay); |
1460 | |
1461 | inet_assign_bit(TRANSPARENT, ssk, inet_test_bit(TRANSPARENT, sk)); |
1462 | inet_assign_bit(FREEBIND, ssk, inet_test_bit(FREEBIND, sk)); |
1463 | inet_assign_bit(BIND_ADDRESS_NO_PORT, ssk, inet_test_bit(BIND_ADDRESS_NO_PORT, sk)); |
1464 | WRITE_ONCE(inet_sk(ssk)->local_port_range, READ_ONCE(inet_sk(sk)->local_port_range)); |
1465 | } |
1466 | |
1467 | void mptcp_sockopt_sync_locked(struct mptcp_sock *msk, struct sock *ssk) |
1468 | { |
1469 | struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk: ssk); |
1470 | |
1471 | msk_owned_by_me(msk); |
1472 | |
1473 | ssk->sk_rcvlowat = 0; |
1474 | |
1475 | /* subflows must ignore any latency-related settings: will not affect |
1476 | * the user-space - only the msk is relevant - but will foul the |
1477 | * mptcp scheduler |
1478 | */ |
1479 | tcp_sk(ssk)->notsent_lowat = UINT_MAX; |
1480 | |
1481 | if (READ_ONCE(subflow->setsockopt_seq) != msk->setsockopt_seq) { |
1482 | sync_socket_options(msk, ssk); |
1483 | |
1484 | subflow->setsockopt_seq = msk->setsockopt_seq; |
1485 | } |
1486 | } |
1487 | |
1488 | /* unfortunately this is different enough from the tcp version so |
1489 | * that we can't factor it out |
1490 | */ |
1491 | int mptcp_set_rcvlowat(struct sock *sk, int val) |
1492 | { |
1493 | struct mptcp_subflow_context *subflow; |
1494 | int space, cap; |
1495 | |
1496 | /* bpf can land here with a wrong sk type */ |
1497 | if (sk->sk_protocol == IPPROTO_TCP) |
1498 | return -EINVAL; |
1499 | |
1500 | if (sk->sk_userlocks & SOCK_RCVBUF_LOCK) |
1501 | cap = sk->sk_rcvbuf >> 1; |
1502 | else |
1503 | cap = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rmem[2]) >> 1; |
1504 | val = min(val, cap); |
1505 | WRITE_ONCE(sk->sk_rcvlowat, val ? : 1); |
1506 | |
1507 | /* Check if we need to signal EPOLLIN right now */ |
1508 | if (mptcp_epollin_ready(sk)) |
1509 | sk->sk_data_ready(sk); |
1510 | |
1511 | if (sk->sk_userlocks & SOCK_RCVBUF_LOCK) |
1512 | return 0; |
1513 | |
1514 | space = __tcp_space_from_win(mptcp_sk(sk)->scaling_ratio, win: val); |
1515 | if (space <= sk->sk_rcvbuf) |
1516 | return 0; |
1517 | |
1518 | /* propagate the rcvbuf changes to all the subflows */ |
1519 | WRITE_ONCE(sk->sk_rcvbuf, space); |
1520 | mptcp_for_each_subflow(mptcp_sk(sk), subflow) { |
1521 | struct sock *ssk = mptcp_subflow_tcp_sock(subflow); |
1522 | bool slow; |
1523 | |
1524 | slow = lock_sock_fast(sk: ssk); |
1525 | WRITE_ONCE(ssk->sk_rcvbuf, space); |
1526 | tcp_sk(ssk)->window_clamp = val; |
1527 | unlock_sock_fast(sk: ssk, slow); |
1528 | } |
1529 | return 0; |
1530 | } |
1531 | |