1 | // SPDX-License-Identifier: GPL-2.0 |
2 | /* |
3 | * linux/net/sunrpc/xprtsock.c |
4 | * |
5 | * Client-side transport implementation for sockets. |
6 | * |
7 | * TCP callback races fixes (C) 1998 Red Hat |
8 | * TCP send fixes (C) 1998 Red Hat |
9 | * TCP NFS related read + write fixes |
10 | * (C) 1999 Dave Airlie, University of Limerick, Ireland <airlied@linux.ie> |
11 | * |
12 | * Rewrite of large parts of the code in order to stabilize TCP stuff. |
13 | * Fix behaviour when socket buffer is full. |
14 | * (C) 1999 Trond Myklebust <trond.myklebust@fys.uio.no> |
15 | * |
16 | * IP socket transport implementation, (C) 2005 Chuck Lever <cel@netapp.com> |
17 | * |
18 | * IPv6 support contributed by Gilles Quillard, Bull Open Source, 2005. |
19 | * <gilles.quillard@bull.net> |
20 | */ |
21 | |
22 | #include <linux/types.h> |
23 | #include <linux/string.h> |
24 | #include <linux/slab.h> |
25 | #include <linux/module.h> |
26 | #include <linux/capability.h> |
27 | #include <linux/pagemap.h> |
28 | #include <linux/errno.h> |
29 | #include <linux/socket.h> |
30 | #include <linux/in.h> |
31 | #include <linux/net.h> |
32 | #include <linux/mm.h> |
33 | #include <linux/un.h> |
34 | #include <linux/udp.h> |
35 | #include <linux/tcp.h> |
36 | #include <linux/sunrpc/clnt.h> |
37 | #include <linux/sunrpc/addr.h> |
38 | #include <linux/sunrpc/sched.h> |
39 | #include <linux/sunrpc/svcsock.h> |
40 | #include <linux/sunrpc/xprtsock.h> |
41 | #include <linux/file.h> |
42 | #ifdef CONFIG_SUNRPC_BACKCHANNEL |
43 | #include <linux/sunrpc/bc_xprt.h> |
44 | #endif |
45 | |
46 | #include <net/sock.h> |
47 | #include <net/checksum.h> |
48 | #include <net/udp.h> |
49 | #include <net/tcp.h> |
50 | #include <net/tls_prot.h> |
51 | #include <net/handshake.h> |
52 | |
53 | #include <linux/bvec.h> |
54 | #include <linux/highmem.h> |
55 | #include <linux/uio.h> |
56 | #include <linux/sched/mm.h> |
57 | |
58 | #include <trace/events/sock.h> |
59 | #include <trace/events/sunrpc.h> |
60 | |
61 | #include "socklib.h" |
62 | #include "sunrpc.h" |
63 | |
/*
 * Forward declarations of static helpers that are referenced before
 * their definitions (the definitions live further down in this
 * translation unit).
 */
static void xs_close(struct rpc_xprt *xprt);
static void xs_set_srcport(struct sock_xprt *transport, struct socket *sock);
static void xs_tcp_set_socket_timeouts(struct rpc_xprt *xprt,
		struct socket *sock);
68 | |
69 | /* |
70 | * xprtsock tunables |
71 | */ |
72 | static unsigned int xprt_udp_slot_table_entries = RPC_DEF_SLOT_TABLE; |
73 | static unsigned int xprt_tcp_slot_table_entries = RPC_MIN_SLOT_TABLE; |
74 | static unsigned int xprt_max_tcp_slot_table_entries = RPC_MAX_SLOT_TABLE; |
75 | |
76 | static unsigned int xprt_min_resvport = RPC_DEF_MIN_RESVPORT; |
77 | static unsigned int xprt_max_resvport = RPC_DEF_MAX_RESVPORT; |
78 | |
79 | #define XS_TCP_LINGER_TO (15U * HZ) |
80 | static unsigned int xs_tcp_fin_timeout __read_mostly = XS_TCP_LINGER_TO; |
81 | |
82 | /* |
83 | * We can register our own files under /proc/sys/sunrpc by |
84 | * calling register_sysctl() again. The files in that |
85 | * directory become the union of all files registered there. |
86 | * |
87 | * We simply need to make sure that we don't collide with |
88 | * someone else's file names! |
89 | */ |
90 | |
91 | static unsigned int min_slot_table_size = RPC_MIN_SLOT_TABLE; |
92 | static unsigned int max_slot_table_size = RPC_MAX_SLOT_TABLE; |
93 | static unsigned int max_tcp_slot_table_limit = RPC_MAX_SLOT_TABLE_LIMIT; |
94 | static unsigned int xprt_min_resvport_limit = RPC_MIN_RESVPORT; |
95 | static unsigned int xprt_max_resvport_limit = RPC_MAX_RESVPORT; |
96 | |
97 | static struct ctl_table_header *; |
98 | |
99 | static struct xprt_class xs_local_transport; |
100 | static struct xprt_class xs_udp_transport; |
101 | static struct xprt_class xs_tcp_transport; |
102 | static struct xprt_class xs_tcp_tls_transport; |
103 | static struct xprt_class xs_bc_tcp_transport; |
104 | |
105 | /* |
106 | * FIXME: changing the UDP slot table size should also resize the UDP |
107 | * socket buffers for existing UDP transports |
108 | */ |
109 | static struct ctl_table xs_tunables_table[] = { |
110 | { |
111 | .procname = "udp_slot_table_entries" , |
112 | .data = &xprt_udp_slot_table_entries, |
113 | .maxlen = sizeof(unsigned int), |
114 | .mode = 0644, |
115 | .proc_handler = proc_dointvec_minmax, |
116 | .extra1 = &min_slot_table_size, |
117 | .extra2 = &max_slot_table_size |
118 | }, |
119 | { |
120 | .procname = "tcp_slot_table_entries" , |
121 | .data = &xprt_tcp_slot_table_entries, |
122 | .maxlen = sizeof(unsigned int), |
123 | .mode = 0644, |
124 | .proc_handler = proc_dointvec_minmax, |
125 | .extra1 = &min_slot_table_size, |
126 | .extra2 = &max_slot_table_size |
127 | }, |
128 | { |
129 | .procname = "tcp_max_slot_table_entries" , |
130 | .data = &xprt_max_tcp_slot_table_entries, |
131 | .maxlen = sizeof(unsigned int), |
132 | .mode = 0644, |
133 | .proc_handler = proc_dointvec_minmax, |
134 | .extra1 = &min_slot_table_size, |
135 | .extra2 = &max_tcp_slot_table_limit |
136 | }, |
137 | { |
138 | .procname = "min_resvport" , |
139 | .data = &xprt_min_resvport, |
140 | .maxlen = sizeof(unsigned int), |
141 | .mode = 0644, |
142 | .proc_handler = proc_dointvec_minmax, |
143 | .extra1 = &xprt_min_resvport_limit, |
144 | .extra2 = &xprt_max_resvport_limit |
145 | }, |
146 | { |
147 | .procname = "max_resvport" , |
148 | .data = &xprt_max_resvport, |
149 | .maxlen = sizeof(unsigned int), |
150 | .mode = 0644, |
151 | .proc_handler = proc_dointvec_minmax, |
152 | .extra1 = &xprt_min_resvport_limit, |
153 | .extra2 = &xprt_max_resvport_limit |
154 | }, |
155 | { |
156 | .procname = "tcp_fin_timeout" , |
157 | .data = &xs_tcp_fin_timeout, |
158 | .maxlen = sizeof(xs_tcp_fin_timeout), |
159 | .mode = 0644, |
160 | .proc_handler = proc_dointvec_jiffies, |
161 | }, |
162 | { }, |
163 | }; |
164 | |
/*
 * Wait duration for a reply from the RPC portmapper.
 */
#define XS_BIND_TO		(60U * HZ)

/*
 * Delay if a UDP socket connect error occurs.  This is most likely some
 * kind of resource problem on the local host.
 */
#define XS_UDP_REEST_TO		(2U * HZ)

/*
 * The reestablish timeout allows clients to delay for a bit before attempting
 * to reconnect to a server that just dropped our connection.
 *
 * We implement an exponential backoff when trying to reestablish a TCP
 * transport connection with the server.  Some servers like to drop a TCP
 * connection when they are overworked, so we start with a short timeout and
 * increase over time if the server is down or not responding.
 */
#define XS_TCP_INIT_REEST_TO	(3U * HZ)

/*
 * TCP idle timeout; client drops the transport socket if it is idle
 * for this long.  Note that we also timeout UDP sockets to prevent
 * holding port numbers when there is no RPC traffic.
 */
#define XS_IDLE_DISC_TO		(5U * 60 * HZ)

/*
 * TLS handshake timeout.
 */
#define XS_TLS_HANDSHAKE_TO	(10U * HZ)
198 | |
199 | #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) |
200 | # undef RPC_DEBUG_DATA |
201 | # define RPCDBG_FACILITY RPCDBG_TRANS |
202 | #endif |
203 | |
204 | #ifdef RPC_DEBUG_DATA |
205 | static void xs_pktdump(char *msg, u32 *packet, unsigned int count) |
206 | { |
207 | u8 *buf = (u8 *) packet; |
208 | int j; |
209 | |
210 | dprintk("RPC: %s\n" , msg); |
211 | for (j = 0; j < count && j < 128; j += 4) { |
212 | if (!(j & 31)) { |
213 | if (j) |
214 | dprintk("\n" ); |
215 | dprintk("0x%04x " , j); |
216 | } |
217 | dprintk("%02x%02x%02x%02x " , |
218 | buf[j], buf[j+1], buf[j+2], buf[j+3]); |
219 | } |
220 | dprintk("\n" ); |
221 | } |
222 | #else |
223 | static inline void xs_pktdump(char *msg, u32 *packet, unsigned int count) |
224 | { |
225 | /* NOP */ |
226 | } |
227 | #endif |
228 | |
229 | static inline struct rpc_xprt *xprt_from_sock(struct sock *sk) |
230 | { |
231 | return (struct rpc_xprt *) sk->sk_user_data; |
232 | } |
233 | |
234 | static inline struct sockaddr *xs_addr(struct rpc_xprt *xprt) |
235 | { |
236 | return (struct sockaddr *) &xprt->addr; |
237 | } |
238 | |
239 | static inline struct sockaddr_un *xs_addr_un(struct rpc_xprt *xprt) |
240 | { |
241 | return (struct sockaddr_un *) &xprt->addr; |
242 | } |
243 | |
244 | static inline struct sockaddr_in *xs_addr_in(struct rpc_xprt *xprt) |
245 | { |
246 | return (struct sockaddr_in *) &xprt->addr; |
247 | } |
248 | |
249 | static inline struct sockaddr_in6 *xs_addr_in6(struct rpc_xprt *xprt) |
250 | { |
251 | return (struct sockaddr_in6 *) &xprt->addr; |
252 | } |
253 | |
254 | static void xs_format_common_peer_addresses(struct rpc_xprt *xprt) |
255 | { |
256 | struct sockaddr *sap = xs_addr(xprt); |
257 | struct sockaddr_in6 *sin6; |
258 | struct sockaddr_in *sin; |
259 | struct sockaddr_un *sun; |
260 | char buf[128]; |
261 | |
262 | switch (sap->sa_family) { |
263 | case AF_LOCAL: |
264 | sun = xs_addr_un(xprt); |
265 | if (sun->sun_path[0]) { |
266 | strscpy(p: buf, q: sun->sun_path, size: sizeof(buf)); |
267 | } else { |
268 | buf[0] = '@'; |
269 | strscpy(p: buf+1, q: sun->sun_path+1, size: sizeof(buf)-1); |
270 | } |
271 | xprt->address_strings[RPC_DISPLAY_ADDR] = |
272 | kstrdup(s: buf, GFP_KERNEL); |
273 | break; |
274 | case AF_INET: |
275 | (void)rpc_ntop(sap, buf, sizeof(buf)); |
276 | xprt->address_strings[RPC_DISPLAY_ADDR] = |
277 | kstrdup(s: buf, GFP_KERNEL); |
278 | sin = xs_addr_in(xprt); |
279 | snprintf(buf, size: sizeof(buf), fmt: "%08x" , ntohl(sin->sin_addr.s_addr)); |
280 | break; |
281 | case AF_INET6: |
282 | (void)rpc_ntop(sap, buf, sizeof(buf)); |
283 | xprt->address_strings[RPC_DISPLAY_ADDR] = |
284 | kstrdup(s: buf, GFP_KERNEL); |
285 | sin6 = xs_addr_in6(xprt); |
286 | snprintf(buf, size: sizeof(buf), fmt: "%pi6" , &sin6->sin6_addr); |
287 | break; |
288 | default: |
289 | BUG(); |
290 | } |
291 | |
292 | xprt->address_strings[RPC_DISPLAY_HEX_ADDR] = kstrdup(s: buf, GFP_KERNEL); |
293 | } |
294 | |
295 | static void xs_format_common_peer_ports(struct rpc_xprt *xprt) |
296 | { |
297 | struct sockaddr *sap = xs_addr(xprt); |
298 | char buf[128]; |
299 | |
300 | snprintf(buf, size: sizeof(buf), fmt: "%u" , rpc_get_port(sap)); |
301 | xprt->address_strings[RPC_DISPLAY_PORT] = kstrdup(s: buf, GFP_KERNEL); |
302 | |
303 | snprintf(buf, size: sizeof(buf), fmt: "%4hx" , rpc_get_port(sap)); |
304 | xprt->address_strings[RPC_DISPLAY_HEX_PORT] = kstrdup(s: buf, GFP_KERNEL); |
305 | } |
306 | |
307 | static void xs_format_peer_addresses(struct rpc_xprt *xprt, |
308 | const char *protocol, |
309 | const char *netid) |
310 | { |
311 | xprt->address_strings[RPC_DISPLAY_PROTO] = protocol; |
312 | xprt->address_strings[RPC_DISPLAY_NETID] = netid; |
313 | xs_format_common_peer_addresses(xprt); |
314 | xs_format_common_peer_ports(xprt); |
315 | } |
316 | |
317 | static void xs_update_peer_port(struct rpc_xprt *xprt) |
318 | { |
319 | kfree(objp: xprt->address_strings[RPC_DISPLAY_HEX_PORT]); |
320 | kfree(objp: xprt->address_strings[RPC_DISPLAY_PORT]); |
321 | |
322 | xs_format_common_peer_ports(xprt); |
323 | } |
324 | |
325 | static void xs_free_peer_addresses(struct rpc_xprt *xprt) |
326 | { |
327 | unsigned int i; |
328 | |
329 | for (i = 0; i < RPC_DISPLAY_MAX; i++) |
330 | switch (i) { |
331 | case RPC_DISPLAY_PROTO: |
332 | case RPC_DISPLAY_NETID: |
333 | continue; |
334 | default: |
335 | kfree(objp: xprt->address_strings[i]); |
336 | } |
337 | } |
338 | |
339 | static size_t |
340 | xs_alloc_sparse_pages(struct xdr_buf *buf, size_t want, gfp_t gfp) |
341 | { |
342 | size_t i,n; |
343 | |
344 | if (!want || !(buf->flags & XDRBUF_SPARSE_PAGES)) |
345 | return want; |
346 | n = (buf->page_base + want + PAGE_SIZE - 1) >> PAGE_SHIFT; |
347 | for (i = 0; i < n; i++) { |
348 | if (buf->pages[i]) |
349 | continue; |
350 | buf->bvec[i].bv_page = buf->pages[i] = alloc_page(gfp); |
351 | if (!buf->pages[i]) { |
352 | i *= PAGE_SIZE; |
353 | return i > buf->page_base ? i - buf->page_base : 0; |
354 | } |
355 | } |
356 | return want; |
357 | } |
358 | |
359 | static int |
360 | xs_sock_process_cmsg(struct socket *sock, struct msghdr *msg, |
361 | struct cmsghdr *cmsg, int ret) |
362 | { |
363 | u8 content_type = tls_get_record_type(sk: sock->sk, msg: cmsg); |
364 | u8 level, description; |
365 | |
366 | switch (content_type) { |
367 | case 0: |
368 | break; |
369 | case TLS_RECORD_TYPE_DATA: |
370 | /* TLS sets EOR at the end of each application data |
371 | * record, even though there might be more frames |
372 | * waiting to be decrypted. |
373 | */ |
374 | msg->msg_flags &= ~MSG_EOR; |
375 | break; |
376 | case TLS_RECORD_TYPE_ALERT: |
377 | tls_alert_recv(sk: sock->sk, msg, level: &level, description: &description); |
378 | ret = (level == TLS_ALERT_LEVEL_FATAL) ? |
379 | -EACCES : -EAGAIN; |
380 | break; |
381 | default: |
382 | /* discard this record type */ |
383 | ret = -EAGAIN; |
384 | } |
385 | return ret; |
386 | } |
387 | |
388 | static int |
389 | xs_sock_recv_cmsg(struct socket *sock, struct msghdr *msg, int flags) |
390 | { |
391 | union { |
392 | struct cmsghdr cmsg; |
393 | u8 buf[CMSG_SPACE(sizeof(u8))]; |
394 | } u; |
395 | int ret; |
396 | |
397 | msg->msg_control = &u; |
398 | msg->msg_controllen = sizeof(u); |
399 | ret = sock_recvmsg(sock, msg, flags); |
400 | if (msg->msg_controllen != sizeof(u)) |
401 | ret = xs_sock_process_cmsg(sock, msg, cmsg: &u.cmsg, ret); |
402 | return ret; |
403 | } |
404 | |
405 | static ssize_t |
406 | xs_sock_recvmsg(struct socket *sock, struct msghdr *msg, int flags, size_t seek) |
407 | { |
408 | ssize_t ret; |
409 | if (seek != 0) |
410 | iov_iter_advance(i: &msg->msg_iter, bytes: seek); |
411 | ret = xs_sock_recv_cmsg(sock, msg, flags); |
412 | return ret > 0 ? ret + seek : ret; |
413 | } |
414 | |
415 | static ssize_t |
416 | xs_read_kvec(struct socket *sock, struct msghdr *msg, int flags, |
417 | struct kvec *kvec, size_t count, size_t seek) |
418 | { |
419 | iov_iter_kvec(i: &msg->msg_iter, ITER_DEST, kvec, nr_segs: 1, count); |
420 | return xs_sock_recvmsg(sock, msg, flags, seek); |
421 | } |
422 | |
423 | static ssize_t |
424 | xs_read_bvec(struct socket *sock, struct msghdr *msg, int flags, |
425 | struct bio_vec *bvec, unsigned long nr, size_t count, |
426 | size_t seek) |
427 | { |
428 | iov_iter_bvec(i: &msg->msg_iter, ITER_DEST, bvec, nr_segs: nr, count); |
429 | return xs_sock_recvmsg(sock, msg, flags, seek); |
430 | } |
431 | |
432 | static ssize_t |
433 | xs_read_discard(struct socket *sock, struct msghdr *msg, int flags, |
434 | size_t count) |
435 | { |
436 | iov_iter_discard(i: &msg->msg_iter, ITER_DEST, count); |
437 | return xs_sock_recv_cmsg(sock, msg, flags); |
438 | } |
439 | |
440 | #if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE |
441 | static void |
442 | xs_flush_bvec(const struct bio_vec *bvec, size_t count, size_t seek) |
443 | { |
444 | struct bvec_iter bi = { |
445 | .bi_size = count, |
446 | }; |
447 | struct bio_vec bv; |
448 | |
449 | bvec_iter_advance(bvec, &bi, seek & PAGE_MASK); |
450 | for_each_bvec(bv, bvec, bi, bi) |
451 | flush_dcache_page(bv.bv_page); |
452 | } |
453 | #else |
454 | static inline void |
455 | xs_flush_bvec(const struct bio_vec *bvec, size_t count, size_t seek) |
456 | { |
457 | } |
458 | #endif |
459 | |
460 | static ssize_t |
461 | xs_read_xdr_buf(struct socket *sock, struct msghdr *msg, int flags, |
462 | struct xdr_buf *buf, size_t count, size_t seek, size_t *read) |
463 | { |
464 | size_t want, seek_init = seek, offset = 0; |
465 | ssize_t ret; |
466 | |
467 | want = min_t(size_t, count, buf->head[0].iov_len); |
468 | if (seek < want) { |
469 | ret = xs_read_kvec(sock, msg, flags, kvec: &buf->head[0], count: want, seek); |
470 | if (ret <= 0) |
471 | goto sock_err; |
472 | offset += ret; |
473 | if (offset == count || msg->msg_flags & (MSG_EOR|MSG_TRUNC)) |
474 | goto out; |
475 | if (ret != want) |
476 | goto out; |
477 | seek = 0; |
478 | } else { |
479 | seek -= want; |
480 | offset += want; |
481 | } |
482 | |
483 | want = xs_alloc_sparse_pages( |
484 | buf, min_t(size_t, count - offset, buf->page_len), |
485 | GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN); |
486 | if (seek < want) { |
487 | ret = xs_read_bvec(sock, msg, flags, bvec: buf->bvec, |
488 | nr: xdr_buf_pagecount(buf), |
489 | count: want + buf->page_base, |
490 | seek: seek + buf->page_base); |
491 | if (ret <= 0) |
492 | goto sock_err; |
493 | xs_flush_bvec(bvec: buf->bvec, count: ret, seek: seek + buf->page_base); |
494 | ret -= buf->page_base; |
495 | offset += ret; |
496 | if (offset == count || msg->msg_flags & (MSG_EOR|MSG_TRUNC)) |
497 | goto out; |
498 | if (ret != want) |
499 | goto out; |
500 | seek = 0; |
501 | } else { |
502 | seek -= want; |
503 | offset += want; |
504 | } |
505 | |
506 | want = min_t(size_t, count - offset, buf->tail[0].iov_len); |
507 | if (seek < want) { |
508 | ret = xs_read_kvec(sock, msg, flags, kvec: &buf->tail[0], count: want, seek); |
509 | if (ret <= 0) |
510 | goto sock_err; |
511 | offset += ret; |
512 | if (offset == count || msg->msg_flags & (MSG_EOR|MSG_TRUNC)) |
513 | goto out; |
514 | if (ret != want) |
515 | goto out; |
516 | } else if (offset < seek_init) |
517 | offset = seek_init; |
518 | ret = -EMSGSIZE; |
519 | out: |
520 | *read = offset - seek_init; |
521 | return ret; |
522 | sock_err: |
523 | offset += seek; |
524 | goto out; |
525 | } |
526 | |
527 | static void |
528 | (struct sock_xprt *transport, struct xdr_buf *buf) |
529 | { |
530 | if (!transport->recv.copied) { |
531 | if (buf->head[0].iov_len >= transport->recv.offset) |
532 | memcpy(buf->head[0].iov_base, |
533 | &transport->recv.xid, |
534 | transport->recv.offset); |
535 | transport->recv.copied = transport->recv.offset; |
536 | } |
537 | } |
538 | |
539 | static bool |
540 | xs_read_stream_request_done(struct sock_xprt *transport) |
541 | { |
542 | return transport->recv.fraghdr & cpu_to_be32(RPC_LAST_STREAM_FRAGMENT); |
543 | } |
544 | |
545 | static void |
546 | xs_read_stream_check_eor(struct sock_xprt *transport, |
547 | struct msghdr *msg) |
548 | { |
549 | if (xs_read_stream_request_done(transport)) |
550 | msg->msg_flags |= MSG_EOR; |
551 | } |
552 | |
553 | static ssize_t |
554 | xs_read_stream_request(struct sock_xprt *transport, struct msghdr *msg, |
555 | int flags, struct rpc_rqst *req) |
556 | { |
557 | struct xdr_buf *buf = &req->rq_private_buf; |
558 | size_t want, read; |
559 | ssize_t ret; |
560 | |
561 | xs_read_header(transport, buf); |
562 | |
563 | want = transport->recv.len - transport->recv.offset; |
564 | if (want != 0) { |
565 | ret = xs_read_xdr_buf(sock: transport->sock, msg, flags, buf, |
566 | count: transport->recv.copied + want, |
567 | seek: transport->recv.copied, |
568 | read: &read); |
569 | transport->recv.offset += read; |
570 | transport->recv.copied += read; |
571 | } |
572 | |
573 | if (transport->recv.offset == transport->recv.len) |
574 | xs_read_stream_check_eor(transport, msg); |
575 | |
576 | if (want == 0) |
577 | return 0; |
578 | |
579 | switch (ret) { |
580 | default: |
581 | break; |
582 | case -EFAULT: |
583 | case -EMSGSIZE: |
584 | msg->msg_flags |= MSG_TRUNC; |
585 | return read; |
586 | case 0: |
587 | return -ESHUTDOWN; |
588 | } |
589 | return ret < 0 ? ret : read; |
590 | } |
591 | |
/*
 * xs_read_stream_headersize - number of header bytes to read next
 * @isfrag: true when only a fragment header is expected
 *
 * Returns the size of one 32-bit big-endian word when @isfrag is true,
 * and of three such words otherwise.
 */
static size_t
xs_read_stream_headersize(bool isfrag)
{
	if (isfrag)
		return sizeof(__be32);
	return 3 * sizeof(__be32);
}
599 | |
600 | static ssize_t |
601 | (struct sock_xprt *transport, struct msghdr *msg, |
602 | int flags, size_t want, size_t seek) |
603 | { |
604 | struct kvec kvec = { |
605 | .iov_base = &transport->recv.fraghdr, |
606 | .iov_len = want, |
607 | }; |
608 | return xs_read_kvec(sock: transport->sock, msg, flags, kvec: &kvec, count: want, seek); |
609 | } |
610 | |
611 | #if defined(CONFIG_SUNRPC_BACKCHANNEL) |
612 | static ssize_t |
613 | xs_read_stream_call(struct sock_xprt *transport, struct msghdr *msg, int flags) |
614 | { |
615 | struct rpc_xprt *xprt = &transport->xprt; |
616 | struct rpc_rqst *req; |
617 | ssize_t ret; |
618 | |
619 | /* Is this transport associated with the backchannel? */ |
620 | if (!xprt->bc_serv) |
621 | return -ESHUTDOWN; |
622 | |
623 | /* Look up and lock the request corresponding to the given XID */ |
624 | req = xprt_lookup_bc_request(xprt, xid: transport->recv.xid); |
625 | if (!req) { |
626 | printk(KERN_WARNING "Callback slot table overflowed\n" ); |
627 | return -ESHUTDOWN; |
628 | } |
629 | if (transport->recv.copied && !req->rq_private_buf.len) |
630 | return -ESHUTDOWN; |
631 | |
632 | ret = xs_read_stream_request(transport, msg, flags, req); |
633 | if (msg->msg_flags & (MSG_EOR|MSG_TRUNC)) |
634 | xprt_complete_bc_request(req, copied: transport->recv.copied); |
635 | else |
636 | req->rq_private_buf.len = transport->recv.copied; |
637 | |
638 | return ret; |
639 | } |
640 | #else /* CONFIG_SUNRPC_BACKCHANNEL */ |
641 | static ssize_t |
642 | xs_read_stream_call(struct sock_xprt *transport, struct msghdr *msg, int flags) |
643 | { |
644 | return -ESHUTDOWN; |
645 | } |
646 | #endif /* CONFIG_SUNRPC_BACKCHANNEL */ |
647 | |
648 | static ssize_t |
649 | xs_read_stream_reply(struct sock_xprt *transport, struct msghdr *msg, int flags) |
650 | { |
651 | struct rpc_xprt *xprt = &transport->xprt; |
652 | struct rpc_rqst *req; |
653 | ssize_t ret = 0; |
654 | |
655 | /* Look up and lock the request corresponding to the given XID */ |
656 | spin_lock(lock: &xprt->queue_lock); |
657 | req = xprt_lookup_rqst(xprt, xid: transport->recv.xid); |
658 | if (!req || (transport->recv.copied && !req->rq_private_buf.len)) { |
659 | msg->msg_flags |= MSG_TRUNC; |
660 | goto out; |
661 | } |
662 | xprt_pin_rqst(req); |
663 | spin_unlock(lock: &xprt->queue_lock); |
664 | |
665 | ret = xs_read_stream_request(transport, msg, flags, req); |
666 | |
667 | spin_lock(lock: &xprt->queue_lock); |
668 | if (msg->msg_flags & (MSG_EOR|MSG_TRUNC)) |
669 | xprt_complete_rqst(task: req->rq_task, copied: transport->recv.copied); |
670 | else |
671 | req->rq_private_buf.len = transport->recv.copied; |
672 | xprt_unpin_rqst(req); |
673 | out: |
674 | spin_unlock(lock: &xprt->queue_lock); |
675 | return ret; |
676 | } |
677 | |
678 | static ssize_t |
679 | xs_read_stream(struct sock_xprt *transport, int flags) |
680 | { |
681 | struct msghdr msg = { 0 }; |
682 | size_t want, read = 0; |
683 | ssize_t ret = 0; |
684 | |
685 | if (transport->recv.len == 0) { |
686 | want = xs_read_stream_headersize(isfrag: transport->recv.copied != 0); |
687 | ret = xs_read_stream_header(transport, msg: &msg, flags, want, |
688 | seek: transport->recv.offset); |
689 | if (ret <= 0) |
690 | goto out_err; |
691 | transport->recv.offset = ret; |
692 | if (transport->recv.offset != want) |
693 | return transport->recv.offset; |
694 | transport->recv.len = be32_to_cpu(transport->recv.fraghdr) & |
695 | RPC_FRAGMENT_SIZE_MASK; |
696 | transport->recv.offset -= sizeof(transport->recv.fraghdr); |
697 | read = ret; |
698 | } |
699 | |
700 | switch (be32_to_cpu(transport->recv.calldir)) { |
701 | default: |
702 | msg.msg_flags |= MSG_TRUNC; |
703 | break; |
704 | case RPC_CALL: |
705 | ret = xs_read_stream_call(transport, msg: &msg, flags); |
706 | break; |
707 | case RPC_REPLY: |
708 | ret = xs_read_stream_reply(transport, msg: &msg, flags); |
709 | } |
710 | if (msg.msg_flags & MSG_TRUNC) { |
711 | transport->recv.calldir = cpu_to_be32(-1); |
712 | transport->recv.copied = -1; |
713 | } |
714 | if (ret < 0) |
715 | goto out_err; |
716 | read += ret; |
717 | if (transport->recv.offset < transport->recv.len) { |
718 | if (!(msg.msg_flags & MSG_TRUNC)) |
719 | return read; |
720 | msg.msg_flags = 0; |
721 | ret = xs_read_discard(sock: transport->sock, msg: &msg, flags, |
722 | count: transport->recv.len - transport->recv.offset); |
723 | if (ret <= 0) |
724 | goto out_err; |
725 | transport->recv.offset += ret; |
726 | read += ret; |
727 | if (transport->recv.offset != transport->recv.len) |
728 | return read; |
729 | } |
730 | if (xs_read_stream_request_done(transport)) { |
731 | trace_xs_stream_read_request(xs: transport); |
732 | transport->recv.copied = 0; |
733 | } |
734 | transport->recv.offset = 0; |
735 | transport->recv.len = 0; |
736 | return read; |
737 | out_err: |
738 | return ret != 0 ? ret : -ESHUTDOWN; |
739 | } |
740 | |
741 | static __poll_t xs_poll_socket(struct sock_xprt *transport) |
742 | { |
743 | return transport->sock->ops->poll(transport->file, transport->sock, |
744 | NULL); |
745 | } |
746 | |
747 | static bool xs_poll_socket_readable(struct sock_xprt *transport) |
748 | { |
749 | __poll_t events = xs_poll_socket(transport); |
750 | |
751 | return (events & (EPOLLIN | EPOLLRDNORM)) && !(events & EPOLLRDHUP); |
752 | } |
753 | |
754 | static void xs_poll_check_readable(struct sock_xprt *transport) |
755 | { |
756 | |
757 | clear_bit(XPRT_SOCK_DATA_READY, addr: &transport->sock_state); |
758 | if (test_bit(XPRT_SOCK_IGNORE_RECV, &transport->sock_state)) |
759 | return; |
760 | if (!xs_poll_socket_readable(transport)) |
761 | return; |
762 | if (!test_and_set_bit(XPRT_SOCK_DATA_READY, addr: &transport->sock_state)) |
763 | queue_work(wq: xprtiod_workqueue, work: &transport->recv_worker); |
764 | } |
765 | |
766 | static void xs_stream_data_receive(struct sock_xprt *transport) |
767 | { |
768 | size_t read = 0; |
769 | ssize_t ret = 0; |
770 | |
771 | mutex_lock(&transport->recv_mutex); |
772 | if (transport->sock == NULL) |
773 | goto out; |
774 | for (;;) { |
775 | ret = xs_read_stream(transport, MSG_DONTWAIT); |
776 | if (ret < 0) |
777 | break; |
778 | read += ret; |
779 | cond_resched(); |
780 | } |
781 | if (ret == -ESHUTDOWN) |
782 | kernel_sock_shutdown(sock: transport->sock, how: SHUT_RDWR); |
783 | else if (ret == -EACCES) |
784 | xprt_wake_pending_tasks(xprt: &transport->xprt, status: -EACCES); |
785 | else |
786 | xs_poll_check_readable(transport); |
787 | out: |
788 | mutex_unlock(lock: &transport->recv_mutex); |
789 | trace_xs_stream_read_data(xprt: &transport->xprt, err: ret, total: read); |
790 | } |
791 | |
792 | static void xs_stream_data_receive_workfn(struct work_struct *work) |
793 | { |
794 | struct sock_xprt *transport = |
795 | container_of(work, struct sock_xprt, recv_worker); |
796 | unsigned int pflags = memalloc_nofs_save(); |
797 | |
798 | xs_stream_data_receive(transport); |
799 | memalloc_nofs_restore(flags: pflags); |
800 | } |
801 | |
802 | static void |
803 | xs_stream_reset_connect(struct sock_xprt *transport) |
804 | { |
805 | transport->recv.offset = 0; |
806 | transport->recv.len = 0; |
807 | transport->recv.copied = 0; |
808 | transport->xmit.offset = 0; |
809 | } |
810 | |
811 | static void |
812 | xs_stream_start_connect(struct sock_xprt *transport) |
813 | { |
814 | transport->xprt.stat.connect_count++; |
815 | transport->xprt.stat.connect_start = jiffies; |
816 | } |
817 | |
/* msg_flags used for every sendmsg issued by the send_request paths */
#define XS_SENDMSG_FLAGS	(MSG_DONTWAIT | MSG_NOSIGNAL)
819 | |
820 | /** |
821 | * xs_nospace - handle transmit was incomplete |
822 | * @req: pointer to RPC request |
823 | * @transport: pointer to struct sock_xprt |
824 | * |
825 | */ |
826 | static int xs_nospace(struct rpc_rqst *req, struct sock_xprt *transport) |
827 | { |
828 | struct rpc_xprt *xprt = &transport->xprt; |
829 | struct sock *sk = transport->inet; |
830 | int ret = -EAGAIN; |
831 | |
832 | trace_rpc_socket_nospace(rqst: req, transport); |
833 | |
834 | /* Protect against races with write_space */ |
835 | spin_lock(lock: &xprt->transport_lock); |
836 | |
837 | /* Don't race with disconnect */ |
838 | if (xprt_connected(xprt)) { |
839 | /* wait for more buffer space */ |
840 | set_bit(XPRT_SOCK_NOSPACE, addr: &transport->sock_state); |
841 | set_bit(SOCK_NOSPACE, addr: &sk->sk_socket->flags); |
842 | sk->sk_write_pending++; |
843 | xprt_wait_for_buffer_space(xprt); |
844 | } else |
845 | ret = -ENOTCONN; |
846 | |
847 | spin_unlock(lock: &xprt->transport_lock); |
848 | return ret; |
849 | } |
850 | |
851 | static int xs_sock_nospace(struct rpc_rqst *req) |
852 | { |
853 | struct sock_xprt *transport = |
854 | container_of(req->rq_xprt, struct sock_xprt, xprt); |
855 | struct sock *sk = transport->inet; |
856 | int ret = -EAGAIN; |
857 | |
858 | lock_sock(sk); |
859 | if (!sock_writeable(sk)) |
860 | ret = xs_nospace(req, transport); |
861 | release_sock(sk); |
862 | return ret; |
863 | } |
864 | |
865 | static int xs_stream_nospace(struct rpc_rqst *req, bool vm_wait) |
866 | { |
867 | struct sock_xprt *transport = |
868 | container_of(req->rq_xprt, struct sock_xprt, xprt); |
869 | struct sock *sk = transport->inet; |
870 | int ret = -EAGAIN; |
871 | |
872 | if (vm_wait) |
873 | return -ENOBUFS; |
874 | lock_sock(sk); |
875 | if (!sk_stream_memory_free(sk)) |
876 | ret = xs_nospace(req, transport); |
877 | release_sock(sk); |
878 | return ret; |
879 | } |
880 | |
/*
 * xs_stream_prepare_request - allocate the bio_vec array for a buffer
 * @req: RPC request (not used here)
 * @buf: buffer for which xdr_alloc_bvec() is called
 *
 * Returns the xdr_alloc_bvec() result, using the gfp mask from
 * rpc_task_gfp_mask().
 */
static int xs_stream_prepare_request(struct rpc_rqst *req, struct xdr_buf *buf)
{
	return xdr_alloc_bvec(buf, rpc_task_gfp_mask());
}
885 | |
886 | /* |
887 | * Determine if the previous message in the stream was aborted before it |
888 | * could complete transmission. |
889 | */ |
890 | static bool |
891 | xs_send_request_was_aborted(struct sock_xprt *transport, struct rpc_rqst *req) |
892 | { |
893 | return transport->xmit.offset != 0 && req->rq_bytes_sent == 0; |
894 | } |
895 | |
896 | /* |
897 | * Return the stream record marker field for a record of length < 2^31-1 |
898 | */ |
899 | static rpc_fraghdr |
900 | xs_stream_record_marker(struct xdr_buf *xdr) |
901 | { |
902 | if (!xdr->len) |
903 | return 0; |
904 | return cpu_to_be32(RPC_LAST_STREAM_FRAGMENT | (u32)xdr->len); |
905 | } |
906 | |
907 | /** |
908 | * xs_local_send_request - write an RPC request to an AF_LOCAL socket |
909 | * @req: pointer to RPC request |
910 | * |
911 | * Return values: |
912 | * 0: The request has been sent |
913 | * EAGAIN: The socket was blocked, please call again later to |
914 | * complete the request |
915 | * ENOTCONN: Caller needs to invoke connect logic then call again |
916 | * other: Some other error occurred, the request was not sent |
917 | */ |
918 | static int xs_local_send_request(struct rpc_rqst *req) |
919 | { |
920 | struct rpc_xprt *xprt = req->rq_xprt; |
921 | struct sock_xprt *transport = |
922 | container_of(xprt, struct sock_xprt, xprt); |
923 | struct xdr_buf *xdr = &req->rq_snd_buf; |
924 | rpc_fraghdr rm = xs_stream_record_marker(xdr); |
925 | unsigned int msglen = rm ? req->rq_slen + sizeof(rm) : req->rq_slen; |
926 | struct msghdr msg = { |
927 | .msg_flags = XS_SENDMSG_FLAGS, |
928 | }; |
929 | bool vm_wait; |
930 | unsigned int sent; |
931 | int status; |
932 | |
933 | /* Close the stream if the previous transmission was incomplete */ |
934 | if (xs_send_request_was_aborted(transport, req)) { |
935 | xprt_force_disconnect(xprt); |
936 | return -ENOTCONN; |
937 | } |
938 | |
939 | xs_pktdump(msg: "packet data:" , |
940 | packet: req->rq_svec->iov_base, count: req->rq_svec->iov_len); |
941 | |
942 | vm_wait = sk_stream_is_writeable(sk: transport->inet) ? true : false; |
943 | |
944 | req->rq_xtime = ktime_get(); |
945 | status = xprt_sock_sendmsg(sock: transport->sock, msg: &msg, xdr, |
946 | base: transport->xmit.offset, marker: rm, sent_p: &sent); |
947 | dprintk("RPC: %s(%u) = %d\n" , |
948 | __func__, xdr->len - transport->xmit.offset, status); |
949 | |
950 | if (likely(sent > 0) || status == 0) { |
951 | transport->xmit.offset += sent; |
952 | req->rq_bytes_sent = transport->xmit.offset; |
953 | if (likely(req->rq_bytes_sent >= msglen)) { |
954 | req->rq_xmit_bytes_sent += transport->xmit.offset; |
955 | transport->xmit.offset = 0; |
956 | return 0; |
957 | } |
958 | status = -EAGAIN; |
959 | vm_wait = false; |
960 | } |
961 | |
962 | switch (status) { |
963 | case -EAGAIN: |
964 | status = xs_stream_nospace(req, vm_wait); |
965 | break; |
966 | default: |
967 | dprintk("RPC: sendmsg returned unrecognized error %d\n" , |
968 | -status); |
969 | fallthrough; |
970 | case -EPIPE: |
971 | xprt_force_disconnect(xprt); |
972 | status = -ENOTCONN; |
973 | } |
974 | |
975 | return status; |
976 | } |
977 | |
/**
 * xs_udp_send_request - write an RPC request to a UDP socket
 * @req: pointer to RPC request
 *
 * Return values:
 *        0:	The request has been sent
 *   EAGAIN:	The socket was blocked, please call again later to
 *		complete the request
 * ENOTCONN:	Caller needs to invoke connect logic then call again
 *    other:	Some other error occurred, the request was not sent
 */
static int xs_udp_send_request(struct rpc_rqst *req)
{
	struct rpc_xprt *xprt = req->rq_xprt;
	struct sock_xprt *transport =
		container_of(xprt, struct sock_xprt, xprt);
	struct xdr_buf *snd_buf = &req->rq_snd_buf;
	struct msghdr msg = {
		.msg_name	= xs_addr(xprt),
		.msg_namelen	= xprt->addrlen,
		.msg_flags	= XS_SENDMSG_FLAGS,
	};
	unsigned int sent;
	int status;

	xs_pktdump("packet data:",
		   req->rq_svec->iov_base,
		   req->rq_svec->iov_len);

	if (!xprt_bound(xprt))
		return -ENOTCONN;

	if (!xprt_request_get_cong(xprt, req))
		return -EBADSLT;

	status = xdr_alloc_bvec(snd_buf, rpc_task_gfp_mask());
	if (status < 0)
		return status;

	req->rq_xtime = ktime_get();
	/* Datagrams are sent from offset 0 and carry no record marker */
	status = xprt_sock_sendmsg(transport->sock, &msg, snd_buf, 0, 0, &sent);

	dprintk("RPC: xs_udp_send_request(%u) = %d\n",
		snd_buf->len, status);

	/* firewall is blocking us, don't return -EAGAIN or we end up looping */
	if (status != -EPERM) {
		if (status == -EAGAIN && sock_writeable(transport->inet))
			status = -ENOBUFS;

		if (sent > 0 || status == 0) {
			req->rq_xmit_bytes_sent += sent;
			if (sent >= req->rq_slen)
				return 0;
			/* Still some bytes left; set up for a retry later. */
			status = -EAGAIN;
		}
	}

	switch (status) {
	case -EAGAIN:
		status = xs_sock_nospace(req);
		break;
	case -ENOTSOCK:
		status = -ENOTCONN;
		/* Should we call xs_close() here? */
		break;
	case -EPERM:
	case -ECONNREFUSED:
	case -EPIPE:
	case -ENOBUFS:
	case -ENETUNREACH:
		/* When the server has died, an ICMP port unreachable message
		 * prompts ECONNREFUSED. */
		break;
	default:
		dprintk("RPC: sendmsg returned unrecognized error %d\n",
			-status);
	}

	return status;
}
1060 | |
1061 | /** |
1062 | * xs_tcp_send_request - write an RPC request to a TCP socket |
1063 | * @req: pointer to RPC request |
1064 | * |
1065 | * Return values: |
1066 | * 0: The request has been sent |
1067 | * EAGAIN: The socket was blocked, please call again later to |
1068 | * complete the request |
1069 | * ENOTCONN: Caller needs to invoke connect logic then call again |
1070 | * other: Some other error occurred, the request was not sent |
1071 | * |
1072 | * XXX: In the case of soft timeouts, should we eventually give up |
1073 | * if sendmsg is not able to make progress? |
1074 | */ |
1075 | static int xs_tcp_send_request(struct rpc_rqst *req) |
1076 | { |
1077 | struct rpc_xprt *xprt = req->rq_xprt; |
1078 | struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); |
1079 | struct xdr_buf *xdr = &req->rq_snd_buf; |
1080 | rpc_fraghdr rm = xs_stream_record_marker(xdr); |
1081 | unsigned int msglen = rm ? req->rq_slen + sizeof(rm) : req->rq_slen; |
1082 | struct msghdr msg = { |
1083 | .msg_flags = XS_SENDMSG_FLAGS, |
1084 | }; |
1085 | bool vm_wait; |
1086 | unsigned int sent; |
1087 | int status; |
1088 | |
1089 | /* Close the stream if the previous transmission was incomplete */ |
1090 | if (xs_send_request_was_aborted(transport, req)) { |
1091 | if (transport->sock != NULL) |
1092 | kernel_sock_shutdown(sock: transport->sock, how: SHUT_RDWR); |
1093 | return -ENOTCONN; |
1094 | } |
1095 | if (!transport->inet) |
1096 | return -ENOTCONN; |
1097 | |
1098 | xs_pktdump(msg: "packet data:" , |
1099 | packet: req->rq_svec->iov_base, |
1100 | count: req->rq_svec->iov_len); |
1101 | |
1102 | if (test_bit(XPRT_SOCK_UPD_TIMEOUT, &transport->sock_state)) |
1103 | xs_tcp_set_socket_timeouts(xprt, sock: transport->sock); |
1104 | |
1105 | xs_set_srcport(transport, sock: transport->sock); |
1106 | |
1107 | /* Continue transmitting the packet/record. We must be careful |
1108 | * to cope with writespace callbacks arriving _after_ we have |
1109 | * called sendmsg(). */ |
1110 | req->rq_xtime = ktime_get(); |
1111 | tcp_sock_set_cork(sk: transport->inet, on: true); |
1112 | |
1113 | vm_wait = sk_stream_is_writeable(sk: transport->inet) ? true : false; |
1114 | |
1115 | do { |
1116 | status = xprt_sock_sendmsg(sock: transport->sock, msg: &msg, xdr, |
1117 | base: transport->xmit.offset, marker: rm, sent_p: &sent); |
1118 | |
1119 | dprintk("RPC: xs_tcp_send_request(%u) = %d\n" , |
1120 | xdr->len - transport->xmit.offset, status); |
1121 | |
1122 | /* If we've sent the entire packet, immediately |
1123 | * reset the count of bytes sent. */ |
1124 | transport->xmit.offset += sent; |
1125 | req->rq_bytes_sent = transport->xmit.offset; |
1126 | if (likely(req->rq_bytes_sent >= msglen)) { |
1127 | req->rq_xmit_bytes_sent += transport->xmit.offset; |
1128 | transport->xmit.offset = 0; |
1129 | if (atomic_long_read(v: &xprt->xmit_queuelen) == 1) |
1130 | tcp_sock_set_cork(sk: transport->inet, on: false); |
1131 | return 0; |
1132 | } |
1133 | |
1134 | WARN_ON_ONCE(sent == 0 && status == 0); |
1135 | |
1136 | if (sent > 0) |
1137 | vm_wait = false; |
1138 | |
1139 | } while (status == 0); |
1140 | |
1141 | switch (status) { |
1142 | case -ENOTSOCK: |
1143 | status = -ENOTCONN; |
1144 | /* Should we call xs_close() here? */ |
1145 | break; |
1146 | case -EAGAIN: |
1147 | status = xs_stream_nospace(req, vm_wait); |
1148 | break; |
1149 | case -ECONNRESET: |
1150 | case -ECONNREFUSED: |
1151 | case -ENOTCONN: |
1152 | case -EADDRINUSE: |
1153 | case -ENOBUFS: |
1154 | case -EPIPE: |
1155 | break; |
1156 | default: |
1157 | dprintk("RPC: sendmsg returned unrecognized error %d\n" , |
1158 | -status); |
1159 | } |
1160 | |
1161 | return status; |
1162 | } |
1163 | |
1164 | static void xs_save_old_callbacks(struct sock_xprt *transport, struct sock *sk) |
1165 | { |
1166 | transport->old_data_ready = sk->sk_data_ready; |
1167 | transport->old_state_change = sk->sk_state_change; |
1168 | transport->old_write_space = sk->sk_write_space; |
1169 | transport->old_error_report = sk->sk_error_report; |
1170 | } |
1171 | |
1172 | static void xs_restore_old_callbacks(struct sock_xprt *transport, struct sock *sk) |
1173 | { |
1174 | sk->sk_data_ready = transport->old_data_ready; |
1175 | sk->sk_state_change = transport->old_state_change; |
1176 | sk->sk_write_space = transport->old_write_space; |
1177 | sk->sk_error_report = transport->old_error_report; |
1178 | } |
1179 | |
1180 | static void xs_sock_reset_state_flags(struct rpc_xprt *xprt) |
1181 | { |
1182 | struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); |
1183 | |
1184 | transport->xprt_err = 0; |
1185 | clear_bit(XPRT_SOCK_DATA_READY, addr: &transport->sock_state); |
1186 | clear_bit(XPRT_SOCK_WAKE_ERROR, addr: &transport->sock_state); |
1187 | clear_bit(XPRT_SOCK_WAKE_WRITE, addr: &transport->sock_state); |
1188 | clear_bit(XPRT_SOCK_WAKE_DISCONNECT, addr: &transport->sock_state); |
1189 | clear_bit(XPRT_SOCK_NOSPACE, addr: &transport->sock_state); |
1190 | } |
1191 | |
1192 | static void xs_run_error_worker(struct sock_xprt *transport, unsigned int nr) |
1193 | { |
1194 | set_bit(nr, addr: &transport->sock_state); |
1195 | queue_work(wq: xprtiod_workqueue, work: &transport->error_worker); |
1196 | } |
1197 | |
1198 | static void xs_sock_reset_connection_flags(struct rpc_xprt *xprt) |
1199 | { |
1200 | xprt->connect_cookie++; |
1201 | smp_mb__before_atomic(); |
1202 | clear_bit(XPRT_CLOSE_WAIT, addr: &xprt->state); |
1203 | clear_bit(XPRT_CLOSING, addr: &xprt->state); |
1204 | xs_sock_reset_state_flags(xprt); |
1205 | smp_mb__after_atomic(); |
1206 | } |
1207 | |
1208 | /** |
1209 | * xs_error_report - callback to handle TCP socket state errors |
1210 | * @sk: socket |
1211 | * |
1212 | * Note: we don't call sock_error() since there may be a rpc_task |
1213 | * using the socket, and so we don't want to clear sk->sk_err. |
1214 | */ |
1215 | static void xs_error_report(struct sock *sk) |
1216 | { |
1217 | struct sock_xprt *transport; |
1218 | struct rpc_xprt *xprt; |
1219 | |
1220 | if (!(xprt = xprt_from_sock(sk))) |
1221 | return; |
1222 | |
1223 | transport = container_of(xprt, struct sock_xprt, xprt); |
1224 | transport->xprt_err = -sk->sk_err; |
1225 | if (transport->xprt_err == 0) |
1226 | return; |
1227 | dprintk("RPC: xs_error_report client %p, error=%d...\n" , |
1228 | xprt, -transport->xprt_err); |
1229 | trace_rpc_socket_error(xprt, socket: sk->sk_socket, error: transport->xprt_err); |
1230 | |
1231 | /* barrier ensures xprt_err is set before XPRT_SOCK_WAKE_ERROR */ |
1232 | smp_mb__before_atomic(); |
1233 | xs_run_error_worker(transport, XPRT_SOCK_WAKE_ERROR); |
1234 | } |
1235 | |
1236 | static void xs_reset_transport(struct sock_xprt *transport) |
1237 | { |
1238 | struct socket *sock = transport->sock; |
1239 | struct sock *sk = transport->inet; |
1240 | struct rpc_xprt *xprt = &transport->xprt; |
1241 | struct file *filp = transport->file; |
1242 | |
1243 | if (sk == NULL) |
1244 | return; |
1245 | /* |
1246 | * Make sure we're calling this in a context from which it is safe |
1247 | * to call __fput_sync(). In practice that means rpciod and the |
1248 | * system workqueue. |
1249 | */ |
1250 | if (!(current->flags & PF_WQ_WORKER)) { |
1251 | WARN_ON_ONCE(1); |
1252 | set_bit(XPRT_CLOSE_WAIT, addr: &xprt->state); |
1253 | return; |
1254 | } |
1255 | |
1256 | if (atomic_read(v: &transport->xprt.swapper)) |
1257 | sk_clear_memalloc(sk); |
1258 | |
1259 | tls_handshake_cancel(sk); |
1260 | |
1261 | kernel_sock_shutdown(sock, how: SHUT_RDWR); |
1262 | |
1263 | mutex_lock(&transport->recv_mutex); |
1264 | lock_sock(sk); |
1265 | transport->inet = NULL; |
1266 | transport->sock = NULL; |
1267 | transport->file = NULL; |
1268 | |
1269 | sk->sk_user_data = NULL; |
1270 | |
1271 | xs_restore_old_callbacks(transport, sk); |
1272 | xprt_clear_connected(xprt); |
1273 | xs_sock_reset_connection_flags(xprt); |
1274 | /* Reset stream record info */ |
1275 | xs_stream_reset_connect(transport); |
1276 | release_sock(sk); |
1277 | mutex_unlock(lock: &transport->recv_mutex); |
1278 | |
1279 | trace_rpc_socket_close(xprt, socket: sock); |
1280 | __fput_sync(filp); |
1281 | |
1282 | xprt_disconnect_done(xprt); |
1283 | } |
1284 | |
1285 | /** |
1286 | * xs_close - close a socket |
1287 | * @xprt: transport |
1288 | * |
1289 | * This is used when all requests are complete; ie, no DRC state remains |
1290 | * on the server we want to save. |
1291 | * |
1292 | * The caller _must_ be holding XPRT_LOCKED in order to avoid issues with |
1293 | * xs_reset_transport() zeroing the socket from underneath a writer. |
1294 | */ |
1295 | static void xs_close(struct rpc_xprt *xprt) |
1296 | { |
1297 | struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); |
1298 | |
1299 | dprintk("RPC: xs_close xprt %p\n" , xprt); |
1300 | |
1301 | if (transport->sock) |
1302 | tls_handshake_close(sock: transport->sock); |
1303 | xs_reset_transport(transport); |
1304 | xprt->reestablish_timeout = 0; |
1305 | } |
1306 | |
/*
 * Log the injected disconnect and report the transport as disconnected
 * through xprt_disconnect_done().
 */
static void xs_inject_disconnect(struct rpc_xprt *xprt)
{
	dprintk("RPC: injecting transport disconnect on xprt=%p\n", xprt);

	xprt_disconnect_done(xprt);
}
1313 | |
/*
 * Release the transport's peer address strings, then free the
 * transport itself.
 */
static void xs_xprt_free(struct rpc_xprt *xprt)
{
	xs_free_peer_addresses(xprt);

	xprt_free(xprt);
}
1319 | |
1320 | /** |
1321 | * xs_destroy - prepare to shutdown a transport |
1322 | * @xprt: doomed transport |
1323 | * |
1324 | */ |
1325 | static void xs_destroy(struct rpc_xprt *xprt) |
1326 | { |
1327 | struct sock_xprt *transport = container_of(xprt, |
1328 | struct sock_xprt, xprt); |
1329 | dprintk("RPC: xs_destroy xprt %p\n" , xprt); |
1330 | |
1331 | cancel_delayed_work_sync(dwork: &transport->connect_worker); |
1332 | xs_close(xprt); |
1333 | cancel_work_sync(work: &transport->recv_worker); |
1334 | cancel_work_sync(work: &transport->error_worker); |
1335 | xs_xprt_free(xprt); |
1336 | module_put(THIS_MODULE); |
1337 | } |
1338 | |
1339 | /** |
1340 | * xs_udp_data_read_skb - receive callback for UDP sockets |
1341 | * @xprt: transport |
1342 | * @sk: socket |
1343 | * @skb: skbuff |
1344 | * |
1345 | */ |
1346 | static void xs_udp_data_read_skb(struct rpc_xprt *xprt, |
1347 | struct sock *sk, |
1348 | struct sk_buff *skb) |
1349 | { |
1350 | struct rpc_task *task; |
1351 | struct rpc_rqst *rovr; |
1352 | int repsize, copied; |
1353 | u32 _xid; |
1354 | __be32 *xp; |
1355 | |
1356 | repsize = skb->len; |
1357 | if (repsize < 4) { |
1358 | dprintk("RPC: impossible RPC reply size %d!\n" , repsize); |
1359 | return; |
1360 | } |
1361 | |
1362 | /* Copy the XID from the skb... */ |
1363 | xp = skb_header_pointer(skb, offset: 0, len: sizeof(_xid), buffer: &_xid); |
1364 | if (xp == NULL) |
1365 | return; |
1366 | |
1367 | /* Look up and lock the request corresponding to the given XID */ |
1368 | spin_lock(lock: &xprt->queue_lock); |
1369 | rovr = xprt_lookup_rqst(xprt, xid: *xp); |
1370 | if (!rovr) |
1371 | goto out_unlock; |
1372 | xprt_pin_rqst(req: rovr); |
1373 | xprt_update_rtt(task: rovr->rq_task); |
1374 | spin_unlock(lock: &xprt->queue_lock); |
1375 | task = rovr->rq_task; |
1376 | |
1377 | if ((copied = rovr->rq_private_buf.buflen) > repsize) |
1378 | copied = repsize; |
1379 | |
1380 | /* Suck it into the iovec, verify checksum if not done by hw. */ |
1381 | if (csum_partial_copy_to_xdr(xdr: &rovr->rq_private_buf, skb)) { |
1382 | spin_lock(lock: &xprt->queue_lock); |
1383 | __UDPX_INC_STATS(sk, UDP_MIB_INERRORS); |
1384 | goto out_unpin; |
1385 | } |
1386 | |
1387 | |
1388 | spin_lock(lock: &xprt->transport_lock); |
1389 | xprt_adjust_cwnd(xprt, task, result: copied); |
1390 | spin_unlock(lock: &xprt->transport_lock); |
1391 | spin_lock(lock: &xprt->queue_lock); |
1392 | xprt_complete_rqst(task, copied); |
1393 | __UDPX_INC_STATS(sk, UDP_MIB_INDATAGRAMS); |
1394 | out_unpin: |
1395 | xprt_unpin_rqst(req: rovr); |
1396 | out_unlock: |
1397 | spin_unlock(lock: &xprt->queue_lock); |
1398 | } |
1399 | |
1400 | static void xs_udp_data_receive(struct sock_xprt *transport) |
1401 | { |
1402 | struct sk_buff *skb; |
1403 | struct sock *sk; |
1404 | int err; |
1405 | |
1406 | mutex_lock(&transport->recv_mutex); |
1407 | sk = transport->inet; |
1408 | if (sk == NULL) |
1409 | goto out; |
1410 | for (;;) { |
1411 | skb = skb_recv_udp(sk, MSG_DONTWAIT, err: &err); |
1412 | if (skb == NULL) |
1413 | break; |
1414 | xs_udp_data_read_skb(xprt: &transport->xprt, sk, skb); |
1415 | consume_skb(skb); |
1416 | cond_resched(); |
1417 | } |
1418 | xs_poll_check_readable(transport); |
1419 | out: |
1420 | mutex_unlock(lock: &transport->recv_mutex); |
1421 | } |
1422 | |
1423 | static void xs_udp_data_receive_workfn(struct work_struct *work) |
1424 | { |
1425 | struct sock_xprt *transport = |
1426 | container_of(work, struct sock_xprt, recv_worker); |
1427 | unsigned int pflags = memalloc_nofs_save(); |
1428 | |
1429 | xs_udp_data_receive(transport); |
1430 | memalloc_nofs_restore(flags: pflags); |
1431 | } |
1432 | |
1433 | /** |
1434 | * xs_data_ready - "data ready" callback for sockets |
1435 | * @sk: socket with data to read |
1436 | * |
1437 | */ |
1438 | static void xs_data_ready(struct sock *sk) |
1439 | { |
1440 | struct rpc_xprt *xprt; |
1441 | |
1442 | trace_sk_data_ready(sk); |
1443 | |
1444 | xprt = xprt_from_sock(sk); |
1445 | if (xprt != NULL) { |
1446 | struct sock_xprt *transport = container_of(xprt, |
1447 | struct sock_xprt, xprt); |
1448 | |
1449 | trace_xs_data_ready(xprt); |
1450 | |
1451 | transport->old_data_ready(sk); |
1452 | |
1453 | if (test_bit(XPRT_SOCK_IGNORE_RECV, &transport->sock_state)) |
1454 | return; |
1455 | |
1456 | /* Any data means we had a useful conversation, so |
1457 | * then we don't need to delay the next reconnect |
1458 | */ |
1459 | if (xprt->reestablish_timeout) |
1460 | xprt->reestablish_timeout = 0; |
1461 | if (!test_and_set_bit(XPRT_SOCK_DATA_READY, addr: &transport->sock_state)) |
1462 | queue_work(wq: xprtiod_workqueue, work: &transport->recv_worker); |
1463 | } |
1464 | } |
1465 | |
/*
 * Force a TCP close, for use when the server is sending junk and/or
 * has put us in CLOSE_WAIT. Simply forwards to
 * xprt_force_disconnect().
 */
static void xs_tcp_force_close(struct rpc_xprt *xprt)
{
	xprt_force_disconnect(xprt);
}
1474 | |
#if defined(CONFIG_SUNRPC_BACKCHANNEL)
/*
 * Maximum backchannel payload for a TCP transport: always PAGE_SIZE,
 * whatever @xprt is.
 */
static size_t xs_tcp_bc_maxpayload(struct rpc_xprt *xprt)
{
	const size_t max_payload = PAGE_SIZE;

	return max_payload;
}
#endif /* CONFIG_SUNRPC_BACKCHANNEL */
1481 | |
1482 | /** |
1483 | * xs_local_state_change - callback to handle AF_LOCAL socket state changes |
1484 | * @sk: socket whose state has changed |
1485 | * |
1486 | */ |
1487 | static void xs_local_state_change(struct sock *sk) |
1488 | { |
1489 | struct rpc_xprt *xprt; |
1490 | struct sock_xprt *transport; |
1491 | |
1492 | if (!(xprt = xprt_from_sock(sk))) |
1493 | return; |
1494 | transport = container_of(xprt, struct sock_xprt, xprt); |
1495 | if (sk->sk_shutdown & SHUTDOWN_MASK) { |
1496 | clear_bit(XPRT_CONNECTED, addr: &xprt->state); |
1497 | /* Trigger the socket release */ |
1498 | xs_run_error_worker(transport, XPRT_SOCK_WAKE_DISCONNECT); |
1499 | } |
1500 | } |
1501 | |
1502 | /** |
1503 | * xs_tcp_state_change - callback to handle TCP socket state changes |
1504 | * @sk: socket whose state has changed |
1505 | * |
1506 | */ |
1507 | static void xs_tcp_state_change(struct sock *sk) |
1508 | { |
1509 | struct rpc_xprt *xprt; |
1510 | struct sock_xprt *transport; |
1511 | |
1512 | if (!(xprt = xprt_from_sock(sk))) |
1513 | return; |
1514 | dprintk("RPC: xs_tcp_state_change client %p...\n" , xprt); |
1515 | dprintk("RPC: state %x conn %d dead %d zapped %d sk_shutdown %d\n" , |
1516 | sk->sk_state, xprt_connected(xprt), |
1517 | sock_flag(sk, SOCK_DEAD), |
1518 | sock_flag(sk, SOCK_ZAPPED), |
1519 | sk->sk_shutdown); |
1520 | |
1521 | transport = container_of(xprt, struct sock_xprt, xprt); |
1522 | trace_rpc_socket_state_change(xprt, socket: sk->sk_socket); |
1523 | switch (sk->sk_state) { |
1524 | case TCP_ESTABLISHED: |
1525 | if (!xprt_test_and_set_connected(xprt)) { |
1526 | xprt->connect_cookie++; |
1527 | clear_bit(XPRT_SOCK_CONNECTING, addr: &transport->sock_state); |
1528 | xprt_clear_connecting(xprt); |
1529 | |
1530 | xprt->stat.connect_count++; |
1531 | xprt->stat.connect_time += (long)jiffies - |
1532 | xprt->stat.connect_start; |
1533 | xs_run_error_worker(transport, XPRT_SOCK_WAKE_PENDING); |
1534 | } |
1535 | break; |
1536 | case TCP_FIN_WAIT1: |
1537 | /* The client initiated a shutdown of the socket */ |
1538 | xprt->connect_cookie++; |
1539 | xprt->reestablish_timeout = 0; |
1540 | set_bit(XPRT_CLOSING, addr: &xprt->state); |
1541 | smp_mb__before_atomic(); |
1542 | clear_bit(XPRT_CONNECTED, addr: &xprt->state); |
1543 | clear_bit(XPRT_CLOSE_WAIT, addr: &xprt->state); |
1544 | smp_mb__after_atomic(); |
1545 | break; |
1546 | case TCP_CLOSE_WAIT: |
1547 | /* The server initiated a shutdown of the socket */ |
1548 | xprt->connect_cookie++; |
1549 | clear_bit(XPRT_CONNECTED, addr: &xprt->state); |
1550 | xs_run_error_worker(transport, XPRT_SOCK_WAKE_DISCONNECT); |
1551 | fallthrough; |
1552 | case TCP_CLOSING: |
1553 | /* |
1554 | * If the server closed down the connection, make sure that |
1555 | * we back off before reconnecting |
1556 | */ |
1557 | if (xprt->reestablish_timeout < XS_TCP_INIT_REEST_TO) |
1558 | xprt->reestablish_timeout = XS_TCP_INIT_REEST_TO; |
1559 | break; |
1560 | case TCP_LAST_ACK: |
1561 | set_bit(XPRT_CLOSING, addr: &xprt->state); |
1562 | smp_mb__before_atomic(); |
1563 | clear_bit(XPRT_CONNECTED, addr: &xprt->state); |
1564 | smp_mb__after_atomic(); |
1565 | break; |
1566 | case TCP_CLOSE: |
1567 | if (test_and_clear_bit(XPRT_SOCK_CONNECTING, |
1568 | addr: &transport->sock_state)) |
1569 | xprt_clear_connecting(xprt); |
1570 | clear_bit(XPRT_CLOSING, addr: &xprt->state); |
1571 | /* Trigger the socket release */ |
1572 | xs_run_error_worker(transport, XPRT_SOCK_WAKE_DISCONNECT); |
1573 | } |
1574 | } |
1575 | |
1576 | static void xs_write_space(struct sock *sk) |
1577 | { |
1578 | struct sock_xprt *transport; |
1579 | struct rpc_xprt *xprt; |
1580 | |
1581 | if (!sk->sk_socket) |
1582 | return; |
1583 | clear_bit(SOCK_NOSPACE, addr: &sk->sk_socket->flags); |
1584 | |
1585 | if (unlikely(!(xprt = xprt_from_sock(sk)))) |
1586 | return; |
1587 | transport = container_of(xprt, struct sock_xprt, xprt); |
1588 | if (!test_and_clear_bit(XPRT_SOCK_NOSPACE, addr: &transport->sock_state)) |
1589 | return; |
1590 | xs_run_error_worker(transport, XPRT_SOCK_WAKE_WRITE); |
1591 | sk->sk_write_pending--; |
1592 | } |
1593 | |
/**
 * xs_udp_write_space - callback invoked when socket buffer space
 *                             becomes available
 * @sk: socket whose state has changed
 *
 * Called when more output buffer space is available for this socket.
 * We try not to wake our writers until they can make "significant"
 * progress, otherwise we'll waste resources thrashing kernel_sendmsg
 * with a bunch of small requests.
 */
static void xs_udp_write_space(struct sock *sk)
{
	/* from net/core/sock.c:sock_def_write_space */
	if (!sock_writeable(sk))
		return;

	xs_write_space(sk);
}
1610 | |
/**
 * xs_tcp_write_space - callback invoked when socket buffer space
 *                             becomes available
 * @sk: socket whose state has changed
 *
 * Called when more output buffer space is available for this socket.
 * We try not to wake our writers until they can make "significant"
 * progress, otherwise we'll waste resources thrashing kernel_sendmsg
 * with a bunch of small requests.
 */
static void xs_tcp_write_space(struct sock *sk)
{
	/* from net/core/stream.c:sk_stream_write_space */
	if (!sk_stream_is_writeable(sk))
		return;

	xs_write_space(sk);
}
1627 | |
1628 | static void xs_udp_do_set_buffer_size(struct rpc_xprt *xprt) |
1629 | { |
1630 | struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); |
1631 | struct sock *sk = transport->inet; |
1632 | |
1633 | if (transport->rcvsize) { |
1634 | sk->sk_userlocks |= SOCK_RCVBUF_LOCK; |
1635 | sk->sk_rcvbuf = transport->rcvsize * xprt->max_reqs * 2; |
1636 | } |
1637 | if (transport->sndsize) { |
1638 | sk->sk_userlocks |= SOCK_SNDBUF_LOCK; |
1639 | sk->sk_sndbuf = transport->sndsize * xprt->max_reqs * 2; |
1640 | sk->sk_write_space(sk); |
1641 | } |
1642 | } |
1643 | |
1644 | /** |
1645 | * xs_udp_set_buffer_size - set send and receive limits |
1646 | * @xprt: generic transport |
1647 | * @sndsize: requested size of send buffer, in bytes |
1648 | * @rcvsize: requested size of receive buffer, in bytes |
1649 | * |
1650 | * Set socket send and receive buffer size limits. |
1651 | */ |
1652 | static void xs_udp_set_buffer_size(struct rpc_xprt *xprt, size_t sndsize, size_t rcvsize) |
1653 | { |
1654 | struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); |
1655 | |
1656 | transport->sndsize = 0; |
1657 | if (sndsize) |
1658 | transport->sndsize = sndsize + 1024; |
1659 | transport->rcvsize = 0; |
1660 | if (rcvsize) |
1661 | transport->rcvsize = rcvsize + 1024; |
1662 | |
1663 | xs_udp_do_set_buffer_size(xprt); |
1664 | } |
1665 | |
1666 | /** |
1667 | * xs_udp_timer - called when a retransmit timeout occurs on a UDP transport |
1668 | * @xprt: controlling transport |
1669 | * @task: task that timed out |
1670 | * |
1671 | * Adjust the congestion window after a retransmit timeout has occurred. |
1672 | */ |
1673 | static void xs_udp_timer(struct rpc_xprt *xprt, struct rpc_task *task) |
1674 | { |
1675 | spin_lock(lock: &xprt->transport_lock); |
1676 | xprt_adjust_cwnd(xprt, task, result: -ETIMEDOUT); |
1677 | spin_unlock(lock: &xprt->transport_lock); |
1678 | } |
1679 | |
1680 | static int xs_get_random_port(void) |
1681 | { |
1682 | unsigned short min = xprt_min_resvport, max = xprt_max_resvport; |
1683 | unsigned short range; |
1684 | unsigned short rand; |
1685 | |
1686 | if (max < min) |
1687 | return -EADDRINUSE; |
1688 | range = max - min + 1; |
1689 | rand = get_random_u32_below(ceil: range); |
1690 | return rand + min; |
1691 | } |
1692 | |
1693 | static unsigned short xs_sock_getport(struct socket *sock) |
1694 | { |
1695 | struct sockaddr_storage buf; |
1696 | unsigned short port = 0; |
1697 | |
1698 | if (kernel_getsockname(sock, addr: (struct sockaddr *)&buf) < 0) |
1699 | goto out; |
1700 | switch (buf.ss_family) { |
1701 | case AF_INET6: |
1702 | port = ntohs(((struct sockaddr_in6 *)&buf)->sin6_port); |
1703 | break; |
1704 | case AF_INET: |
1705 | port = ntohs(((struct sockaddr_in *)&buf)->sin_port); |
1706 | } |
1707 | out: |
1708 | return port; |
1709 | } |
1710 | |
/**
 * xs_set_port - reset the port number in the remote endpoint address
 * @xprt: generic transport
 * @port: new port number
 *
 * Stores @port in the transport's peer address and then refreshes the
 * peer port information via xs_update_peer_port().
 */
static void xs_set_port(struct rpc_xprt *xprt, unsigned short port)
{
	struct sockaddr *peer = xs_addr(xprt);

	dprintk("RPC: setting port for xprt %p to %u\n", xprt, port);

	rpc_set_port(peer, port);
	xs_update_peer_port(xprt);
}
1724 | |
1725 | static void xs_set_srcport(struct sock_xprt *transport, struct socket *sock) |
1726 | { |
1727 | if (transport->srcport == 0 && transport->xprt.reuseport) |
1728 | transport->srcport = xs_sock_getport(sock); |
1729 | } |
1730 | |
1731 | static int xs_get_srcport(struct sock_xprt *transport) |
1732 | { |
1733 | int port = transport->srcport; |
1734 | |
1735 | if (port == 0 && transport->xprt.resvport) |
1736 | port = xs_get_random_port(); |
1737 | return port; |
1738 | } |
1739 | |
1740 | static unsigned short xs_sock_srcport(struct rpc_xprt *xprt) |
1741 | { |
1742 | struct sock_xprt *sock = container_of(xprt, struct sock_xprt, xprt); |
1743 | unsigned short ret = 0; |
1744 | mutex_lock(&sock->recv_mutex); |
1745 | if (sock->sock) |
1746 | ret = xs_sock_getport(sock: sock->sock); |
1747 | mutex_unlock(lock: &sock->recv_mutex); |
1748 | return ret; |
1749 | } |
1750 | |
1751 | static int xs_sock_srcaddr(struct rpc_xprt *xprt, char *buf, size_t buflen) |
1752 | { |
1753 | struct sock_xprt *sock = container_of(xprt, struct sock_xprt, xprt); |
1754 | union { |
1755 | struct sockaddr sa; |
1756 | struct sockaddr_storage st; |
1757 | } saddr; |
1758 | int ret = -ENOTCONN; |
1759 | |
1760 | mutex_lock(&sock->recv_mutex); |
1761 | if (sock->sock) { |
1762 | ret = kernel_getsockname(sock: sock->sock, addr: &saddr.sa); |
1763 | if (ret >= 0) |
1764 | ret = snprintf(buf, size: buflen, fmt: "%pISc" , &saddr.sa); |
1765 | } |
1766 | mutex_unlock(lock: &sock->recv_mutex); |
1767 | return ret; |
1768 | } |
1769 | |
1770 | static unsigned short xs_next_srcport(struct sock_xprt *transport, unsigned short port) |
1771 | { |
1772 | if (transport->srcport != 0) |
1773 | transport->srcport = 0; |
1774 | if (!transport->xprt.resvport) |
1775 | return 0; |
1776 | if (port <= xprt_min_resvport || port > xprt_max_resvport) |
1777 | return xprt_max_resvport; |
1778 | return --port; |
1779 | } |
1780 | static int xs_bind(struct sock_xprt *transport, struct socket *sock) |
1781 | { |
1782 | struct sockaddr_storage myaddr; |
1783 | int err, nloop = 0; |
1784 | int port = xs_get_srcport(transport); |
1785 | unsigned short last; |
1786 | |
1787 | /* |
1788 | * If we are asking for any ephemeral port (i.e. port == 0 && |
1789 | * transport->xprt.resvport == 0), don't bind. Let the local |
1790 | * port selection happen implicitly when the socket is used |
1791 | * (for example at connect time). |
1792 | * |
1793 | * This ensures that we can continue to establish TCP |
1794 | * connections even when all local ephemeral ports are already |
1795 | * a part of some TCP connection. This makes no difference |
1796 | * for UDP sockets, but also doesn't harm them. |
1797 | * |
1798 | * If we're asking for any reserved port (i.e. port == 0 && |
1799 | * transport->xprt.resvport == 1) xs_get_srcport above will |
1800 | * ensure that port is non-zero and we will bind as needed. |
1801 | */ |
1802 | if (port <= 0) |
1803 | return port; |
1804 | |
1805 | memcpy(&myaddr, &transport->srcaddr, transport->xprt.addrlen); |
1806 | do { |
1807 | rpc_set_port(sap: (struct sockaddr *)&myaddr, port); |
1808 | err = kernel_bind(sock, addr: (struct sockaddr *)&myaddr, |
1809 | addrlen: transport->xprt.addrlen); |
1810 | if (err == 0) { |
1811 | if (transport->xprt.reuseport) |
1812 | transport->srcport = port; |
1813 | break; |
1814 | } |
1815 | last = port; |
1816 | port = xs_next_srcport(transport, port); |
1817 | if (port > last) |
1818 | nloop++; |
1819 | } while (err == -EADDRINUSE && nloop != 2); |
1820 | |
1821 | if (myaddr.ss_family == AF_INET) |
1822 | dprintk("RPC: %s %pI4:%u: %s (%d)\n" , __func__, |
1823 | &((struct sockaddr_in *)&myaddr)->sin_addr, |
1824 | port, err ? "failed" : "ok" , err); |
1825 | else |
1826 | dprintk("RPC: %s %pI6:%u: %s (%d)\n" , __func__, |
1827 | &((struct sockaddr_in6 *)&myaddr)->sin6_addr, |
1828 | port, err ? "failed" : "ok" , err); |
1829 | return err; |
1830 | } |
1831 | |
1832 | /* |
1833 | * We don't support autobind on AF_LOCAL sockets |
1834 | */ |
1835 | static void xs_local_rpcbind(struct rpc_task *task) |
1836 | { |
1837 | xprt_set_bound(xprt: task->tk_xprt); |
1838 | } |
1839 | |
/* Intentionally empty: both arguments are ignored. */
static void xs_local_set_port(struct rpc_xprt *xprt, unsigned short port)
{
}
1843 | |
#ifdef CONFIG_DEBUG_LOCK_ALLOC
/* One lock class key pair per family: [0] AF_LOCAL, [1] AF_INET, [2] AF_INET6 */
static struct lock_class_key xs_key[3];
static struct lock_class_key xs_slock_key[3];

/* Give an AF_LOCAL RPC socket its own lock class and names. */
static inline void xs_reclassify_socketu(struct socket *sock)
{
	struct sock *sk = sock->sk;

	sock_lock_init_class_and_name(sk, "slock-AF_LOCAL-RPC",
		&xs_slock_key[0], "sk_lock-AF_LOCAL-RPC", &xs_key[0]);
}

/* Give an AF_INET RPC socket its own lock class and names. */
static inline void xs_reclassify_socket4(struct socket *sock)
{
	struct sock *sk = sock->sk;

	sock_lock_init_class_and_name(sk, "slock-AF_INET-RPC",
		&xs_slock_key[1], "sk_lock-AF_INET-RPC", &xs_key[1]);
}

/* Give an AF_INET6 RPC socket its own lock class and names. */
static inline void xs_reclassify_socket6(struct socket *sock)
{
	struct sock *sk = sock->sk;

	sock_lock_init_class_and_name(sk, "slock-AF_INET6-RPC",
		&xs_slock_key[2], "sk_lock-AF_INET6-RPC", &xs_key[2]);
}

/*
 * Re-initialise the lock classes of @sock according to @family.
 * Warns once and does nothing when the socket does not allow
 * reclassification; other families are left untouched.
 */
static inline void xs_reclassify_socket(int family, struct socket *sock)
{
	if (WARN_ON_ONCE(!sock_allow_reclassification(sock->sk)))
		return;

	if (family == AF_LOCAL)
		xs_reclassify_socketu(sock);
	else if (family == AF_INET)
		xs_reclassify_socket4(sock);
	else if (family == AF_INET6)
		xs_reclassify_socket6(sock);
}
#else
/* Without CONFIG_DEBUG_LOCK_ALLOC this is a no-op. */
static inline void xs_reclassify_socket(int family, struct socket *sock)
{
}
#endif
1894 | |
/*
 * Work handler that intentionally does nothing with @work.
 */
static void xs_dummy_setup_socket(struct work_struct *work)
{
}
1898 | |
1899 | static struct socket *xs_create_sock(struct rpc_xprt *xprt, |
1900 | struct sock_xprt *transport, int family, int type, |
1901 | int protocol, bool reuseport) |
1902 | { |
1903 | struct file *filp; |
1904 | struct socket *sock; |
1905 | int err; |
1906 | |
1907 | err = __sock_create(net: xprt->xprt_net, family, type, proto: protocol, res: &sock, kern: 1); |
1908 | if (err < 0) { |
1909 | dprintk("RPC: can't create %d transport socket (%d).\n" , |
1910 | protocol, -err); |
1911 | goto out; |
1912 | } |
1913 | xs_reclassify_socket(family, sock); |
1914 | |
1915 | if (reuseport) |
1916 | sock_set_reuseport(sk: sock->sk); |
1917 | |
1918 | err = xs_bind(transport, sock); |
1919 | if (err) { |
1920 | sock_release(sock); |
1921 | goto out; |
1922 | } |
1923 | |
1924 | filp = sock_alloc_file(sock, O_NONBLOCK, NULL); |
1925 | if (IS_ERR(ptr: filp)) |
1926 | return ERR_CAST(ptr: filp); |
1927 | transport->file = filp; |
1928 | |
1929 | return sock; |
1930 | out: |
1931 | return ERR_PTR(error: err); |
1932 | } |
1933 | |
1934 | static int xs_local_finish_connecting(struct rpc_xprt *xprt, |
1935 | struct socket *sock) |
1936 | { |
1937 | struct sock_xprt *transport = container_of(xprt, struct sock_xprt, |
1938 | xprt); |
1939 | |
1940 | if (!transport->inet) { |
1941 | struct sock *sk = sock->sk; |
1942 | |
1943 | lock_sock(sk); |
1944 | |
1945 | xs_save_old_callbacks(transport, sk); |
1946 | |
1947 | sk->sk_user_data = xprt; |
1948 | sk->sk_data_ready = xs_data_ready; |
1949 | sk->sk_write_space = xs_udp_write_space; |
1950 | sk->sk_state_change = xs_local_state_change; |
1951 | sk->sk_error_report = xs_error_report; |
1952 | sk->sk_use_task_frag = false; |
1953 | |
1954 | xprt_clear_connected(xprt); |
1955 | |
1956 | /* Reset to new socket */ |
1957 | transport->sock = sock; |
1958 | transport->inet = sk; |
1959 | |
1960 | release_sock(sk); |
1961 | } |
1962 | |
1963 | xs_stream_start_connect(transport); |
1964 | |
1965 | return kernel_connect(sock, addr: xs_addr(xprt), addrlen: xprt->addrlen, flags: 0); |
1966 | } |
1967 | |
/**
 * xs_local_setup_socket - create AF_LOCAL socket, connect to a local endpoint
 * @transport: socket transport to connect
 *
 * On every exit the "connecting" state is cleared and pending tasks are
 * woken with the resulting status.
 *
 * Return: 0 when the connect succeeded, otherwise the negative errno from
 * socket creation, file allocation or the connect attempt.
 */
static int xs_local_setup_socket(struct sock_xprt *transport)
{
	struct rpc_xprt *xprt = &transport->xprt;
	struct socket *sock;
	struct file *sock_file;
	int err;

	err = __sock_create(xprt->xprt_net, AF_LOCAL, SOCK_STREAM, 0,
			    &sock, 1);
	if (err < 0) {
		dprintk("RPC: can't create AF_LOCAL "
			"transport socket (%d).\n", -err);
		goto out;
	}
	xs_reclassify_socket(AF_LOCAL, sock);

	sock_file = sock_alloc_file(sock, O_NONBLOCK, NULL);
	if (IS_ERR(sock_file)) {
		err = PTR_ERR(sock_file);
		goto out;
	}
	transport->file = sock_file;

	dprintk("RPC: worker connecting xprt %p via AF_LOCAL to %s\n",
			xprt, xprt->address_strings[RPC_DISPLAY_ADDR]);

	err = xs_local_finish_connecting(xprt, sock);
	trace_rpc_socket_connect(xprt, sock, err);
	switch (err) {
	case 0:
		dprintk("RPC: xprt %p connected to %s\n",
				xprt, xprt->address_strings[RPC_DISPLAY_ADDR]);
		/* Account for the completed connect, then publish it */
		xprt->stat.connect_count++;
		xprt->stat.connect_time += (long)jiffies -
					   xprt->stat.connect_start;
		xprt_set_connected(xprt);
		break;
	case -ENOBUFS:
		/* Nothing to report */
		break;
	case -ENOENT:
		dprintk("RPC: xprt %p: socket %s does not exist\n",
				xprt, xprt->address_strings[RPC_DISPLAY_ADDR]);
		break;
	case -ECONNREFUSED:
		dprintk("RPC: xprt %p: connection refused for %s\n",
				xprt, xprt->address_strings[RPC_DISPLAY_ADDR]);
		break;
	default:
		printk(KERN_ERR "%s: unhandled error (%d) connecting to %s\n",
				__func__, -err,
				xprt->address_strings[RPC_DISPLAY_ADDR]);
		break;
	}

out:
	xprt_clear_connecting(xprt);
	xprt_wake_pending_tasks(xprt, err);
	return err;
}
2030 | |
2031 | static void xs_local_connect(struct rpc_xprt *xprt, struct rpc_task *task) |
2032 | { |
2033 | struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); |
2034 | int ret; |
2035 | |
2036 | if (transport->file) |
2037 | goto force_disconnect; |
2038 | |
2039 | if (RPC_IS_ASYNC(task)) { |
2040 | /* |
2041 | * We want the AF_LOCAL connect to be resolved in the |
2042 | * filesystem namespace of the process making the rpc |
2043 | * call. Thus we connect synchronously. |
2044 | * |
2045 | * If we want to support asynchronous AF_LOCAL calls, |
2046 | * we'll need to figure out how to pass a namespace to |
2047 | * connect. |
2048 | */ |
2049 | rpc_task_set_rpc_status(task, rpc_status: -ENOTCONN); |
2050 | goto out_wake; |
2051 | } |
2052 | ret = xs_local_setup_socket(transport); |
2053 | if (ret && !RPC_IS_SOFTCONN(task)) |
2054 | msleep_interruptible(msecs: 15000); |
2055 | return; |
2056 | force_disconnect: |
2057 | xprt_force_disconnect(xprt); |
2058 | out_wake: |
2059 | xprt_clear_connecting(xprt); |
2060 | xprt_wake_pending_tasks(xprt, status: -ENOTCONN); |
2061 | } |
2062 | |
2063 | #if IS_ENABLED(CONFIG_SUNRPC_SWAP) |
2064 | /* |
2065 | * Note that this should be called with XPRT_LOCKED held, or recv_mutex |
2066 | * held, or when we otherwise know that we have exclusive access to the |
2067 | * socket, to guard against races with xs_reset_transport. |
2068 | */ |
2069 | static void xs_set_memalloc(struct rpc_xprt *xprt) |
2070 | { |
2071 | struct sock_xprt *transport = container_of(xprt, struct sock_xprt, |
2072 | xprt); |
2073 | |
2074 | /* |
2075 | * If there's no sock, then we have nothing to set. The |
2076 | * reconnecting process will get it for us. |
2077 | */ |
2078 | if (!transport->inet) |
2079 | return; |
2080 | if (atomic_read(v: &xprt->swapper)) |
2081 | sk_set_memalloc(sk: transport->inet); |
2082 | } |
2083 | |
2084 | /** |
2085 | * xs_enable_swap - Tag this transport as being used for swap. |
2086 | * @xprt: transport to tag |
2087 | * |
2088 | * Take a reference to this transport on behalf of the rpc_clnt, and |
2089 | * optionally mark it for swapping if it wasn't already. |
2090 | */ |
2091 | static int |
2092 | xs_enable_swap(struct rpc_xprt *xprt) |
2093 | { |
2094 | struct sock_xprt *xs = container_of(xprt, struct sock_xprt, xprt); |
2095 | |
2096 | mutex_lock(&xs->recv_mutex); |
2097 | if (atomic_inc_return(v: &xprt->swapper) == 1 && |
2098 | xs->inet) |
2099 | sk_set_memalloc(sk: xs->inet); |
2100 | mutex_unlock(lock: &xs->recv_mutex); |
2101 | return 0; |
2102 | } |
2103 | |
2104 | /** |
2105 | * xs_disable_swap - Untag this transport as being used for swap. |
2106 | * @xprt: transport to tag |
2107 | * |
2108 | * Drop a "swapper" reference to this xprt on behalf of the rpc_clnt. If the |
2109 | * swapper refcount goes to 0, untag the socket as a memalloc socket. |
2110 | */ |
2111 | static void |
2112 | xs_disable_swap(struct rpc_xprt *xprt) |
2113 | { |
2114 | struct sock_xprt *xs = container_of(xprt, struct sock_xprt, xprt); |
2115 | |
2116 | mutex_lock(&xs->recv_mutex); |
2117 | if (atomic_dec_and_test(v: &xprt->swapper) && |
2118 | xs->inet) |
2119 | sk_clear_memalloc(sk: xs->inet); |
2120 | mutex_unlock(lock: &xs->recv_mutex); |
2121 | } |
2122 | #else |
2123 | static void xs_set_memalloc(struct rpc_xprt *xprt) |
2124 | { |
2125 | } |
2126 | |
2127 | static int |
2128 | xs_enable_swap(struct rpc_xprt *xprt) |
2129 | { |
2130 | return -EINVAL; |
2131 | } |
2132 | |
2133 | static void |
2134 | xs_disable_swap(struct rpc_xprt *xprt) |
2135 | { |
2136 | } |
2137 | #endif |
2138 | |
2139 | static void xs_udp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock) |
2140 | { |
2141 | struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); |
2142 | |
2143 | if (!transport->inet) { |
2144 | struct sock *sk = sock->sk; |
2145 | |
2146 | lock_sock(sk); |
2147 | |
2148 | xs_save_old_callbacks(transport, sk); |
2149 | |
2150 | sk->sk_user_data = xprt; |
2151 | sk->sk_data_ready = xs_data_ready; |
2152 | sk->sk_write_space = xs_udp_write_space; |
2153 | sk->sk_use_task_frag = false; |
2154 | |
2155 | xprt_set_connected(xprt); |
2156 | |
2157 | /* Reset to new socket */ |
2158 | transport->sock = sock; |
2159 | transport->inet = sk; |
2160 | |
2161 | xs_set_memalloc(xprt); |
2162 | |
2163 | release_sock(sk); |
2164 | } |
2165 | xs_udp_do_set_buffer_size(xprt); |
2166 | |
2167 | xprt->stat.connect_start = jiffies; |
2168 | } |
2169 | |
2170 | static void xs_udp_setup_socket(struct work_struct *work) |
2171 | { |
2172 | struct sock_xprt *transport = |
2173 | container_of(work, struct sock_xprt, connect_worker.work); |
2174 | struct rpc_xprt *xprt = &transport->xprt; |
2175 | struct socket *sock; |
2176 | int status = -EIO; |
2177 | unsigned int pflags = current->flags; |
2178 | |
2179 | if (atomic_read(v: &xprt->swapper)) |
2180 | current->flags |= PF_MEMALLOC; |
2181 | sock = xs_create_sock(xprt, transport, |
2182 | family: xs_addr(xprt)->sa_family, type: SOCK_DGRAM, |
2183 | IPPROTO_UDP, reuseport: false); |
2184 | if (IS_ERR(ptr: sock)) |
2185 | goto out; |
2186 | |
2187 | dprintk("RPC: worker connecting xprt %p via %s to " |
2188 | "%s (port %s)\n" , xprt, |
2189 | xprt->address_strings[RPC_DISPLAY_PROTO], |
2190 | xprt->address_strings[RPC_DISPLAY_ADDR], |
2191 | xprt->address_strings[RPC_DISPLAY_PORT]); |
2192 | |
2193 | xs_udp_finish_connecting(xprt, sock); |
2194 | trace_rpc_socket_connect(xprt, socket: sock, error: 0); |
2195 | status = 0; |
2196 | out: |
2197 | xprt_clear_connecting(xprt); |
2198 | xprt_unlock_connect(xprt, transport); |
2199 | xprt_wake_pending_tasks(xprt, status); |
2200 | current_restore_flags(orig_flags: pflags, PF_MEMALLOC); |
2201 | } |
2202 | |
2203 | /** |
2204 | * xs_tcp_shutdown - gracefully shut down a TCP socket |
2205 | * @xprt: transport |
2206 | * |
2207 | * Initiates a graceful shutdown of the TCP socket by calling the |
2208 | * equivalent of shutdown(SHUT_RDWR); |
2209 | */ |
2210 | static void xs_tcp_shutdown(struct rpc_xprt *xprt) |
2211 | { |
2212 | struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); |
2213 | struct socket *sock = transport->sock; |
2214 | int skst = transport->inet ? transport->inet->sk_state : TCP_CLOSE; |
2215 | |
2216 | if (sock == NULL) |
2217 | return; |
2218 | if (!xprt->reuseport) { |
2219 | xs_close(xprt); |
2220 | return; |
2221 | } |
2222 | switch (skst) { |
2223 | case TCP_FIN_WAIT1: |
2224 | case TCP_FIN_WAIT2: |
2225 | case TCP_LAST_ACK: |
2226 | break; |
2227 | case TCP_ESTABLISHED: |
2228 | case TCP_CLOSE_WAIT: |
2229 | kernel_sock_shutdown(sock, how: SHUT_RDWR); |
2230 | trace_rpc_socket_shutdown(xprt, socket: sock); |
2231 | break; |
2232 | default: |
2233 | xs_reset_transport(transport); |
2234 | } |
2235 | } |
2236 | |
2237 | static void xs_tcp_set_socket_timeouts(struct rpc_xprt *xprt, |
2238 | struct socket *sock) |
2239 | { |
2240 | struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); |
2241 | struct net *net = sock_net(sk: sock->sk); |
2242 | unsigned long connect_timeout; |
2243 | unsigned long syn_retries; |
2244 | unsigned int keepidle; |
2245 | unsigned int keepcnt; |
2246 | unsigned int timeo; |
2247 | unsigned long t; |
2248 | |
2249 | spin_lock(lock: &xprt->transport_lock); |
2250 | keepidle = DIV_ROUND_UP(xprt->timeout->to_initval, HZ); |
2251 | keepcnt = xprt->timeout->to_retries + 1; |
2252 | timeo = jiffies_to_msecs(j: xprt->timeout->to_initval) * |
2253 | (xprt->timeout->to_retries + 1); |
2254 | clear_bit(XPRT_SOCK_UPD_TIMEOUT, addr: &transport->sock_state); |
2255 | spin_unlock(lock: &xprt->transport_lock); |
2256 | |
2257 | /* TCP Keepalive options */ |
2258 | sock_set_keepalive(sk: sock->sk); |
2259 | tcp_sock_set_keepidle(sk: sock->sk, val: keepidle); |
2260 | tcp_sock_set_keepintvl(sk: sock->sk, val: keepidle); |
2261 | tcp_sock_set_keepcnt(sk: sock->sk, val: keepcnt); |
2262 | |
2263 | /* TCP user timeout (see RFC5482) */ |
2264 | tcp_sock_set_user_timeout(sk: sock->sk, val: timeo); |
2265 | |
2266 | /* Connect timeout */ |
2267 | connect_timeout = max_t(unsigned long, |
2268 | DIV_ROUND_UP(xprt->connect_timeout, HZ), 1); |
2269 | syn_retries = max_t(unsigned long, |
2270 | READ_ONCE(net->ipv4.sysctl_tcp_syn_retries), 1); |
2271 | for (t = 0; t <= syn_retries && (1UL << t) < connect_timeout; t++) |
2272 | ; |
2273 | if (t <= syn_retries) |
2274 | tcp_sock_set_syncnt(sk: sock->sk, val: t - 1); |
2275 | } |
2276 | |
2277 | static void xs_tcp_do_set_connect_timeout(struct rpc_xprt *xprt, |
2278 | unsigned long connect_timeout) |
2279 | { |
2280 | struct sock_xprt *transport = |
2281 | container_of(xprt, struct sock_xprt, xprt); |
2282 | struct rpc_timeout to; |
2283 | unsigned long initval; |
2284 | |
2285 | memcpy(&to, xprt->timeout, sizeof(to)); |
2286 | /* Arbitrary lower limit */ |
2287 | initval = max_t(unsigned long, connect_timeout, XS_TCP_INIT_REEST_TO); |
2288 | to.to_initval = initval; |
2289 | to.to_maxval = initval; |
2290 | to.to_retries = 0; |
2291 | memcpy(&transport->tcp_timeout, &to, sizeof(transport->tcp_timeout)); |
2292 | xprt->timeout = &transport->tcp_timeout; |
2293 | xprt->connect_timeout = connect_timeout; |
2294 | } |
2295 | |
2296 | static void xs_tcp_set_connect_timeout(struct rpc_xprt *xprt, |
2297 | unsigned long connect_timeout, |
2298 | unsigned long reconnect_timeout) |
2299 | { |
2300 | struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); |
2301 | |
2302 | spin_lock(lock: &xprt->transport_lock); |
2303 | if (reconnect_timeout < xprt->max_reconnect_timeout) |
2304 | xprt->max_reconnect_timeout = reconnect_timeout; |
2305 | if (connect_timeout < xprt->connect_timeout) |
2306 | xs_tcp_do_set_connect_timeout(xprt, connect_timeout); |
2307 | set_bit(XPRT_SOCK_UPD_TIMEOUT, addr: &transport->sock_state); |
2308 | spin_unlock(lock: &xprt->transport_lock); |
2309 | } |
2310 | |
2311 | static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock) |
2312 | { |
2313 | struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); |
2314 | |
2315 | if (!transport->inet) { |
2316 | struct sock *sk = sock->sk; |
2317 | |
2318 | /* Avoid temporary address, they are bad for long-lived |
2319 | * connections such as NFS mounts. |
2320 | * RFC4941, section 3.6 suggests that: |
2321 | * Individual applications, which have specific |
2322 | * knowledge about the normal duration of connections, |
2323 | * MAY override this as appropriate. |
2324 | */ |
2325 | if (xs_addr(xprt)->sa_family == PF_INET6) { |
2326 | ip6_sock_set_addr_preferences(sk, |
2327 | IPV6_PREFER_SRC_PUBLIC); |
2328 | } |
2329 | |
2330 | xs_tcp_set_socket_timeouts(xprt, sock); |
2331 | tcp_sock_set_nodelay(sk); |
2332 | |
2333 | lock_sock(sk); |
2334 | |
2335 | xs_save_old_callbacks(transport, sk); |
2336 | |
2337 | sk->sk_user_data = xprt; |
2338 | sk->sk_data_ready = xs_data_ready; |
2339 | sk->sk_state_change = xs_tcp_state_change; |
2340 | sk->sk_write_space = xs_tcp_write_space; |
2341 | sk->sk_error_report = xs_error_report; |
2342 | sk->sk_use_task_frag = false; |
2343 | |
2344 | /* socket options */ |
2345 | sock_reset_flag(sk, flag: SOCK_LINGER); |
2346 | |
2347 | xprt_clear_connected(xprt); |
2348 | |
2349 | /* Reset to new socket */ |
2350 | transport->sock = sock; |
2351 | transport->inet = sk; |
2352 | |
2353 | release_sock(sk); |
2354 | } |
2355 | |
2356 | if (!xprt_bound(xprt)) |
2357 | return -ENOTCONN; |
2358 | |
2359 | xs_set_memalloc(xprt); |
2360 | |
2361 | xs_stream_start_connect(transport); |
2362 | |
2363 | /* Tell the socket layer to start connecting... */ |
2364 | set_bit(XPRT_SOCK_CONNECTING, addr: &transport->sock_state); |
2365 | return kernel_connect(sock, addr: xs_addr(xprt), addrlen: xprt->addrlen, O_NONBLOCK); |
2366 | } |
2367 | |
2368 | /** |
2369 | * xs_tcp_setup_socket - create a TCP socket and connect to a remote endpoint |
2370 | * @work: queued work item |
2371 | * |
2372 | * Invoked by a work queue tasklet. |
2373 | */ |
2374 | static void xs_tcp_setup_socket(struct work_struct *work) |
2375 | { |
2376 | struct sock_xprt *transport = |
2377 | container_of(work, struct sock_xprt, connect_worker.work); |
2378 | struct socket *sock = transport->sock; |
2379 | struct rpc_xprt *xprt = &transport->xprt; |
2380 | int status; |
2381 | unsigned int pflags = current->flags; |
2382 | |
2383 | if (atomic_read(v: &xprt->swapper)) |
2384 | current->flags |= PF_MEMALLOC; |
2385 | |
2386 | if (xprt_connected(xprt)) |
2387 | goto out; |
2388 | if (test_and_clear_bit(XPRT_SOCK_CONNECT_SENT, |
2389 | addr: &transport->sock_state) || |
2390 | !sock) { |
2391 | xs_reset_transport(transport); |
2392 | sock = xs_create_sock(xprt, transport, family: xs_addr(xprt)->sa_family, |
2393 | type: SOCK_STREAM, IPPROTO_TCP, reuseport: true); |
2394 | if (IS_ERR(ptr: sock)) { |
2395 | xprt_wake_pending_tasks(xprt, status: PTR_ERR(ptr: sock)); |
2396 | goto out; |
2397 | } |
2398 | } |
2399 | |
2400 | dprintk("RPC: worker connecting xprt %p via %s to " |
2401 | "%s (port %s)\n" , xprt, |
2402 | xprt->address_strings[RPC_DISPLAY_PROTO], |
2403 | xprt->address_strings[RPC_DISPLAY_ADDR], |
2404 | xprt->address_strings[RPC_DISPLAY_PORT]); |
2405 | |
2406 | status = xs_tcp_finish_connecting(xprt, sock); |
2407 | trace_rpc_socket_connect(xprt, socket: sock, error: status); |
2408 | dprintk("RPC: %p connect status %d connected %d sock state %d\n" , |
2409 | xprt, -status, xprt_connected(xprt), |
2410 | sock->sk->sk_state); |
2411 | switch (status) { |
2412 | case 0: |
2413 | case -EINPROGRESS: |
2414 | /* SYN_SENT! */ |
2415 | set_bit(XPRT_SOCK_CONNECT_SENT, addr: &transport->sock_state); |
2416 | if (xprt->reestablish_timeout < XS_TCP_INIT_REEST_TO) |
2417 | xprt->reestablish_timeout = XS_TCP_INIT_REEST_TO; |
2418 | fallthrough; |
2419 | case -EALREADY: |
2420 | goto out_unlock; |
2421 | case -EADDRNOTAVAIL: |
2422 | /* Source port number is unavailable. Try a new one! */ |
2423 | transport->srcport = 0; |
2424 | status = -EAGAIN; |
2425 | break; |
2426 | case -EINVAL: |
2427 | /* Happens, for instance, if the user specified a link |
2428 | * local IPv6 address without a scope-id. |
2429 | */ |
2430 | case -ECONNREFUSED: |
2431 | case -ECONNRESET: |
2432 | case -ENETDOWN: |
2433 | case -ENETUNREACH: |
2434 | case -EHOSTUNREACH: |
2435 | case -EADDRINUSE: |
2436 | case -ENOBUFS: |
2437 | break; |
2438 | default: |
2439 | printk("%s: connect returned unhandled error %d\n" , |
2440 | __func__, status); |
2441 | status = -EAGAIN; |
2442 | } |
2443 | |
2444 | /* xs_tcp_force_close() wakes tasks with a fixed error code. |
2445 | * We need to wake them first to ensure the correct error code. |
2446 | */ |
2447 | xprt_wake_pending_tasks(xprt, status); |
2448 | xs_tcp_force_close(xprt); |
2449 | out: |
2450 | xprt_clear_connecting(xprt); |
2451 | out_unlock: |
2452 | xprt_unlock_connect(xprt, transport); |
2453 | current_restore_flags(orig_flags: pflags, PF_MEMALLOC); |
2454 | } |
2455 | |
2456 | /* |
2457 | * Transfer the connected socket to @upper_transport, then mark that |
2458 | * xprt CONNECTED. |
2459 | */ |
2460 | static int xs_tcp_tls_finish_connecting(struct rpc_xprt *lower_xprt, |
2461 | struct sock_xprt *upper_transport) |
2462 | { |
2463 | struct sock_xprt *lower_transport = |
2464 | container_of(lower_xprt, struct sock_xprt, xprt); |
2465 | struct rpc_xprt *upper_xprt = &upper_transport->xprt; |
2466 | |
2467 | if (!upper_transport->inet) { |
2468 | struct socket *sock = lower_transport->sock; |
2469 | struct sock *sk = sock->sk; |
2470 | |
2471 | /* Avoid temporary address, they are bad for long-lived |
2472 | * connections such as NFS mounts. |
2473 | * RFC4941, section 3.6 suggests that: |
2474 | * Individual applications, which have specific |
2475 | * knowledge about the normal duration of connections, |
2476 | * MAY override this as appropriate. |
2477 | */ |
2478 | if (xs_addr(xprt: upper_xprt)->sa_family == PF_INET6) |
2479 | ip6_sock_set_addr_preferences(sk, IPV6_PREFER_SRC_PUBLIC); |
2480 | |
2481 | xs_tcp_set_socket_timeouts(xprt: upper_xprt, sock); |
2482 | tcp_sock_set_nodelay(sk); |
2483 | |
2484 | lock_sock(sk); |
2485 | |
2486 | /* @sk is already connected, so it now has the RPC callbacks. |
2487 | * Reach into @lower_transport to save the original ones. |
2488 | */ |
2489 | upper_transport->old_data_ready = lower_transport->old_data_ready; |
2490 | upper_transport->old_state_change = lower_transport->old_state_change; |
2491 | upper_transport->old_write_space = lower_transport->old_write_space; |
2492 | upper_transport->old_error_report = lower_transport->old_error_report; |
2493 | sk->sk_user_data = upper_xprt; |
2494 | |
2495 | /* socket options */ |
2496 | sock_reset_flag(sk, flag: SOCK_LINGER); |
2497 | |
2498 | xprt_clear_connected(xprt: upper_xprt); |
2499 | |
2500 | upper_transport->sock = sock; |
2501 | upper_transport->inet = sk; |
2502 | upper_transport->file = lower_transport->file; |
2503 | |
2504 | release_sock(sk); |
2505 | |
2506 | /* Reset lower_transport before shutting down its clnt */ |
2507 | mutex_lock(&lower_transport->recv_mutex); |
2508 | lower_transport->inet = NULL; |
2509 | lower_transport->sock = NULL; |
2510 | lower_transport->file = NULL; |
2511 | |
2512 | xprt_clear_connected(xprt: lower_xprt); |
2513 | xs_sock_reset_connection_flags(xprt: lower_xprt); |
2514 | xs_stream_reset_connect(transport: lower_transport); |
2515 | mutex_unlock(lock: &lower_transport->recv_mutex); |
2516 | } |
2517 | |
2518 | if (!xprt_bound(xprt: upper_xprt)) |
2519 | return -ENOTCONN; |
2520 | |
2521 | xs_set_memalloc(xprt: upper_xprt); |
2522 | |
2523 | if (!xprt_test_and_set_connected(xprt: upper_xprt)) { |
2524 | upper_xprt->connect_cookie++; |
2525 | clear_bit(XPRT_SOCK_CONNECTING, addr: &upper_transport->sock_state); |
2526 | xprt_clear_connecting(xprt: upper_xprt); |
2527 | |
2528 | upper_xprt->stat.connect_count++; |
2529 | upper_xprt->stat.connect_time += (long)jiffies - |
2530 | upper_xprt->stat.connect_start; |
2531 | xs_run_error_worker(transport: upper_transport, XPRT_SOCK_WAKE_PENDING); |
2532 | } |
2533 | return 0; |
2534 | } |
2535 | |
2536 | /** |
2537 | * xs_tls_handshake_done - TLS handshake completion handler |
2538 | * @data: address of xprt to wake |
2539 | * @status: status of handshake |
2540 | * @peerid: serial number of key containing the remote's identity |
2541 | * |
2542 | */ |
2543 | static void xs_tls_handshake_done(void *data, int status, key_serial_t peerid) |
2544 | { |
2545 | struct rpc_xprt *lower_xprt = data; |
2546 | struct sock_xprt *lower_transport = |
2547 | container_of(lower_xprt, struct sock_xprt, xprt); |
2548 | |
2549 | lower_transport->xprt_err = status ? -EACCES : 0; |
2550 | complete(&lower_transport->handshake_done); |
2551 | xprt_put(xprt: lower_xprt); |
2552 | } |
2553 | |
2554 | static int xs_tls_handshake_sync(struct rpc_xprt *lower_xprt, struct xprtsec_parms *xprtsec) |
2555 | { |
2556 | struct sock_xprt *lower_transport = |
2557 | container_of(lower_xprt, struct sock_xprt, xprt); |
2558 | struct tls_handshake_args args = { |
2559 | .ta_sock = lower_transport->sock, |
2560 | .ta_done = xs_tls_handshake_done, |
2561 | .ta_data = xprt_get(xprt: lower_xprt), |
2562 | .ta_peername = lower_xprt->servername, |
2563 | }; |
2564 | struct sock *sk = lower_transport->inet; |
2565 | int rc; |
2566 | |
2567 | init_completion(x: &lower_transport->handshake_done); |
2568 | set_bit(XPRT_SOCK_IGNORE_RECV, addr: &lower_transport->sock_state); |
2569 | lower_transport->xprt_err = -ETIMEDOUT; |
2570 | switch (xprtsec->policy) { |
2571 | case RPC_XPRTSEC_TLS_ANON: |
2572 | rc = tls_client_hello_anon(args: &args, GFP_KERNEL); |
2573 | if (rc) |
2574 | goto out_put_xprt; |
2575 | break; |
2576 | case RPC_XPRTSEC_TLS_X509: |
2577 | args.ta_my_cert = xprtsec->cert_serial; |
2578 | args.ta_my_privkey = xprtsec->privkey_serial; |
2579 | rc = tls_client_hello_x509(args: &args, GFP_KERNEL); |
2580 | if (rc) |
2581 | goto out_put_xprt; |
2582 | break; |
2583 | default: |
2584 | rc = -EACCES; |
2585 | goto out_put_xprt; |
2586 | } |
2587 | |
2588 | rc = wait_for_completion_interruptible_timeout(x: &lower_transport->handshake_done, |
2589 | XS_TLS_HANDSHAKE_TO); |
2590 | if (rc <= 0) { |
2591 | if (!tls_handshake_cancel(sk)) { |
2592 | if (rc == 0) |
2593 | rc = -ETIMEDOUT; |
2594 | goto out_put_xprt; |
2595 | } |
2596 | } |
2597 | |
2598 | rc = lower_transport->xprt_err; |
2599 | |
2600 | out: |
2601 | xs_stream_reset_connect(transport: lower_transport); |
2602 | clear_bit(XPRT_SOCK_IGNORE_RECV, addr: &lower_transport->sock_state); |
2603 | return rc; |
2604 | |
2605 | out_put_xprt: |
2606 | xprt_put(xprt: lower_xprt); |
2607 | goto out; |
2608 | } |
2609 | |
2610 | /** |
2611 | * xs_tcp_tls_setup_socket - establish a TLS session on a TCP socket |
2612 | * @work: queued work item |
2613 | * |
2614 | * Invoked by a work queue tasklet. |
2615 | * |
2616 | * For RPC-with-TLS, there is a two-stage connection process. |
2617 | * |
2618 | * The "upper-layer xprt" is visible to the RPC consumer. Once it has |
2619 | * been marked connected, the consumer knows that a TCP connection and |
2620 | * a TLS session have been established. |
2621 | * |
2622 | * A "lower-layer xprt", created in this function, handles the mechanics |
2623 | * of connecting the TCP socket, performing the RPC_AUTH_TLS probe, and |
2624 | * then driving the TLS handshake. Once all that is complete, the upper |
2625 | * layer xprt is marked connected. |
2626 | */ |
2627 | static void xs_tcp_tls_setup_socket(struct work_struct *work) |
2628 | { |
2629 | struct sock_xprt *upper_transport = |
2630 | container_of(work, struct sock_xprt, connect_worker.work); |
2631 | struct rpc_clnt *upper_clnt = upper_transport->clnt; |
2632 | struct rpc_xprt *upper_xprt = &upper_transport->xprt; |
2633 | struct rpc_create_args args = { |
2634 | .net = upper_xprt->xprt_net, |
2635 | .protocol = upper_xprt->prot, |
2636 | .address = (struct sockaddr *)&upper_xprt->addr, |
2637 | .addrsize = upper_xprt->addrlen, |
2638 | .timeout = upper_clnt->cl_timeout, |
2639 | .servername = upper_xprt->servername, |
2640 | .program = upper_clnt->cl_program, |
2641 | .prognumber = upper_clnt->cl_prog, |
2642 | .version = upper_clnt->cl_vers, |
2643 | .authflavor = RPC_AUTH_TLS, |
2644 | .cred = upper_clnt->cl_cred, |
2645 | .xprtsec = { |
2646 | .policy = RPC_XPRTSEC_NONE, |
2647 | }, |
2648 | }; |
2649 | unsigned int pflags = current->flags; |
2650 | struct rpc_clnt *lower_clnt; |
2651 | struct rpc_xprt *lower_xprt; |
2652 | int status; |
2653 | |
2654 | if (atomic_read(v: &upper_xprt->swapper)) |
2655 | current->flags |= PF_MEMALLOC; |
2656 | |
2657 | xs_stream_start_connect(transport: upper_transport); |
2658 | |
2659 | /* This implicitly sends an RPC_AUTH_TLS probe */ |
2660 | lower_clnt = rpc_create(args: &args); |
2661 | if (IS_ERR(ptr: lower_clnt)) { |
2662 | trace_rpc_tls_unavailable(clnt: upper_clnt, xprt: upper_xprt); |
2663 | clear_bit(XPRT_SOCK_CONNECTING, addr: &upper_transport->sock_state); |
2664 | xprt_clear_connecting(xprt: upper_xprt); |
2665 | xprt_wake_pending_tasks(xprt: upper_xprt, status: PTR_ERR(ptr: lower_clnt)); |
2666 | xs_run_error_worker(transport: upper_transport, XPRT_SOCK_WAKE_PENDING); |
2667 | goto out_unlock; |
2668 | } |
2669 | |
2670 | /* RPC_AUTH_TLS probe was successful. Try a TLS handshake on |
2671 | * the lower xprt. |
2672 | */ |
2673 | rcu_read_lock(); |
2674 | lower_xprt = rcu_dereference(lower_clnt->cl_xprt); |
2675 | rcu_read_unlock(); |
2676 | |
2677 | if (wait_on_bit_lock(word: &lower_xprt->state, XPRT_LOCKED, TASK_KILLABLE)) |
2678 | goto out_unlock; |
2679 | |
2680 | status = xs_tls_handshake_sync(lower_xprt, xprtsec: &upper_xprt->xprtsec); |
2681 | if (status) { |
2682 | trace_rpc_tls_not_started(clnt: upper_clnt, xprt: upper_xprt); |
2683 | goto out_close; |
2684 | } |
2685 | |
2686 | status = xs_tcp_tls_finish_connecting(lower_xprt, upper_transport); |
2687 | if (status) |
2688 | goto out_close; |
2689 | xprt_release_write(lower_xprt, NULL); |
2690 | |
2691 | trace_rpc_socket_connect(xprt: upper_xprt, socket: upper_transport->sock, error: 0); |
2692 | if (!xprt_test_and_set_connected(xprt: upper_xprt)) { |
2693 | upper_xprt->connect_cookie++; |
2694 | clear_bit(XPRT_SOCK_CONNECTING, addr: &upper_transport->sock_state); |
2695 | xprt_clear_connecting(xprt: upper_xprt); |
2696 | |
2697 | upper_xprt->stat.connect_count++; |
2698 | upper_xprt->stat.connect_time += (long)jiffies - |
2699 | upper_xprt->stat.connect_start; |
2700 | xs_run_error_worker(transport: upper_transport, XPRT_SOCK_WAKE_PENDING); |
2701 | } |
2702 | rpc_shutdown_client(lower_clnt); |
2703 | |
2704 | out_unlock: |
2705 | current_restore_flags(orig_flags: pflags, PF_MEMALLOC); |
2706 | upper_transport->clnt = NULL; |
2707 | xprt_unlock_connect(upper_xprt, upper_transport); |
2708 | return; |
2709 | |
2710 | out_close: |
2711 | xprt_release_write(lower_xprt, NULL); |
2712 | rpc_shutdown_client(lower_clnt); |
2713 | |
2714 | /* xprt_force_disconnect() wakes tasks with a fixed tk_status code. |
2715 | * Wake them first here to ensure they get our tk_status code. |
2716 | */ |
2717 | xprt_wake_pending_tasks(xprt: upper_xprt, status); |
2718 | xs_tcp_force_close(xprt: upper_xprt); |
2719 | xprt_clear_connecting(xprt: upper_xprt); |
2720 | goto out_unlock; |
2721 | } |
2722 | |
2723 | /** |
2724 | * xs_connect - connect a socket to a remote endpoint |
2725 | * @xprt: pointer to transport structure |
2726 | * @task: address of RPC task that manages state of connect request |
2727 | * |
2728 | * TCP: If the remote end dropped the connection, delay reconnecting. |
2729 | * |
2730 | * UDP socket connects are synchronous, but we use a work queue anyway |
2731 | * to guarantee that even unprivileged user processes can set up a |
2732 | * socket on a privileged port. |
2733 | * |
2734 | * If a UDP socket connect fails, the delay behavior here prevents |
2735 | * retry floods (hard mounts). |
2736 | */ |
2737 | static void xs_connect(struct rpc_xprt *xprt, struct rpc_task *task) |
2738 | { |
2739 | struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); |
2740 | unsigned long delay = 0; |
2741 | |
2742 | WARN_ON_ONCE(!xprt_lock_connect(xprt, task, transport)); |
2743 | |
2744 | if (transport->sock != NULL) { |
2745 | dprintk("RPC: xs_connect delayed xprt %p for %lu " |
2746 | "seconds\n" , xprt, xprt->reestablish_timeout / HZ); |
2747 | |
2748 | delay = xprt_reconnect_delay(xprt); |
2749 | xprt_reconnect_backoff(xprt, XS_TCP_INIT_REEST_TO); |
2750 | |
2751 | } else |
2752 | dprintk("RPC: xs_connect scheduled xprt %p\n" , xprt); |
2753 | |
2754 | transport->clnt = task->tk_client; |
2755 | queue_delayed_work(wq: xprtiod_workqueue, |
2756 | dwork: &transport->connect_worker, |
2757 | delay); |
2758 | } |
2759 | |
2760 | static void xs_wake_disconnect(struct sock_xprt *transport) |
2761 | { |
2762 | if (test_and_clear_bit(XPRT_SOCK_WAKE_DISCONNECT, addr: &transport->sock_state)) |
2763 | xs_tcp_force_close(xprt: &transport->xprt); |
2764 | } |
2765 | |
2766 | static void xs_wake_write(struct sock_xprt *transport) |
2767 | { |
2768 | if (test_and_clear_bit(XPRT_SOCK_WAKE_WRITE, addr: &transport->sock_state)) |
2769 | xprt_write_space(xprt: &transport->xprt); |
2770 | } |
2771 | |
2772 | static void xs_wake_error(struct sock_xprt *transport) |
2773 | { |
2774 | int sockerr; |
2775 | |
2776 | if (!test_and_clear_bit(XPRT_SOCK_WAKE_ERROR, addr: &transport->sock_state)) |
2777 | return; |
2778 | sockerr = xchg(&transport->xprt_err, 0); |
2779 | if (sockerr < 0) { |
2780 | xprt_wake_pending_tasks(xprt: &transport->xprt, status: sockerr); |
2781 | xs_tcp_force_close(xprt: &transport->xprt); |
2782 | } |
2783 | } |
2784 | |
2785 | static void xs_wake_pending(struct sock_xprt *transport) |
2786 | { |
2787 | if (test_and_clear_bit(XPRT_SOCK_WAKE_PENDING, addr: &transport->sock_state)) |
2788 | xprt_wake_pending_tasks(xprt: &transport->xprt, status: -EAGAIN); |
2789 | } |
2790 | |
2791 | static void xs_error_handle(struct work_struct *work) |
2792 | { |
2793 | struct sock_xprt *transport = container_of(work, |
2794 | struct sock_xprt, error_worker); |
2795 | |
2796 | xs_wake_disconnect(transport); |
2797 | xs_wake_write(transport); |
2798 | xs_wake_error(transport); |
2799 | xs_wake_pending(transport); |
2800 | } |
2801 | |
2802 | /** |
2803 | * xs_local_print_stats - display AF_LOCAL socket-specific stats |
2804 | * @xprt: rpc_xprt struct containing statistics |
2805 | * @seq: output file |
2806 | * |
2807 | */ |
2808 | static void xs_local_print_stats(struct rpc_xprt *xprt, struct seq_file *seq) |
2809 | { |
2810 | long idle_time = 0; |
2811 | |
2812 | if (xprt_connected(xprt)) |
2813 | idle_time = (long)(jiffies - xprt->last_used) / HZ; |
2814 | |
2815 | seq_printf(m: seq, fmt: "\txprt:\tlocal %lu %lu %lu %ld %lu %lu %lu " |
2816 | "%llu %llu %lu %llu %llu\n" , |
2817 | xprt->stat.bind_count, |
2818 | xprt->stat.connect_count, |
2819 | xprt->stat.connect_time / HZ, |
2820 | idle_time, |
2821 | xprt->stat.sends, |
2822 | xprt->stat.recvs, |
2823 | xprt->stat.bad_xids, |
2824 | xprt->stat.req_u, |
2825 | xprt->stat.bklog_u, |
2826 | xprt->stat.max_slots, |
2827 | xprt->stat.sending_u, |
2828 | xprt->stat.pending_u); |
2829 | } |
2830 | |
2831 | /** |
2832 | * xs_udp_print_stats - display UDP socket-specific stats |
2833 | * @xprt: rpc_xprt struct containing statistics |
2834 | * @seq: output file |
2835 | * |
2836 | */ |
2837 | static void xs_udp_print_stats(struct rpc_xprt *xprt, struct seq_file *seq) |
2838 | { |
2839 | struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); |
2840 | |
2841 | seq_printf(m: seq, fmt: "\txprt:\tudp %u %lu %lu %lu %lu %llu %llu " |
2842 | "%lu %llu %llu\n" , |
2843 | transport->srcport, |
2844 | xprt->stat.bind_count, |
2845 | xprt->stat.sends, |
2846 | xprt->stat.recvs, |
2847 | xprt->stat.bad_xids, |
2848 | xprt->stat.req_u, |
2849 | xprt->stat.bklog_u, |
2850 | xprt->stat.max_slots, |
2851 | xprt->stat.sending_u, |
2852 | xprt->stat.pending_u); |
2853 | } |
2854 | |
2855 | /** |
2856 | * xs_tcp_print_stats - display TCP socket-specific stats |
2857 | * @xprt: rpc_xprt struct containing statistics |
2858 | * @seq: output file |
2859 | * |
2860 | */ |
2861 | static void xs_tcp_print_stats(struct rpc_xprt *xprt, struct seq_file *seq) |
2862 | { |
2863 | struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); |
2864 | long idle_time = 0; |
2865 | |
2866 | if (xprt_connected(xprt)) |
2867 | idle_time = (long)(jiffies - xprt->last_used) / HZ; |
2868 | |
2869 | seq_printf(m: seq, fmt: "\txprt:\ttcp %u %lu %lu %lu %ld %lu %lu %lu " |
2870 | "%llu %llu %lu %llu %llu\n" , |
2871 | transport->srcport, |
2872 | xprt->stat.bind_count, |
2873 | xprt->stat.connect_count, |
2874 | xprt->stat.connect_time / HZ, |
2875 | idle_time, |
2876 | xprt->stat.sends, |
2877 | xprt->stat.recvs, |
2878 | xprt->stat.bad_xids, |
2879 | xprt->stat.req_u, |
2880 | xprt->stat.bklog_u, |
2881 | xprt->stat.max_slots, |
2882 | xprt->stat.sending_u, |
2883 | xprt->stat.pending_u); |
2884 | } |
2885 | |
2886 | /* |
2887 | * Allocate a bunch of pages for a scratch buffer for the rpc code. The reason |
2888 | * we allocate pages instead doing a kmalloc like rpc_malloc is because we want |
2889 | * to use the server side send routines. |
2890 | */ |
2891 | static int bc_malloc(struct rpc_task *task) |
2892 | { |
2893 | struct rpc_rqst *rqst = task->tk_rqstp; |
2894 | size_t size = rqst->rq_callsize; |
2895 | struct page *page; |
2896 | struct rpc_buffer *buf; |
2897 | |
2898 | if (size > PAGE_SIZE - sizeof(struct rpc_buffer)) { |
2899 | WARN_ONCE(1, "xprtsock: large bc buffer request (size %zu)\n" , |
2900 | size); |
2901 | return -EINVAL; |
2902 | } |
2903 | |
2904 | page = alloc_page(GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN); |
2905 | if (!page) |
2906 | return -ENOMEM; |
2907 | |
2908 | buf = page_address(page); |
2909 | buf->len = PAGE_SIZE; |
2910 | |
2911 | rqst->rq_buffer = buf->data; |
2912 | rqst->rq_rbuffer = (char *)rqst->rq_buffer + rqst->rq_callsize; |
2913 | return 0; |
2914 | } |
2915 | |
2916 | /* |
2917 | * Free the space allocated in the bc_alloc routine |
2918 | */ |
2919 | static void bc_free(struct rpc_task *task) |
2920 | { |
2921 | void *buffer = task->tk_rqstp->rq_buffer; |
2922 | struct rpc_buffer *buf; |
2923 | |
2924 | buf = container_of(buffer, struct rpc_buffer, data); |
2925 | free_page((unsigned long)buf); |
2926 | } |
2927 | |
2928 | static int bc_sendto(struct rpc_rqst *req) |
2929 | { |
2930 | struct xdr_buf *xdr = &req->rq_snd_buf; |
2931 | struct sock_xprt *transport = |
2932 | container_of(req->rq_xprt, struct sock_xprt, xprt); |
2933 | struct msghdr msg = { |
2934 | .msg_flags = 0, |
2935 | }; |
2936 | rpc_fraghdr marker = cpu_to_be32(RPC_LAST_STREAM_FRAGMENT | |
2937 | (u32)xdr->len); |
2938 | unsigned int sent = 0; |
2939 | int err; |
2940 | |
2941 | req->rq_xtime = ktime_get(); |
2942 | err = xdr_alloc_bvec(buf: xdr, gfp: rpc_task_gfp_mask()); |
2943 | if (err < 0) |
2944 | return err; |
2945 | err = xprt_sock_sendmsg(sock: transport->sock, msg: &msg, xdr, base: 0, marker, sent_p: &sent); |
2946 | xdr_free_bvec(buf: xdr); |
2947 | if (err < 0 || sent != (xdr->len + sizeof(marker))) |
2948 | return -EAGAIN; |
2949 | return sent; |
2950 | } |
2951 | |
2952 | /** |
2953 | * bc_send_request - Send a backchannel Call on a TCP socket |
2954 | * @req: rpc_rqst containing Call message to be sent |
2955 | * |
2956 | * xpt_mutex ensures @rqstp's whole message is written to the socket |
2957 | * without interruption. |
2958 | * |
2959 | * Return values: |
2960 | * %0 if the message was sent successfully |
2961 | * %ENOTCONN if the message was not sent |
2962 | */ |
2963 | static int bc_send_request(struct rpc_rqst *req) |
2964 | { |
2965 | struct svc_xprt *xprt; |
2966 | int len; |
2967 | |
2968 | /* |
2969 | * Get the server socket associated with this callback xprt |
2970 | */ |
2971 | xprt = req->rq_xprt->bc_xprt; |
2972 | |
2973 | /* |
2974 | * Grab the mutex to serialize data as the connection is shared |
2975 | * with the fore channel |
2976 | */ |
2977 | mutex_lock(&xprt->xpt_mutex); |
2978 | if (test_bit(XPT_DEAD, &xprt->xpt_flags)) |
2979 | len = -ENOTCONN; |
2980 | else |
2981 | len = bc_sendto(req); |
2982 | mutex_unlock(lock: &xprt->xpt_mutex); |
2983 | |
2984 | if (len > 0) |
2985 | len = 0; |
2986 | |
2987 | return len; |
2988 | } |
2989 | |
/*
 * The close routine for the backchannel transport. No socket is shut
 * down here; the only action taken is the call to xprt_disconnect_done().
 */
static void bc_close(struct rpc_xprt *xprt)
{
	xprt_disconnect_done(xprt);
}
2998 | |
2999 | /* |
3000 | * The xprt destroy routine. Again, because this connection is client |
3001 | * initiated, we do nothing |
3002 | */ |
3003 | |
3004 | static void bc_destroy(struct rpc_xprt *xprt) |
3005 | { |
3006 | dprintk("RPC: bc_destroy xprt %p\n" , xprt); |
3007 | |
3008 | xs_xprt_free(xprt); |
3009 | module_put(THIS_MODULE); |
3010 | } |
3011 | |
3012 | static const struct rpc_xprt_ops xs_local_ops = { |
3013 | .reserve_xprt = xprt_reserve_xprt, |
3014 | .release_xprt = xprt_release_xprt, |
3015 | .alloc_slot = xprt_alloc_slot, |
3016 | .free_slot = xprt_free_slot, |
3017 | .rpcbind = xs_local_rpcbind, |
3018 | .set_port = xs_local_set_port, |
3019 | .connect = xs_local_connect, |
3020 | .buf_alloc = rpc_malloc, |
3021 | .buf_free = rpc_free, |
3022 | .prepare_request = xs_stream_prepare_request, |
3023 | .send_request = xs_local_send_request, |
3024 | .wait_for_reply_request = xprt_wait_for_reply_request_def, |
3025 | .close = xs_close, |
3026 | .destroy = xs_destroy, |
3027 | .print_stats = xs_local_print_stats, |
3028 | .enable_swap = xs_enable_swap, |
3029 | .disable_swap = xs_disable_swap, |
3030 | }; |
3031 | |
3032 | static const struct rpc_xprt_ops xs_udp_ops = { |
3033 | .set_buffer_size = xs_udp_set_buffer_size, |
3034 | .reserve_xprt = xprt_reserve_xprt_cong, |
3035 | .release_xprt = xprt_release_xprt_cong, |
3036 | .alloc_slot = xprt_alloc_slot, |
3037 | .free_slot = xprt_free_slot, |
3038 | .rpcbind = rpcb_getport_async, |
3039 | .set_port = xs_set_port, |
3040 | .connect = xs_connect, |
3041 | .get_srcaddr = xs_sock_srcaddr, |
3042 | .get_srcport = xs_sock_srcport, |
3043 | .buf_alloc = rpc_malloc, |
3044 | .buf_free = rpc_free, |
3045 | .send_request = xs_udp_send_request, |
3046 | .wait_for_reply_request = xprt_wait_for_reply_request_rtt, |
3047 | .timer = xs_udp_timer, |
3048 | .release_request = xprt_release_rqst_cong, |
3049 | .close = xs_close, |
3050 | .destroy = xs_destroy, |
3051 | .print_stats = xs_udp_print_stats, |
3052 | .enable_swap = xs_enable_swap, |
3053 | .disable_swap = xs_disable_swap, |
3054 | .inject_disconnect = xs_inject_disconnect, |
3055 | }; |
3056 | |
3057 | static const struct rpc_xprt_ops xs_tcp_ops = { |
3058 | .reserve_xprt = xprt_reserve_xprt, |
3059 | .release_xprt = xprt_release_xprt, |
3060 | .alloc_slot = xprt_alloc_slot, |
3061 | .free_slot = xprt_free_slot, |
3062 | .rpcbind = rpcb_getport_async, |
3063 | .set_port = xs_set_port, |
3064 | .connect = xs_connect, |
3065 | .get_srcaddr = xs_sock_srcaddr, |
3066 | .get_srcport = xs_sock_srcport, |
3067 | .buf_alloc = rpc_malloc, |
3068 | .buf_free = rpc_free, |
3069 | .prepare_request = xs_stream_prepare_request, |
3070 | .send_request = xs_tcp_send_request, |
3071 | .wait_for_reply_request = xprt_wait_for_reply_request_def, |
3072 | .close = xs_tcp_shutdown, |
3073 | .destroy = xs_destroy, |
3074 | .set_connect_timeout = xs_tcp_set_connect_timeout, |
3075 | .print_stats = xs_tcp_print_stats, |
3076 | .enable_swap = xs_enable_swap, |
3077 | .disable_swap = xs_disable_swap, |
3078 | .inject_disconnect = xs_inject_disconnect, |
3079 | #ifdef CONFIG_SUNRPC_BACKCHANNEL |
3080 | .bc_setup = xprt_setup_bc, |
3081 | .bc_maxpayload = xs_tcp_bc_maxpayload, |
3082 | .bc_num_slots = xprt_bc_max_slots, |
3083 | .bc_free_rqst = xprt_free_bc_rqst, |
3084 | .bc_destroy = xprt_destroy_bc, |
3085 | #endif |
3086 | }; |
3087 | |
3088 | /* |
3089 | * The rpc_xprt_ops for the server backchannel |
3090 | */ |
3091 | |
3092 | static const struct rpc_xprt_ops bc_tcp_ops = { |
3093 | .reserve_xprt = xprt_reserve_xprt, |
3094 | .release_xprt = xprt_release_xprt, |
3095 | .alloc_slot = xprt_alloc_slot, |
3096 | .free_slot = xprt_free_slot, |
3097 | .buf_alloc = bc_malloc, |
3098 | .buf_free = bc_free, |
3099 | .send_request = bc_send_request, |
3100 | .wait_for_reply_request = xprt_wait_for_reply_request_def, |
3101 | .close = bc_close, |
3102 | .destroy = bc_destroy, |
3103 | .print_stats = xs_tcp_print_stats, |
3104 | .enable_swap = xs_enable_swap, |
3105 | .disable_swap = xs_disable_swap, |
3106 | .inject_disconnect = xs_inject_disconnect, |
3107 | }; |
3108 | |
3109 | static int xs_init_anyaddr(const int family, struct sockaddr *sap) |
3110 | { |
3111 | static const struct sockaddr_in sin = { |
3112 | .sin_family = AF_INET, |
3113 | .sin_addr.s_addr = htonl(INADDR_ANY), |
3114 | }; |
3115 | static const struct sockaddr_in6 sin6 = { |
3116 | .sin6_family = AF_INET6, |
3117 | .sin6_addr = IN6ADDR_ANY_INIT, |
3118 | }; |
3119 | |
3120 | switch (family) { |
3121 | case AF_LOCAL: |
3122 | break; |
3123 | case AF_INET: |
3124 | memcpy(sap, &sin, sizeof(sin)); |
3125 | break; |
3126 | case AF_INET6: |
3127 | memcpy(sap, &sin6, sizeof(sin6)); |
3128 | break; |
3129 | default: |
3130 | dprintk("RPC: %s: Bad address family\n" , __func__); |
3131 | return -EAFNOSUPPORT; |
3132 | } |
3133 | return 0; |
3134 | } |
3135 | |
3136 | static struct rpc_xprt *xs_setup_xprt(struct xprt_create *args, |
3137 | unsigned int slot_table_size, |
3138 | unsigned int max_slot_table_size) |
3139 | { |
3140 | struct rpc_xprt *xprt; |
3141 | struct sock_xprt *new; |
3142 | |
3143 | if (args->addrlen > sizeof(xprt->addr)) { |
3144 | dprintk("RPC: xs_setup_xprt: address too large\n" ); |
3145 | return ERR_PTR(error: -EBADF); |
3146 | } |
3147 | |
3148 | xprt = xprt_alloc(net: args->net, size: sizeof(*new), num_prealloc: slot_table_size, |
3149 | max_req: max_slot_table_size); |
3150 | if (xprt == NULL) { |
3151 | dprintk("RPC: xs_setup_xprt: couldn't allocate " |
3152 | "rpc_xprt\n" ); |
3153 | return ERR_PTR(error: -ENOMEM); |
3154 | } |
3155 | |
3156 | new = container_of(xprt, struct sock_xprt, xprt); |
3157 | mutex_init(&new->recv_mutex); |
3158 | memcpy(&xprt->addr, args->dstaddr, args->addrlen); |
3159 | xprt->addrlen = args->addrlen; |
3160 | if (args->srcaddr) |
3161 | memcpy(&new->srcaddr, args->srcaddr, args->addrlen); |
3162 | else { |
3163 | int err; |
3164 | err = xs_init_anyaddr(family: args->dstaddr->sa_family, |
3165 | sap: (struct sockaddr *)&new->srcaddr); |
3166 | if (err != 0) { |
3167 | xprt_free(xprt); |
3168 | return ERR_PTR(error: err); |
3169 | } |
3170 | } |
3171 | |
3172 | return xprt; |
3173 | } |
3174 | |
3175 | static const struct rpc_timeout xs_local_default_timeout = { |
3176 | .to_initval = 10 * HZ, |
3177 | .to_maxval = 10 * HZ, |
3178 | .to_retries = 2, |
3179 | }; |
3180 | |
3181 | /** |
3182 | * xs_setup_local - Set up transport to use an AF_LOCAL socket |
3183 | * @args: rpc transport creation arguments |
3184 | * |
3185 | * AF_LOCAL is a "tpi_cots_ord" transport, just like TCP |
3186 | */ |
3187 | static struct rpc_xprt *xs_setup_local(struct xprt_create *args) |
3188 | { |
3189 | struct sockaddr_un *sun = (struct sockaddr_un *)args->dstaddr; |
3190 | struct sock_xprt *transport; |
3191 | struct rpc_xprt *xprt; |
3192 | struct rpc_xprt *ret; |
3193 | |
3194 | xprt = xs_setup_xprt(args, slot_table_size: xprt_tcp_slot_table_entries, |
3195 | max_slot_table_size: xprt_max_tcp_slot_table_entries); |
3196 | if (IS_ERR(ptr: xprt)) |
3197 | return xprt; |
3198 | transport = container_of(xprt, struct sock_xprt, xprt); |
3199 | |
3200 | xprt->prot = 0; |
3201 | xprt->xprt_class = &xs_local_transport; |
3202 | xprt->max_payload = RPC_MAX_FRAGMENT_SIZE; |
3203 | |
3204 | xprt->bind_timeout = XS_BIND_TO; |
3205 | xprt->reestablish_timeout = XS_TCP_INIT_REEST_TO; |
3206 | xprt->idle_timeout = XS_IDLE_DISC_TO; |
3207 | |
3208 | xprt->ops = &xs_local_ops; |
3209 | xprt->timeout = &xs_local_default_timeout; |
3210 | |
3211 | INIT_WORK(&transport->recv_worker, xs_stream_data_receive_workfn); |
3212 | INIT_WORK(&transport->error_worker, xs_error_handle); |
3213 | INIT_DELAYED_WORK(&transport->connect_worker, xs_dummy_setup_socket); |
3214 | |
3215 | switch (sun->sun_family) { |
3216 | case AF_LOCAL: |
3217 | if (sun->sun_path[0] != '/' && sun->sun_path[0] != '\0') { |
3218 | dprintk("RPC: bad AF_LOCAL address: %s\n" , |
3219 | sun->sun_path); |
3220 | ret = ERR_PTR(error: -EINVAL); |
3221 | goto out_err; |
3222 | } |
3223 | xprt_set_bound(xprt); |
3224 | xs_format_peer_addresses(xprt, protocol: "local" , RPCBIND_NETID_LOCAL); |
3225 | break; |
3226 | default: |
3227 | ret = ERR_PTR(error: -EAFNOSUPPORT); |
3228 | goto out_err; |
3229 | } |
3230 | |
3231 | dprintk("RPC: set up xprt to %s via AF_LOCAL\n" , |
3232 | xprt->address_strings[RPC_DISPLAY_ADDR]); |
3233 | |
3234 | if (try_module_get(THIS_MODULE)) |
3235 | return xprt; |
3236 | ret = ERR_PTR(error: -EINVAL); |
3237 | out_err: |
3238 | xs_xprt_free(xprt); |
3239 | return ret; |
3240 | } |
3241 | |
3242 | static const struct rpc_timeout xs_udp_default_timeout = { |
3243 | .to_initval = 5 * HZ, |
3244 | .to_maxval = 30 * HZ, |
3245 | .to_increment = 5 * HZ, |
3246 | .to_retries = 5, |
3247 | }; |
3248 | |
3249 | /** |
3250 | * xs_setup_udp - Set up transport to use a UDP socket |
3251 | * @args: rpc transport creation arguments |
3252 | * |
3253 | */ |
3254 | static struct rpc_xprt *xs_setup_udp(struct xprt_create *args) |
3255 | { |
3256 | struct sockaddr *addr = args->dstaddr; |
3257 | struct rpc_xprt *xprt; |
3258 | struct sock_xprt *transport; |
3259 | struct rpc_xprt *ret; |
3260 | |
3261 | xprt = xs_setup_xprt(args, slot_table_size: xprt_udp_slot_table_entries, |
3262 | max_slot_table_size: xprt_udp_slot_table_entries); |
3263 | if (IS_ERR(ptr: xprt)) |
3264 | return xprt; |
3265 | transport = container_of(xprt, struct sock_xprt, xprt); |
3266 | |
3267 | xprt->prot = IPPROTO_UDP; |
3268 | xprt->xprt_class = &xs_udp_transport; |
3269 | /* XXX: header size can vary due to auth type, IPv6, etc. */ |
3270 | xprt->max_payload = (1U << 16) - (MAX_HEADER << 3); |
3271 | |
3272 | xprt->bind_timeout = XS_BIND_TO; |
3273 | xprt->reestablish_timeout = XS_UDP_REEST_TO; |
3274 | xprt->idle_timeout = XS_IDLE_DISC_TO; |
3275 | |
3276 | xprt->ops = &xs_udp_ops; |
3277 | |
3278 | xprt->timeout = &xs_udp_default_timeout; |
3279 | |
3280 | INIT_WORK(&transport->recv_worker, xs_udp_data_receive_workfn); |
3281 | INIT_WORK(&transport->error_worker, xs_error_handle); |
3282 | INIT_DELAYED_WORK(&transport->connect_worker, xs_udp_setup_socket); |
3283 | |
3284 | switch (addr->sa_family) { |
3285 | case AF_INET: |
3286 | if (((struct sockaddr_in *)addr)->sin_port != htons(0)) |
3287 | xprt_set_bound(xprt); |
3288 | |
3289 | xs_format_peer_addresses(xprt, protocol: "udp" , RPCBIND_NETID_UDP); |
3290 | break; |
3291 | case AF_INET6: |
3292 | if (((struct sockaddr_in6 *)addr)->sin6_port != htons(0)) |
3293 | xprt_set_bound(xprt); |
3294 | |
3295 | xs_format_peer_addresses(xprt, protocol: "udp" , RPCBIND_NETID_UDP6); |
3296 | break; |
3297 | default: |
3298 | ret = ERR_PTR(error: -EAFNOSUPPORT); |
3299 | goto out_err; |
3300 | } |
3301 | |
3302 | if (xprt_bound(xprt)) |
3303 | dprintk("RPC: set up xprt to %s (port %s) via %s\n" , |
3304 | xprt->address_strings[RPC_DISPLAY_ADDR], |
3305 | xprt->address_strings[RPC_DISPLAY_PORT], |
3306 | xprt->address_strings[RPC_DISPLAY_PROTO]); |
3307 | else |
3308 | dprintk("RPC: set up xprt to %s (autobind) via %s\n" , |
3309 | xprt->address_strings[RPC_DISPLAY_ADDR], |
3310 | xprt->address_strings[RPC_DISPLAY_PROTO]); |
3311 | |
3312 | if (try_module_get(THIS_MODULE)) |
3313 | return xprt; |
3314 | ret = ERR_PTR(error: -EINVAL); |
3315 | out_err: |
3316 | xs_xprt_free(xprt); |
3317 | return ret; |
3318 | } |
3319 | |
3320 | static const struct rpc_timeout xs_tcp_default_timeout = { |
3321 | .to_initval = 60 * HZ, |
3322 | .to_maxval = 60 * HZ, |
3323 | .to_retries = 2, |
3324 | }; |
3325 | |
3326 | /** |
3327 | * xs_setup_tcp - Set up transport to use a TCP socket |
3328 | * @args: rpc transport creation arguments |
3329 | * |
3330 | */ |
3331 | static struct rpc_xprt *xs_setup_tcp(struct xprt_create *args) |
3332 | { |
3333 | struct sockaddr *addr = args->dstaddr; |
3334 | struct rpc_xprt *xprt; |
3335 | struct sock_xprt *transport; |
3336 | struct rpc_xprt *ret; |
3337 | unsigned int max_slot_table_size = xprt_max_tcp_slot_table_entries; |
3338 | |
3339 | if (args->flags & XPRT_CREATE_INFINITE_SLOTS) |
3340 | max_slot_table_size = RPC_MAX_SLOT_TABLE_LIMIT; |
3341 | |
3342 | xprt = xs_setup_xprt(args, slot_table_size: xprt_tcp_slot_table_entries, |
3343 | max_slot_table_size); |
3344 | if (IS_ERR(ptr: xprt)) |
3345 | return xprt; |
3346 | transport = container_of(xprt, struct sock_xprt, xprt); |
3347 | |
3348 | xprt->prot = IPPROTO_TCP; |
3349 | xprt->xprt_class = &xs_tcp_transport; |
3350 | xprt->max_payload = RPC_MAX_FRAGMENT_SIZE; |
3351 | |
3352 | xprt->bind_timeout = XS_BIND_TO; |
3353 | xprt->reestablish_timeout = XS_TCP_INIT_REEST_TO; |
3354 | xprt->idle_timeout = XS_IDLE_DISC_TO; |
3355 | |
3356 | xprt->ops = &xs_tcp_ops; |
3357 | xprt->timeout = &xs_tcp_default_timeout; |
3358 | |
3359 | xprt->max_reconnect_timeout = xprt->timeout->to_maxval; |
3360 | if (args->reconnect_timeout) |
3361 | xprt->max_reconnect_timeout = args->reconnect_timeout; |
3362 | |
3363 | xprt->connect_timeout = xprt->timeout->to_initval * |
3364 | (xprt->timeout->to_retries + 1); |
3365 | if (args->connect_timeout) |
3366 | xs_tcp_do_set_connect_timeout(xprt, connect_timeout: args->connect_timeout); |
3367 | |
3368 | INIT_WORK(&transport->recv_worker, xs_stream_data_receive_workfn); |
3369 | INIT_WORK(&transport->error_worker, xs_error_handle); |
3370 | INIT_DELAYED_WORK(&transport->connect_worker, xs_tcp_setup_socket); |
3371 | |
3372 | switch (addr->sa_family) { |
3373 | case AF_INET: |
3374 | if (((struct sockaddr_in *)addr)->sin_port != htons(0)) |
3375 | xprt_set_bound(xprt); |
3376 | |
3377 | xs_format_peer_addresses(xprt, protocol: "tcp" , RPCBIND_NETID_TCP); |
3378 | break; |
3379 | case AF_INET6: |
3380 | if (((struct sockaddr_in6 *)addr)->sin6_port != htons(0)) |
3381 | xprt_set_bound(xprt); |
3382 | |
3383 | xs_format_peer_addresses(xprt, protocol: "tcp" , RPCBIND_NETID_TCP6); |
3384 | break; |
3385 | default: |
3386 | ret = ERR_PTR(error: -EAFNOSUPPORT); |
3387 | goto out_err; |
3388 | } |
3389 | |
3390 | if (xprt_bound(xprt)) |
3391 | dprintk("RPC: set up xprt to %s (port %s) via %s\n" , |
3392 | xprt->address_strings[RPC_DISPLAY_ADDR], |
3393 | xprt->address_strings[RPC_DISPLAY_PORT], |
3394 | xprt->address_strings[RPC_DISPLAY_PROTO]); |
3395 | else |
3396 | dprintk("RPC: set up xprt to %s (autobind) via %s\n" , |
3397 | xprt->address_strings[RPC_DISPLAY_ADDR], |
3398 | xprt->address_strings[RPC_DISPLAY_PROTO]); |
3399 | |
3400 | if (try_module_get(THIS_MODULE)) |
3401 | return xprt; |
3402 | ret = ERR_PTR(error: -EINVAL); |
3403 | out_err: |
3404 | xs_xprt_free(xprt); |
3405 | return ret; |
3406 | } |
3407 | |
3408 | /** |
3409 | * xs_setup_tcp_tls - Set up transport to use a TCP with TLS |
3410 | * @args: rpc transport creation arguments |
3411 | * |
3412 | */ |
3413 | static struct rpc_xprt *xs_setup_tcp_tls(struct xprt_create *args) |
3414 | { |
3415 | struct sockaddr *addr = args->dstaddr; |
3416 | struct rpc_xprt *xprt; |
3417 | struct sock_xprt *transport; |
3418 | struct rpc_xprt *ret; |
3419 | unsigned int max_slot_table_size = xprt_max_tcp_slot_table_entries; |
3420 | |
3421 | if (args->flags & XPRT_CREATE_INFINITE_SLOTS) |
3422 | max_slot_table_size = RPC_MAX_SLOT_TABLE_LIMIT; |
3423 | |
3424 | xprt = xs_setup_xprt(args, slot_table_size: xprt_tcp_slot_table_entries, |
3425 | max_slot_table_size); |
3426 | if (IS_ERR(ptr: xprt)) |
3427 | return xprt; |
3428 | transport = container_of(xprt, struct sock_xprt, xprt); |
3429 | |
3430 | xprt->prot = IPPROTO_TCP; |
3431 | xprt->xprt_class = &xs_tcp_transport; |
3432 | xprt->max_payload = RPC_MAX_FRAGMENT_SIZE; |
3433 | |
3434 | xprt->bind_timeout = XS_BIND_TO; |
3435 | xprt->reestablish_timeout = XS_TCP_INIT_REEST_TO; |
3436 | xprt->idle_timeout = XS_IDLE_DISC_TO; |
3437 | |
3438 | xprt->ops = &xs_tcp_ops; |
3439 | xprt->timeout = &xs_tcp_default_timeout; |
3440 | |
3441 | xprt->max_reconnect_timeout = xprt->timeout->to_maxval; |
3442 | xprt->connect_timeout = xprt->timeout->to_initval * |
3443 | (xprt->timeout->to_retries + 1); |
3444 | |
3445 | INIT_WORK(&transport->recv_worker, xs_stream_data_receive_workfn); |
3446 | INIT_WORK(&transport->error_worker, xs_error_handle); |
3447 | |
3448 | switch (args->xprtsec.policy) { |
3449 | case RPC_XPRTSEC_TLS_ANON: |
3450 | case RPC_XPRTSEC_TLS_X509: |
3451 | xprt->xprtsec = args->xprtsec; |
3452 | INIT_DELAYED_WORK(&transport->connect_worker, |
3453 | xs_tcp_tls_setup_socket); |
3454 | break; |
3455 | default: |
3456 | ret = ERR_PTR(error: -EACCES); |
3457 | goto out_err; |
3458 | } |
3459 | |
3460 | switch (addr->sa_family) { |
3461 | case AF_INET: |
3462 | if (((struct sockaddr_in *)addr)->sin_port != htons(0)) |
3463 | xprt_set_bound(xprt); |
3464 | |
3465 | xs_format_peer_addresses(xprt, protocol: "tcp" , RPCBIND_NETID_TCP); |
3466 | break; |
3467 | case AF_INET6: |
3468 | if (((struct sockaddr_in6 *)addr)->sin6_port != htons(0)) |
3469 | xprt_set_bound(xprt); |
3470 | |
3471 | xs_format_peer_addresses(xprt, protocol: "tcp" , RPCBIND_NETID_TCP6); |
3472 | break; |
3473 | default: |
3474 | ret = ERR_PTR(error: -EAFNOSUPPORT); |
3475 | goto out_err; |
3476 | } |
3477 | |
3478 | if (xprt_bound(xprt)) |
3479 | dprintk("RPC: set up xprt to %s (port %s) via %s\n" , |
3480 | xprt->address_strings[RPC_DISPLAY_ADDR], |
3481 | xprt->address_strings[RPC_DISPLAY_PORT], |
3482 | xprt->address_strings[RPC_DISPLAY_PROTO]); |
3483 | else |
3484 | dprintk("RPC: set up xprt to %s (autobind) via %s\n" , |
3485 | xprt->address_strings[RPC_DISPLAY_ADDR], |
3486 | xprt->address_strings[RPC_DISPLAY_PROTO]); |
3487 | |
3488 | if (try_module_get(THIS_MODULE)) |
3489 | return xprt; |
3490 | ret = ERR_PTR(error: -EINVAL); |
3491 | out_err: |
3492 | xs_xprt_free(xprt); |
3493 | return ret; |
3494 | } |
3495 | |
3496 | /** |
3497 | * xs_setup_bc_tcp - Set up transport to use a TCP backchannel socket |
3498 | * @args: rpc transport creation arguments |
3499 | * |
3500 | */ |
3501 | static struct rpc_xprt *xs_setup_bc_tcp(struct xprt_create *args) |
3502 | { |
3503 | struct sockaddr *addr = args->dstaddr; |
3504 | struct rpc_xprt *xprt; |
3505 | struct sock_xprt *transport; |
3506 | struct svc_sock *bc_sock; |
3507 | struct rpc_xprt *ret; |
3508 | |
3509 | xprt = xs_setup_xprt(args, slot_table_size: xprt_tcp_slot_table_entries, |
3510 | max_slot_table_size: xprt_tcp_slot_table_entries); |
3511 | if (IS_ERR(ptr: xprt)) |
3512 | return xprt; |
3513 | transport = container_of(xprt, struct sock_xprt, xprt); |
3514 | |
3515 | xprt->prot = IPPROTO_TCP; |
3516 | xprt->xprt_class = &xs_bc_tcp_transport; |
3517 | xprt->max_payload = RPC_MAX_FRAGMENT_SIZE; |
3518 | xprt->timeout = &xs_tcp_default_timeout; |
3519 | |
3520 | /* backchannel */ |
3521 | xprt_set_bound(xprt); |
3522 | xprt->bind_timeout = 0; |
3523 | xprt->reestablish_timeout = 0; |
3524 | xprt->idle_timeout = 0; |
3525 | |
3526 | xprt->ops = &bc_tcp_ops; |
3527 | |
3528 | switch (addr->sa_family) { |
3529 | case AF_INET: |
3530 | xs_format_peer_addresses(xprt, protocol: "tcp" , |
3531 | RPCBIND_NETID_TCP); |
3532 | break; |
3533 | case AF_INET6: |
3534 | xs_format_peer_addresses(xprt, protocol: "tcp" , |
3535 | RPCBIND_NETID_TCP6); |
3536 | break; |
3537 | default: |
3538 | ret = ERR_PTR(error: -EAFNOSUPPORT); |
3539 | goto out_err; |
3540 | } |
3541 | |
3542 | dprintk("RPC: set up xprt to %s (port %s) via %s\n" , |
3543 | xprt->address_strings[RPC_DISPLAY_ADDR], |
3544 | xprt->address_strings[RPC_DISPLAY_PORT], |
3545 | xprt->address_strings[RPC_DISPLAY_PROTO]); |
3546 | |
3547 | /* |
3548 | * Once we've associated a backchannel xprt with a connection, |
3549 | * we want to keep it around as long as the connection lasts, |
3550 | * in case we need to start using it for a backchannel again; |
3551 | * this reference won't be dropped until bc_xprt is destroyed. |
3552 | */ |
3553 | xprt_get(xprt); |
3554 | args->bc_xprt->xpt_bc_xprt = xprt; |
3555 | xprt->bc_xprt = args->bc_xprt; |
3556 | bc_sock = container_of(args->bc_xprt, struct svc_sock, sk_xprt); |
3557 | transport->sock = bc_sock->sk_sock; |
3558 | transport->inet = bc_sock->sk_sk; |
3559 | |
3560 | /* |
3561 | * Since we don't want connections for the backchannel, we set |
3562 | * the xprt status to connected |
3563 | */ |
3564 | xprt_set_connected(xprt); |
3565 | |
3566 | if (try_module_get(THIS_MODULE)) |
3567 | return xprt; |
3568 | |
3569 | args->bc_xprt->xpt_bc_xprt = NULL; |
3570 | args->bc_xprt->xpt_bc_xps = NULL; |
3571 | xprt_put(xprt); |
3572 | ret = ERR_PTR(error: -EINVAL); |
3573 | out_err: |
3574 | xs_xprt_free(xprt); |
3575 | return ret; |
3576 | } |
3577 | |
3578 | static struct xprt_class xs_local_transport = { |
3579 | .list = LIST_HEAD_INIT(xs_local_transport.list), |
3580 | .name = "named UNIX socket" , |
3581 | .owner = THIS_MODULE, |
3582 | .ident = XPRT_TRANSPORT_LOCAL, |
3583 | .setup = xs_setup_local, |
3584 | .netid = { "" }, |
3585 | }; |
3586 | |
3587 | static struct xprt_class xs_udp_transport = { |
3588 | .list = LIST_HEAD_INIT(xs_udp_transport.list), |
3589 | .name = "udp" , |
3590 | .owner = THIS_MODULE, |
3591 | .ident = XPRT_TRANSPORT_UDP, |
3592 | .setup = xs_setup_udp, |
3593 | .netid = { "udp" , "udp6" , "" }, |
3594 | }; |
3595 | |
3596 | static struct xprt_class xs_tcp_transport = { |
3597 | .list = LIST_HEAD_INIT(xs_tcp_transport.list), |
3598 | .name = "tcp" , |
3599 | .owner = THIS_MODULE, |
3600 | .ident = XPRT_TRANSPORT_TCP, |
3601 | .setup = xs_setup_tcp, |
3602 | .netid = { "tcp" , "tcp6" , "" }, |
3603 | }; |
3604 | |
3605 | static struct xprt_class xs_tcp_tls_transport = { |
3606 | .list = LIST_HEAD_INIT(xs_tcp_tls_transport.list), |
3607 | .name = "tcp-with-tls" , |
3608 | .owner = THIS_MODULE, |
3609 | .ident = XPRT_TRANSPORT_TCP_TLS, |
3610 | .setup = xs_setup_tcp_tls, |
3611 | .netid = { "tcp" , "tcp6" , "" }, |
3612 | }; |
3613 | |
3614 | static struct xprt_class xs_bc_tcp_transport = { |
3615 | .list = LIST_HEAD_INIT(xs_bc_tcp_transport.list), |
3616 | .name = "tcp NFSv4.1 backchannel" , |
3617 | .owner = THIS_MODULE, |
3618 | .ident = XPRT_TRANSPORT_BC_TCP, |
3619 | .setup = xs_setup_bc_tcp, |
3620 | .netid = { "" }, |
3621 | }; |
3622 | |
3623 | /** |
3624 | * init_socket_xprt - set up xprtsock's sysctls, register with RPC client |
3625 | * |
3626 | */ |
3627 | int init_socket_xprt(void) |
3628 | { |
3629 | if (!sunrpc_table_header) |
3630 | sunrpc_table_header = register_sysctl("sunrpc" , xs_tunables_table); |
3631 | |
3632 | xprt_register_transport(type: &xs_local_transport); |
3633 | xprt_register_transport(type: &xs_udp_transport); |
3634 | xprt_register_transport(type: &xs_tcp_transport); |
3635 | xprt_register_transport(type: &xs_tcp_tls_transport); |
3636 | xprt_register_transport(type: &xs_bc_tcp_transport); |
3637 | |
3638 | return 0; |
3639 | } |
3640 | |
3641 | /** |
3642 | * cleanup_socket_xprt - remove xprtsock's sysctls, unregister |
3643 | * |
3644 | */ |
3645 | void cleanup_socket_xprt(void) |
3646 | { |
3647 | if (sunrpc_table_header) { |
3648 | unregister_sysctl_table(table: sunrpc_table_header); |
3649 | sunrpc_table_header = NULL; |
3650 | } |
3651 | |
3652 | xprt_unregister_transport(type: &xs_local_transport); |
3653 | xprt_unregister_transport(type: &xs_udp_transport); |
3654 | xprt_unregister_transport(type: &xs_tcp_transport); |
3655 | xprt_unregister_transport(type: &xs_tcp_tls_transport); |
3656 | xprt_unregister_transport(type: &xs_bc_tcp_transport); |
3657 | } |
3658 | |
3659 | static int param_set_portnr(const char *val, const struct kernel_param *kp) |
3660 | { |
3661 | return param_set_uint_minmax(val, kp, |
3662 | RPC_MIN_RESVPORT, |
3663 | RPC_MAX_RESVPORT); |
3664 | } |
3665 | |
3666 | static const struct kernel_param_ops param_ops_portnr = { |
3667 | .set = param_set_portnr, |
3668 | .get = param_get_uint, |
3669 | }; |
3670 | |
3671 | #define param_check_portnr(name, p) \ |
3672 | __param_check(name, p, unsigned int); |
3673 | |
3674 | module_param_named(min_resvport, xprt_min_resvport, portnr, 0644); |
3675 | module_param_named(max_resvport, xprt_max_resvport, portnr, 0644); |
3676 | |
3677 | static int param_set_slot_table_size(const char *val, |
3678 | const struct kernel_param *kp) |
3679 | { |
3680 | return param_set_uint_minmax(val, kp, |
3681 | RPC_MIN_SLOT_TABLE, |
3682 | RPC_MAX_SLOT_TABLE); |
3683 | } |
3684 | |
3685 | static const struct kernel_param_ops param_ops_slot_table_size = { |
3686 | .set = param_set_slot_table_size, |
3687 | .get = param_get_uint, |
3688 | }; |
3689 | |
/*
 * Type check used by module_param_named() for parameters of type
 * "slot_table_size": the backing variable must be an unsigned int.
 * No trailing semicolon, matching the standard param_check_*() helpers
 * (see include/linux/moduleparam.h).
 */
#define param_check_slot_table_size(name, p) \
	__param_check(name, p, unsigned int)
3692 | |
3693 | static int param_set_max_slot_table_size(const char *val, |
3694 | const struct kernel_param *kp) |
3695 | { |
3696 | return param_set_uint_minmax(val, kp, |
3697 | RPC_MIN_SLOT_TABLE, |
3698 | RPC_MAX_SLOT_TABLE_LIMIT); |
3699 | } |
3700 | |
3701 | static const struct kernel_param_ops param_ops_max_slot_table_size = { |
3702 | .set = param_set_max_slot_table_size, |
3703 | .get = param_get_uint, |
3704 | }; |
3705 | |
3706 | #define param_check_max_slot_table_size(name, p) \ |
3707 | __param_check(name, p, unsigned int); |
3708 | |
3709 | module_param_named(tcp_slot_table_entries, xprt_tcp_slot_table_entries, |
3710 | slot_table_size, 0644); |
3711 | module_param_named(tcp_max_slot_table_entries, xprt_max_tcp_slot_table_entries, |
3712 | max_slot_table_size, 0644); |
3713 | module_param_named(udp_slot_table_entries, xprt_udp_slot_table_entries, |
3714 | slot_table_size, 0644); |
3715 | |