1 | // SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause |
2 | /* |
3 | * Copyright (c) 2014-2017 Oracle. All rights reserved. |
4 | * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved. |
5 | * |
6 | * This software is available to you under a choice of one of two |
7 | * licenses. You may choose to be licensed under the terms of the GNU |
8 | * General Public License (GPL) Version 2, available from the file |
9 | * COPYING in the main directory of this source tree, or the BSD-type |
10 | * license below: |
11 | * |
12 | * Redistribution and use in source and binary forms, with or without |
13 | * modification, are permitted provided that the following conditions |
14 | * are met: |
15 | * |
16 | * Redistributions of source code must retain the above copyright |
17 | * notice, this list of conditions and the following disclaimer. |
18 | * |
19 | * Redistributions in binary form must reproduce the above |
20 | * copyright notice, this list of conditions and the following |
21 | * disclaimer in the documentation and/or other materials provided |
22 | * with the distribution. |
23 | * |
24 | * Neither the name of the Network Appliance, Inc. nor the names of |
25 | * its contributors may be used to endorse or promote products |
26 | * derived from this software without specific prior written |
27 | * permission. |
28 | * |
29 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
30 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
31 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
32 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
33 | * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
34 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
35 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
36 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
37 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
38 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
39 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
40 | */ |
41 | |
42 | /* |
43 | * transport.c |
44 | * |
45 | * This file contains the top-level implementation of an RPC RDMA |
46 | * transport. |
47 | * |
48 | * Naming convention: functions beginning with xprt_ are part of the |
49 | * transport switch. All others are RPC RDMA internal. |
50 | */ |
51 | |
52 | #include <linux/module.h> |
53 | #include <linux/slab.h> |
54 | #include <linux/seq_file.h> |
55 | #include <linux/smp.h> |
56 | |
57 | #include <linux/sunrpc/addr.h> |
58 | #include <linux/sunrpc/svc_rdma.h> |
59 | |
60 | #include "xprt_rdma.h" |
61 | #include <trace/events/rpcrdma.h> |
62 | |
63 | /* |
64 | * tunables |
65 | */ |
66 | |
/* Slot-table size handed to xprt_alloc() when a transport is set up. */
static unsigned int xprt_rdma_slot_table_entries = RPCRDMA_DEF_SLOT_TABLE;
/* Inline size settings. These are not static, so they are presumably
 * consumed elsewhere in the module; no reader is visible in this file.
 */
unsigned int xprt_rdma_max_inline_read = RPCRDMA_DEF_INLINE;
unsigned int xprt_rdma_max_inline_write = RPCRDMA_DEF_INLINE;
/* Memory registration strategy selector; defaults to RPCRDMA_FRWR. */
unsigned int xprt_rdma_memreg_strategy = RPCRDMA_FRWR;
/* No explicit initializer, so this starts out as zero. */
int xprt_rdma_pad_optimize;
/* Forward declaration; the initialized definition is near the end of
 * this file.
 */
static struct xprt_class xprt_rdma;
73 | |
74 | #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) |
75 | |
76 | static unsigned int min_slot_table_size = RPCRDMA_MIN_SLOT_TABLE; |
77 | static unsigned int max_slot_table_size = RPCRDMA_MAX_SLOT_TABLE; |
78 | static unsigned int min_inline_size = RPCRDMA_MIN_INLINE; |
79 | static unsigned int max_inline_size = RPCRDMA_MAX_INLINE; |
80 | static unsigned int max_padding = PAGE_SIZE; |
81 | static unsigned int min_memreg = RPCRDMA_BOUNCEBUFFERS; |
82 | static unsigned int max_memreg = RPCRDMA_LAST - 1; |
83 | static unsigned int dummy; |
84 | |
85 | static struct ctl_table_header *; |
86 | |
87 | static struct ctl_table xr_tunables_table[] = { |
88 | { |
89 | .procname = "rdma_slot_table_entries" , |
90 | .data = &xprt_rdma_slot_table_entries, |
91 | .maxlen = sizeof(unsigned int), |
92 | .mode = 0644, |
93 | .proc_handler = proc_dointvec_minmax, |
94 | .extra1 = &min_slot_table_size, |
95 | .extra2 = &max_slot_table_size |
96 | }, |
97 | { |
98 | .procname = "rdma_max_inline_read" , |
99 | .data = &xprt_rdma_max_inline_read, |
100 | .maxlen = sizeof(unsigned int), |
101 | .mode = 0644, |
102 | .proc_handler = proc_dointvec_minmax, |
103 | .extra1 = &min_inline_size, |
104 | .extra2 = &max_inline_size, |
105 | }, |
106 | { |
107 | .procname = "rdma_max_inline_write" , |
108 | .data = &xprt_rdma_max_inline_write, |
109 | .maxlen = sizeof(unsigned int), |
110 | .mode = 0644, |
111 | .proc_handler = proc_dointvec_minmax, |
112 | .extra1 = &min_inline_size, |
113 | .extra2 = &max_inline_size, |
114 | }, |
115 | { |
116 | .procname = "rdma_inline_write_padding" , |
117 | .data = &dummy, |
118 | .maxlen = sizeof(unsigned int), |
119 | .mode = 0644, |
120 | .proc_handler = proc_dointvec_minmax, |
121 | .extra1 = SYSCTL_ZERO, |
122 | .extra2 = &max_padding, |
123 | }, |
124 | { |
125 | .procname = "rdma_memreg_strategy" , |
126 | .data = &xprt_rdma_memreg_strategy, |
127 | .maxlen = sizeof(unsigned int), |
128 | .mode = 0644, |
129 | .proc_handler = proc_dointvec_minmax, |
130 | .extra1 = &min_memreg, |
131 | .extra2 = &max_memreg, |
132 | }, |
133 | { |
134 | .procname = "rdma_pad_optimize" , |
135 | .data = &xprt_rdma_pad_optimize, |
136 | .maxlen = sizeof(unsigned int), |
137 | .mode = 0644, |
138 | .proc_handler = proc_dointvec, |
139 | }, |
140 | { }, |
141 | }; |
142 | |
143 | #endif |
144 | |
/* Forward declaration: xprt_setup_rdma() stores a pointer to this ops
 * table before the initialized definition appears later in the file.
 */
static const struct rpc_xprt_ops xprt_rdma_procs;
146 | |
147 | static void |
148 | xprt_rdma_format_addresses4(struct rpc_xprt *xprt, struct sockaddr *sap) |
149 | { |
150 | struct sockaddr_in *sin = (struct sockaddr_in *)sap; |
151 | char buf[20]; |
152 | |
153 | snprintf(buf, size: sizeof(buf), fmt: "%08x" , ntohl(sin->sin_addr.s_addr)); |
154 | xprt->address_strings[RPC_DISPLAY_HEX_ADDR] = kstrdup(s: buf, GFP_KERNEL); |
155 | |
156 | xprt->address_strings[RPC_DISPLAY_NETID] = RPCBIND_NETID_RDMA; |
157 | } |
158 | |
159 | static void |
160 | xprt_rdma_format_addresses6(struct rpc_xprt *xprt, struct sockaddr *sap) |
161 | { |
162 | struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)sap; |
163 | char buf[40]; |
164 | |
165 | snprintf(buf, size: sizeof(buf), fmt: "%pi6" , &sin6->sin6_addr); |
166 | xprt->address_strings[RPC_DISPLAY_HEX_ADDR] = kstrdup(s: buf, GFP_KERNEL); |
167 | |
168 | xprt->address_strings[RPC_DISPLAY_NETID] = RPCBIND_NETID_RDMA6; |
169 | } |
170 | |
171 | void |
172 | xprt_rdma_format_addresses(struct rpc_xprt *xprt, struct sockaddr *sap) |
173 | { |
174 | char buf[128]; |
175 | |
176 | switch (sap->sa_family) { |
177 | case AF_INET: |
178 | xprt_rdma_format_addresses4(xprt, sap); |
179 | break; |
180 | case AF_INET6: |
181 | xprt_rdma_format_addresses6(xprt, sap); |
182 | break; |
183 | default: |
184 | pr_err("rpcrdma: Unrecognized address family\n" ); |
185 | return; |
186 | } |
187 | |
188 | (void)rpc_ntop(sap, buf, sizeof(buf)); |
189 | xprt->address_strings[RPC_DISPLAY_ADDR] = kstrdup(s: buf, GFP_KERNEL); |
190 | |
191 | snprintf(buf, size: sizeof(buf), fmt: "%u" , rpc_get_port(sap)); |
192 | xprt->address_strings[RPC_DISPLAY_PORT] = kstrdup(s: buf, GFP_KERNEL); |
193 | |
194 | snprintf(buf, size: sizeof(buf), fmt: "%4hx" , rpc_get_port(sap)); |
195 | xprt->address_strings[RPC_DISPLAY_HEX_PORT] = kstrdup(s: buf, GFP_KERNEL); |
196 | |
197 | xprt->address_strings[RPC_DISPLAY_PROTO] = "rdma" ; |
198 | } |
199 | |
200 | void |
201 | xprt_rdma_free_addresses(struct rpc_xprt *xprt) |
202 | { |
203 | unsigned int i; |
204 | |
205 | for (i = 0; i < RPC_DISPLAY_MAX; i++) |
206 | switch (i) { |
207 | case RPC_DISPLAY_PROTO: |
208 | case RPC_DISPLAY_NETID: |
209 | continue; |
210 | default: |
211 | kfree(objp: xprt->address_strings[i]); |
212 | } |
213 | } |
214 | |
215 | /** |
216 | * xprt_rdma_connect_worker - establish connection in the background |
217 | * @work: worker thread context |
218 | * |
219 | * Requester holds the xprt's send lock to prevent activity on this |
220 | * transport while a fresh connection is being established. RPC tasks |
221 | * sleep on the xprt's pending queue waiting for connect to complete. |
222 | */ |
223 | static void |
224 | xprt_rdma_connect_worker(struct work_struct *work) |
225 | { |
226 | struct rpcrdma_xprt *r_xprt = container_of(work, struct rpcrdma_xprt, |
227 | rx_connect_worker.work); |
228 | struct rpc_xprt *xprt = &r_xprt->rx_xprt; |
229 | unsigned int pflags = current->flags; |
230 | int rc; |
231 | |
232 | if (atomic_read(v: &xprt->swapper)) |
233 | current->flags |= PF_MEMALLOC; |
234 | rc = rpcrdma_xprt_connect(r_xprt); |
235 | xprt_clear_connecting(xprt); |
236 | if (!rc) { |
237 | xprt->connect_cookie++; |
238 | xprt->stat.connect_count++; |
239 | xprt->stat.connect_time += (long)jiffies - |
240 | xprt->stat.connect_start; |
241 | xprt_set_connected(xprt); |
242 | rc = -EAGAIN; |
243 | } else |
244 | rpcrdma_xprt_disconnect(r_xprt); |
245 | xprt_unlock_connect(xprt, r_xprt); |
246 | xprt_wake_pending_tasks(xprt, status: rc); |
247 | current_restore_flags(orig_flags: pflags, PF_MEMALLOC); |
248 | } |
249 | |
250 | /** |
251 | * xprt_rdma_inject_disconnect - inject a connection fault |
252 | * @xprt: transport context |
253 | * |
254 | * If @xprt is connected, disconnect it to simulate spurious |
255 | * connection loss. Caller must hold @xprt's send lock to |
256 | * ensure that data structures and hardware resources are |
257 | * stable during the rdma_disconnect() call. |
258 | */ |
259 | static void |
260 | xprt_rdma_inject_disconnect(struct rpc_xprt *xprt) |
261 | { |
262 | struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); |
263 | |
264 | trace_xprtrdma_op_inject_dsc(r_xprt); |
265 | rdma_disconnect(id: r_xprt->rx_ep->re_id); |
266 | } |
267 | |
268 | /** |
269 | * xprt_rdma_destroy - Full tear down of transport |
270 | * @xprt: doomed transport context |
271 | * |
272 | * Caller guarantees there will be no more calls to us with |
273 | * this @xprt. |
274 | */ |
275 | static void |
276 | xprt_rdma_destroy(struct rpc_xprt *xprt) |
277 | { |
278 | struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); |
279 | |
280 | cancel_delayed_work_sync(dwork: &r_xprt->rx_connect_worker); |
281 | |
282 | rpcrdma_xprt_disconnect(r_xprt); |
283 | rpcrdma_buffer_destroy(&r_xprt->rx_buf); |
284 | |
285 | xprt_rdma_free_addresses(xprt); |
286 | xprt_free(xprt); |
287 | |
288 | module_put(THIS_MODULE); |
289 | } |
290 | |
/* 60 second timeout, no retries.
 *
 * Only the initial and maximum values are given; every other member of
 * the structure is left zero-initialized. xprt_setup_rdma() installs
 * this as the transport's timeout and also derives connect_timeout and
 * max_reconnect_timeout from it.
 */
static const struct rpc_timeout xprt_rdma_default_timeout = {
	.to_initval = 60 * HZ,
	.to_maxval = 60 * HZ,
};
296 | |
297 | /** |
298 | * xprt_setup_rdma - Set up transport to use RDMA |
299 | * |
300 | * @args: rpc transport arguments |
301 | */ |
302 | static struct rpc_xprt * |
303 | xprt_setup_rdma(struct xprt_create *args) |
304 | { |
305 | struct rpc_xprt *xprt; |
306 | struct rpcrdma_xprt *new_xprt; |
307 | struct sockaddr *sap; |
308 | int rc; |
309 | |
310 | if (args->addrlen > sizeof(xprt->addr)) |
311 | return ERR_PTR(error: -EBADF); |
312 | |
313 | if (!try_module_get(THIS_MODULE)) |
314 | return ERR_PTR(error: -EIO); |
315 | |
316 | xprt = xprt_alloc(net: args->net, size: sizeof(struct rpcrdma_xprt), num_prealloc: 0, |
317 | max_req: xprt_rdma_slot_table_entries); |
318 | if (!xprt) { |
319 | module_put(THIS_MODULE); |
320 | return ERR_PTR(error: -ENOMEM); |
321 | } |
322 | |
323 | xprt->timeout = &xprt_rdma_default_timeout; |
324 | xprt->connect_timeout = xprt->timeout->to_initval; |
325 | xprt->max_reconnect_timeout = xprt->timeout->to_maxval; |
326 | xprt->bind_timeout = RPCRDMA_BIND_TO; |
327 | xprt->reestablish_timeout = RPCRDMA_INIT_REEST_TO; |
328 | xprt->idle_timeout = RPCRDMA_IDLE_DISC_TO; |
329 | |
330 | xprt->resvport = 0; /* privileged port not needed */ |
331 | xprt->ops = &xprt_rdma_procs; |
332 | |
333 | /* |
334 | * Set up RDMA-specific connect data. |
335 | */ |
336 | sap = args->dstaddr; |
337 | |
338 | /* Ensure xprt->addr holds valid server TCP (not RDMA) |
339 | * address, for any side protocols which peek at it */ |
340 | xprt->prot = IPPROTO_TCP; |
341 | xprt->xprt_class = &xprt_rdma; |
342 | xprt->addrlen = args->addrlen; |
343 | memcpy(&xprt->addr, sap, xprt->addrlen); |
344 | |
345 | if (rpc_get_port(sap)) |
346 | xprt_set_bound(xprt); |
347 | xprt_rdma_format_addresses(xprt, sap); |
348 | |
349 | new_xprt = rpcx_to_rdmax(xprt); |
350 | rc = rpcrdma_buffer_create(new_xprt); |
351 | if (rc) { |
352 | xprt_rdma_free_addresses(xprt); |
353 | xprt_free(xprt); |
354 | module_put(THIS_MODULE); |
355 | return ERR_PTR(error: rc); |
356 | } |
357 | |
358 | INIT_DELAYED_WORK(&new_xprt->rx_connect_worker, |
359 | xprt_rdma_connect_worker); |
360 | |
361 | xprt->max_payload = RPCRDMA_MAX_DATA_SEGS << PAGE_SHIFT; |
362 | |
363 | return xprt; |
364 | } |
365 | |
366 | /** |
367 | * xprt_rdma_close - close a transport connection |
368 | * @xprt: transport context |
369 | * |
370 | * Called during autoclose or device removal. |
371 | * |
372 | * Caller holds @xprt's send lock to prevent activity on this |
373 | * transport while the connection is torn down. |
374 | */ |
375 | void xprt_rdma_close(struct rpc_xprt *xprt) |
376 | { |
377 | struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); |
378 | |
379 | rpcrdma_xprt_disconnect(r_xprt); |
380 | |
381 | xprt->reestablish_timeout = 0; |
382 | ++xprt->connect_cookie; |
383 | xprt_disconnect_done(xprt); |
384 | } |
385 | |
386 | /** |
387 | * xprt_rdma_set_port - update server port with rpcbind result |
388 | * @xprt: controlling RPC transport |
389 | * @port: new port value |
390 | * |
391 | * Transport connect status is unchanged. |
392 | */ |
393 | static void |
394 | xprt_rdma_set_port(struct rpc_xprt *xprt, u16 port) |
395 | { |
396 | struct sockaddr *sap = (struct sockaddr *)&xprt->addr; |
397 | char buf[8]; |
398 | |
399 | rpc_set_port(sap, port); |
400 | |
401 | kfree(objp: xprt->address_strings[RPC_DISPLAY_PORT]); |
402 | snprintf(buf, size: sizeof(buf), fmt: "%u" , port); |
403 | xprt->address_strings[RPC_DISPLAY_PORT] = kstrdup(s: buf, GFP_KERNEL); |
404 | |
405 | kfree(objp: xprt->address_strings[RPC_DISPLAY_HEX_PORT]); |
406 | snprintf(buf, size: sizeof(buf), fmt: "%4hx" , port); |
407 | xprt->address_strings[RPC_DISPLAY_HEX_PORT] = kstrdup(s: buf, GFP_KERNEL); |
408 | } |
409 | |
/**
 * xprt_rdma_timer - invoked when an RPC times out
 * @xprt: controlling RPC transport
 * @task: RPC task that timed out (not referenced by this function)
 *
 * Invoked when the transport is still connected, but an RPC
 * retransmit timeout occurs.
 *
 * Since RDMA connections don't have a keep-alive, forcibly
 * disconnect and retry to connect. This drives full
 * detection of the network path, and retransmissions of
 * all pending RPCs.
 */
static void
xprt_rdma_timer(struct rpc_xprt *xprt, struct rpc_task *task)
{
	xprt_force_disconnect(xprt);
}
428 | |
429 | /** |
430 | * xprt_rdma_set_connect_timeout - set timeouts for establishing a connection |
431 | * @xprt: controlling transport instance |
432 | * @connect_timeout: reconnect timeout after client disconnects |
433 | * @reconnect_timeout: reconnect timeout after server disconnects |
434 | * |
435 | */ |
436 | static void xprt_rdma_set_connect_timeout(struct rpc_xprt *xprt, |
437 | unsigned long connect_timeout, |
438 | unsigned long reconnect_timeout) |
439 | { |
440 | struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); |
441 | |
442 | trace_xprtrdma_op_set_cto(r_xprt, connect: connect_timeout, reconnect: reconnect_timeout); |
443 | |
444 | spin_lock(lock: &xprt->transport_lock); |
445 | |
446 | if (connect_timeout < xprt->connect_timeout) { |
447 | struct rpc_timeout to; |
448 | unsigned long initval; |
449 | |
450 | to = *xprt->timeout; |
451 | initval = connect_timeout; |
452 | if (initval < RPCRDMA_INIT_REEST_TO << 1) |
453 | initval = RPCRDMA_INIT_REEST_TO << 1; |
454 | to.to_initval = initval; |
455 | to.to_maxval = initval; |
456 | r_xprt->rx_timeout = to; |
457 | xprt->timeout = &r_xprt->rx_timeout; |
458 | xprt->connect_timeout = connect_timeout; |
459 | } |
460 | |
461 | if (reconnect_timeout < xprt->max_reconnect_timeout) |
462 | xprt->max_reconnect_timeout = reconnect_timeout; |
463 | |
464 | spin_unlock(lock: &xprt->transport_lock); |
465 | } |
466 | |
467 | /** |
468 | * xprt_rdma_connect - schedule an attempt to reconnect |
469 | * @xprt: transport state |
470 | * @task: RPC scheduler context (unused) |
471 | * |
472 | */ |
473 | static void |
474 | xprt_rdma_connect(struct rpc_xprt *xprt, struct rpc_task *task) |
475 | { |
476 | struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); |
477 | struct rpcrdma_ep *ep = r_xprt->rx_ep; |
478 | unsigned long delay; |
479 | |
480 | WARN_ON_ONCE(!xprt_lock_connect(xprt, task, r_xprt)); |
481 | |
482 | delay = 0; |
483 | if (ep && ep->re_connect_status != 0) { |
484 | delay = xprt_reconnect_delay(xprt); |
485 | xprt_reconnect_backoff(xprt, RPCRDMA_INIT_REEST_TO); |
486 | } |
487 | trace_xprtrdma_op_connect(r_xprt, delay); |
488 | queue_delayed_work(wq: system_long_wq, dwork: &r_xprt->rx_connect_worker, delay); |
489 | } |
490 | |
491 | /** |
492 | * xprt_rdma_alloc_slot - allocate an rpc_rqst |
493 | * @xprt: controlling RPC transport |
494 | * @task: RPC task requesting a fresh rpc_rqst |
495 | * |
496 | * tk_status values: |
497 | * %0 if task->tk_rqstp points to a fresh rpc_rqst |
498 | * %-EAGAIN if no rpc_rqst is available; queued on backlog |
499 | */ |
500 | static void |
501 | xprt_rdma_alloc_slot(struct rpc_xprt *xprt, struct rpc_task *task) |
502 | { |
503 | struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); |
504 | struct rpcrdma_req *req; |
505 | |
506 | req = rpcrdma_buffer_get(&r_xprt->rx_buf); |
507 | if (!req) |
508 | goto out_sleep; |
509 | task->tk_rqstp = &req->rl_slot; |
510 | task->tk_status = 0; |
511 | return; |
512 | |
513 | out_sleep: |
514 | task->tk_status = -ENOMEM; |
515 | xprt_add_backlog(xprt, task); |
516 | } |
517 | |
518 | /** |
519 | * xprt_rdma_free_slot - release an rpc_rqst |
520 | * @xprt: controlling RPC transport |
521 | * @rqst: rpc_rqst to release |
522 | * |
523 | */ |
524 | static void |
525 | xprt_rdma_free_slot(struct rpc_xprt *xprt, struct rpc_rqst *rqst) |
526 | { |
527 | struct rpcrdma_xprt *r_xprt = |
528 | container_of(xprt, struct rpcrdma_xprt, rx_xprt); |
529 | |
530 | rpcrdma_reply_put(buffers: &r_xprt->rx_buf, req: rpcr_to_rdmar(rqst)); |
531 | if (!xprt_wake_up_backlog(xprt, req: rqst)) { |
532 | memset(rqst, 0, sizeof(*rqst)); |
533 | rpcrdma_buffer_put(buffers: &r_xprt->rx_buf, req: rpcr_to_rdmar(rqst)); |
534 | } |
535 | } |
536 | |
537 | static bool rpcrdma_check_regbuf(struct rpcrdma_xprt *r_xprt, |
538 | struct rpcrdma_regbuf *rb, size_t size, |
539 | gfp_t flags) |
540 | { |
541 | if (unlikely(rdmab_length(rb) < size)) { |
542 | if (!rpcrdma_regbuf_realloc(rb, size, flags)) |
543 | return false; |
544 | r_xprt->rx_stats.hardway_register_count += size; |
545 | } |
546 | return true; |
547 | } |
548 | |
549 | /** |
550 | * xprt_rdma_allocate - allocate transport resources for an RPC |
551 | * @task: RPC task |
552 | * |
553 | * Return values: |
554 | * 0: Success; rq_buffer points to RPC buffer to use |
555 | * ENOMEM: Out of memory, call again later |
556 | * EIO: A permanent error occurred, do not retry |
557 | */ |
558 | static int |
559 | xprt_rdma_allocate(struct rpc_task *task) |
560 | { |
561 | struct rpc_rqst *rqst = task->tk_rqstp; |
562 | struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(rqst->rq_xprt); |
563 | struct rpcrdma_req *req = rpcr_to_rdmar(rqst); |
564 | gfp_t flags = rpc_task_gfp_mask(); |
565 | |
566 | if (!rpcrdma_check_regbuf(r_xprt, rb: req->rl_sendbuf, size: rqst->rq_callsize, |
567 | flags)) |
568 | goto out_fail; |
569 | if (!rpcrdma_check_regbuf(r_xprt, rb: req->rl_recvbuf, size: rqst->rq_rcvsize, |
570 | flags)) |
571 | goto out_fail; |
572 | |
573 | rqst->rq_buffer = rdmab_data(rb: req->rl_sendbuf); |
574 | rqst->rq_rbuffer = rdmab_data(rb: req->rl_recvbuf); |
575 | return 0; |
576 | |
577 | out_fail: |
578 | return -ENOMEM; |
579 | } |
580 | |
581 | /** |
582 | * xprt_rdma_free - release resources allocated by xprt_rdma_allocate |
583 | * @task: RPC task |
584 | * |
585 | * Caller guarantees rqst->rq_buffer is non-NULL. |
586 | */ |
587 | static void |
588 | xprt_rdma_free(struct rpc_task *task) |
589 | { |
590 | struct rpc_rqst *rqst = task->tk_rqstp; |
591 | struct rpcrdma_req *req = rpcr_to_rdmar(rqst); |
592 | |
593 | if (unlikely(!list_empty(&req->rl_registered))) { |
594 | trace_xprtrdma_mrs_zap(task); |
595 | frwr_unmap_sync(rpcx_to_rdmax(rqst->rq_xprt), req); |
596 | } |
597 | |
598 | /* XXX: If the RPC is completing because of a signal and |
599 | * not because a reply was received, we ought to ensure |
600 | * that the Send completion has fired, so that memory |
601 | * involved with the Send is not still visible to the NIC. |
602 | */ |
603 | } |
604 | |
605 | /** |
606 | * xprt_rdma_send_request - marshal and send an RPC request |
607 | * @rqst: RPC message in rq_snd_buf |
608 | * |
609 | * Caller holds the transport's write lock. |
610 | * |
611 | * Returns: |
612 | * %0 if the RPC message has been sent |
613 | * %-ENOTCONN if the caller should reconnect and call again |
614 | * %-EAGAIN if the caller should call again |
615 | * %-ENOBUFS if the caller should call again after a delay |
616 | * %-EMSGSIZE if encoding ran out of buffer space. The request |
617 | * was not sent. Do not try to send this message again. |
618 | * %-EIO if an I/O error occurred. The request was not sent. |
619 | * Do not try to send this message again. |
620 | */ |
621 | static int |
622 | xprt_rdma_send_request(struct rpc_rqst *rqst) |
623 | { |
624 | struct rpc_xprt *xprt = rqst->rq_xprt; |
625 | struct rpcrdma_req *req = rpcr_to_rdmar(rqst); |
626 | struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); |
627 | int rc = 0; |
628 | |
629 | #if defined(CONFIG_SUNRPC_BACKCHANNEL) |
630 | if (unlikely(!rqst->rq_buffer)) |
631 | return xprt_rdma_bc_send_reply(rqst); |
632 | #endif /* CONFIG_SUNRPC_BACKCHANNEL */ |
633 | |
634 | if (!xprt_connected(xprt)) |
635 | return -ENOTCONN; |
636 | |
637 | if (!xprt_request_get_cong(xprt, req: rqst)) |
638 | return -EBADSLT; |
639 | |
640 | rc = rpcrdma_marshal_req(r_xprt, rqst); |
641 | if (rc < 0) |
642 | goto failed_marshal; |
643 | |
644 | /* Must suppress retransmit to maintain credits */ |
645 | if (rqst->rq_connect_cookie == xprt->connect_cookie) |
646 | goto drop_connection; |
647 | rqst->rq_xtime = ktime_get(); |
648 | |
649 | if (frwr_send(r_xprt, req)) |
650 | goto drop_connection; |
651 | |
652 | rqst->rq_xmit_bytes_sent += rqst->rq_snd_buf.len; |
653 | |
654 | /* An RPC with no reply will throw off credit accounting, |
655 | * so drop the connection to reset the credit grant. |
656 | */ |
657 | if (!rpc_reply_expected(task: rqst->rq_task)) |
658 | goto drop_connection; |
659 | return 0; |
660 | |
661 | failed_marshal: |
662 | if (rc != -ENOTCONN) |
663 | return rc; |
664 | drop_connection: |
665 | xprt_rdma_close(xprt); |
666 | return -ENOTCONN; |
667 | } |
668 | |
669 | void xprt_rdma_print_stats(struct rpc_xprt *xprt, struct seq_file *seq) |
670 | { |
671 | struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); |
672 | long idle_time = 0; |
673 | |
674 | if (xprt_connected(xprt)) |
675 | idle_time = (long)(jiffies - xprt->last_used) / HZ; |
676 | |
677 | seq_puts(m: seq, s: "\txprt:\trdma " ); |
678 | seq_printf(m: seq, fmt: "%u %lu %lu %lu %ld %lu %lu %lu %llu %llu " , |
679 | 0, /* need a local port? */ |
680 | xprt->stat.bind_count, |
681 | xprt->stat.connect_count, |
682 | xprt->stat.connect_time / HZ, |
683 | idle_time, |
684 | xprt->stat.sends, |
685 | xprt->stat.recvs, |
686 | xprt->stat.bad_xids, |
687 | xprt->stat.req_u, |
688 | xprt->stat.bklog_u); |
689 | seq_printf(m: seq, fmt: "%lu %lu %lu %llu %llu %llu %llu %lu %lu %lu %lu " , |
690 | r_xprt->rx_stats.read_chunk_count, |
691 | r_xprt->rx_stats.write_chunk_count, |
692 | r_xprt->rx_stats.reply_chunk_count, |
693 | r_xprt->rx_stats.total_rdma_request, |
694 | r_xprt->rx_stats.total_rdma_reply, |
695 | r_xprt->rx_stats.pullup_copy_count, |
696 | r_xprt->rx_stats.fixup_copy_count, |
697 | r_xprt->rx_stats.hardway_register_count, |
698 | r_xprt->rx_stats.failed_marshal_count, |
699 | r_xprt->rx_stats.bad_reply_count, |
700 | r_xprt->rx_stats.nomsg_call_count); |
701 | seq_printf(m: seq, fmt: "%lu %lu %lu %lu %lu %lu\n" , |
702 | r_xprt->rx_stats.mrs_recycled, |
703 | r_xprt->rx_stats.mrs_orphaned, |
704 | r_xprt->rx_stats.mrs_allocated, |
705 | r_xprt->rx_stats.local_inv_needed, |
706 | r_xprt->rx_stats.empty_sendctx_q, |
707 | r_xprt->rx_stats.reply_waits_for_send); |
708 | } |
709 | |
/* .enable_swap transport method: performs no work and always
 * returns 0. @xprt is not referenced.
 */
static int
xprt_rdma_enable_swap(struct rpc_xprt *xprt)
{
	return 0;
}
715 | |
/* .disable_swap transport method: intentionally empty. @xprt is not
 * referenced.
 */
static void
xprt_rdma_disable_swap(struct rpc_xprt *xprt)
{
}
720 | |
721 | /* |
722 | * Plumbing for rpc transport switch and kernel module |
723 | */ |
724 | |
725 | static const struct rpc_xprt_ops xprt_rdma_procs = { |
726 | .reserve_xprt = xprt_reserve_xprt_cong, |
727 | .release_xprt = xprt_release_xprt_cong, /* sunrpc/xprt.c */ |
728 | .alloc_slot = xprt_rdma_alloc_slot, |
729 | .free_slot = xprt_rdma_free_slot, |
730 | .release_request = xprt_release_rqst_cong, /* ditto */ |
731 | .wait_for_reply_request = xprt_wait_for_reply_request_def, /* ditto */ |
732 | .timer = xprt_rdma_timer, |
733 | .rpcbind = rpcb_getport_async, /* sunrpc/rpcb_clnt.c */ |
734 | .set_port = xprt_rdma_set_port, |
735 | .connect = xprt_rdma_connect, |
736 | .buf_alloc = xprt_rdma_allocate, |
737 | .buf_free = xprt_rdma_free, |
738 | .send_request = xprt_rdma_send_request, |
739 | .close = xprt_rdma_close, |
740 | .destroy = xprt_rdma_destroy, |
741 | .set_connect_timeout = xprt_rdma_set_connect_timeout, |
742 | .print_stats = xprt_rdma_print_stats, |
743 | .enable_swap = xprt_rdma_enable_swap, |
744 | .disable_swap = xprt_rdma_disable_swap, |
745 | .inject_disconnect = xprt_rdma_inject_disconnect, |
746 | #if defined(CONFIG_SUNRPC_BACKCHANNEL) |
747 | .bc_setup = xprt_rdma_bc_setup, |
748 | .bc_maxpayload = xprt_rdma_bc_maxpayload, |
749 | .bc_num_slots = xprt_rdma_bc_max_slots, |
750 | .bc_free_rqst = xprt_rdma_bc_free_rqst, |
751 | .bc_destroy = xprt_rdma_bc_destroy, |
752 | #endif |
753 | }; |
754 | |
755 | static struct xprt_class xprt_rdma = { |
756 | .list = LIST_HEAD_INIT(xprt_rdma.list), |
757 | .name = "rdma" , |
758 | .owner = THIS_MODULE, |
759 | .ident = XPRT_TRANSPORT_RDMA, |
760 | .setup = xprt_setup_rdma, |
761 | .netid = { "rdma" , "rdma6" , "" }, |
762 | }; |
763 | |
764 | void xprt_rdma_cleanup(void) |
765 | { |
766 | #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) |
767 | if (sunrpc_table_header) { |
768 | unregister_sysctl_table(table: sunrpc_table_header); |
769 | sunrpc_table_header = NULL; |
770 | } |
771 | #endif |
772 | |
773 | xprt_unregister_transport(type: &xprt_rdma); |
774 | xprt_unregister_transport(type: &xprt_rdma_bc); |
775 | } |
776 | |
777 | int xprt_rdma_init(void) |
778 | { |
779 | int rc; |
780 | |
781 | rc = xprt_register_transport(type: &xprt_rdma); |
782 | if (rc) |
783 | return rc; |
784 | |
785 | rc = xprt_register_transport(type: &xprt_rdma_bc); |
786 | if (rc) { |
787 | xprt_unregister_transport(type: &xprt_rdma); |
788 | return rc; |
789 | } |
790 | |
791 | #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) |
792 | if (!sunrpc_table_header) |
793 | sunrpc_table_header = register_sysctl("sunrpc" , xr_tunables_table); |
794 | #endif |
795 | return 0; |
796 | } |
797 | |