// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/*
 * Copyright (c) 2016-2018 Oracle. All rights reserved.
 * Copyright (c) 2014 Open Grid Computing, Inc. All rights reserved.
 * Copyright (c) 2005-2006 Network Appliance, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses. You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the BSD-type
 * license below:
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *      Redistributions of source code must retain the above copyright
 *      notice, this list of conditions and the following disclaimer.
 *
 *      Redistributions in binary form must reproduce the above
 *      copyright notice, this list of conditions and the following
 *      disclaimer in the documentation and/or other materials provided
 *      with the distribution.
 *
 *      Neither the name of the Network Appliance, Inc. nor the names of
 *      its contributors may be used to endorse or promote products
 *      derived from this software without specific prior written
 *      permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * Author: Tom Tucker <tom@opengridcomputing.com>
 */

/* Operation
 *
 * The main entry point is svc_rdma_sendto. This is called by the
 * RPC server when an RPC Reply is ready to be transmitted to a client.
 *
 * The passed-in svc_rqst contains a struct xdr_buf which holds an
 * XDR-encoded RPC Reply message. sendto must construct the RPC-over-RDMA
 * transport header, post all Write WRs needed for this Reply, then post
 * a Send WR conveying the transport header and the RPC message itself to
 * the client.
 *
 * svc_rdma_sendto must fully transmit the Reply before returning, as
 * the svc_rqst will be recycled as soon as sendto returns. Remaining
 * resources referred to by the svc_rqst are also recycled at that time.
 * Therefore any resources that must remain longer must be detached
 * from the svc_rqst and released later.
 *
 * Page Management
 *
 * The I/O that performs Reply transmission is asynchronous, and may
 * complete well after sendto returns. Thus pages under I/O must be
 * removed from the svc_rqst before sendto returns.
 *
 * The logic here depends on Send Queue and completion ordering. Since
 * the Send WR is always posted last, it will always complete last. Thus
 * when it completes, it is guaranteed that all previous Write WRs have
 * also completed.
 *
 * Write WRs are constructed and posted. Each Write segment gets its own
 * svc_rdma_rw_ctxt, allowing the Write completion handler to find and
 * DMA-unmap the pages under I/O for that Write segment. The Write
 * completion handler does not release any pages.
 *
 * When the Send WR is constructed, it also gets its own svc_rdma_send_ctxt.
 * The ownership of all of the Reply's pages is transferred into that
 * ctxt, the Send WR is posted, and sendto returns.
 *
 * The svc_rdma_send_ctxt is presented when the Send WR completes. The
 * Send completion handler finally releases the Reply's pages.
 *
 * This mechanism also assumes that completions on the transport's Send
 * Completion Queue do not run in parallel. Otherwise a Write completion
 * and Send completion running at the same time could release pages that
 * are still DMA-mapped.
 *
 * Error Handling
 *
 * - If the Send WR is posted successfully, it will either complete
 *   successfully, or get flushed. Either way, the Send completion
 *   handler releases the Reply's pages.
 * - If the Send WR cannot be posted, the forward path releases
 *   the Reply's pages.
 *
 * This handles the case, without the use of page reference counting,
 * where two different Write segments send portions of the same page.
 */
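
/* Illustrative timeline of the ordering described above (not a literal
 * call trace): for a Reply that carries one Write chunk, svc_rdma_sendto
 * posts
 *
 *	Write WR(s)  - carry the chunk payload
 *	Send WR      - carries the transport header (+ any inline message)
 *
 * Because the Send WR is posted last on the same Send Queue, its
 * completion implies that the earlier Write WRs have also completed,
 * so the Send completion handler can safely release the Reply's pages.
 */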

#include <linux/spinlock.h>
#include <asm/unaligned.h>

#include <rdma/ib_verbs.h>
#include <rdma/rdma_cm.h>

#include <linux/sunrpc/debug.h>
#include <linux/sunrpc/svc_rdma.h>

#include "xprt_rdma.h"
#include <trace/events/rpcrdma.h>

static void svc_rdma_wc_send(struct ib_cq *cq, struct ib_wc *wc);

static struct svc_rdma_send_ctxt *
svc_rdma_send_ctxt_alloc(struct svcxprt_rdma *rdma)
{
	int node = ibdev_to_node(rdma->sc_cm_id->device);
	struct svc_rdma_send_ctxt *ctxt;
	dma_addr_t addr;
	void *buffer;
	int i;

	ctxt = kzalloc_node(struct_size(ctxt, sc_sges, rdma->sc_max_send_sges),
			    GFP_KERNEL, node);
	if (!ctxt)
		goto fail0;
	buffer = kmalloc_node(rdma->sc_max_req_size, GFP_KERNEL, node);
	if (!buffer)
		goto fail1;
	addr = ib_dma_map_single(rdma->sc_pd->device, buffer,
				 rdma->sc_max_req_size, DMA_TO_DEVICE);
	if (ib_dma_mapping_error(rdma->sc_pd->device, addr))
		goto fail2;

	svc_rdma_send_cid_init(rdma, &ctxt->sc_cid);

	ctxt->sc_rdma = rdma;
	ctxt->sc_send_wr.next = NULL;
	ctxt->sc_send_wr.wr_cqe = &ctxt->sc_cqe;
	ctxt->sc_send_wr.sg_list = ctxt->sc_sges;
	ctxt->sc_send_wr.send_flags = IB_SEND_SIGNALED;
	ctxt->sc_cqe.done = svc_rdma_wc_send;
	ctxt->sc_xprt_buf = buffer;
	xdr_buf_init(&ctxt->sc_hdrbuf, ctxt->sc_xprt_buf,
		     rdma->sc_max_req_size);
	ctxt->sc_sges[0].addr = addr;

	for (i = 0; i < rdma->sc_max_send_sges; i++)
		ctxt->sc_sges[i].lkey = rdma->sc_pd->local_dma_lkey;
	return ctxt;

fail2:
	kfree(buffer);
fail1:
	kfree(ctxt);
fail0:
	return NULL;
}

/**
 * svc_rdma_send_ctxts_destroy - Release all send_ctxt's for an xprt
 * @rdma: svcxprt_rdma being torn down
 *
 */
void svc_rdma_send_ctxts_destroy(struct svcxprt_rdma *rdma)
{
	struct svc_rdma_send_ctxt *ctxt;
	struct llist_node *node;

	while ((node = llist_del_first(&rdma->sc_send_ctxts)) != NULL) {
		ctxt = llist_entry(node, struct svc_rdma_send_ctxt, sc_node);
		ib_dma_unmap_single(rdma->sc_pd->device,
				    ctxt->sc_sges[0].addr,
				    rdma->sc_max_req_size,
				    DMA_TO_DEVICE);
		kfree(ctxt->sc_xprt_buf);
		kfree(ctxt);
	}
}

/**
 * svc_rdma_send_ctxt_get - Get a free send_ctxt
 * @rdma: controlling svcxprt_rdma
 *
 * Returns a ready-to-use send_ctxt, or NULL if none are
 * available and a fresh one cannot be allocated.
 */
struct svc_rdma_send_ctxt *svc_rdma_send_ctxt_get(struct svcxprt_rdma *rdma)
{
	struct svc_rdma_send_ctxt *ctxt;
	struct llist_node *node;

	spin_lock(&rdma->sc_send_lock);
	node = llist_del_first(&rdma->sc_send_ctxts);
	spin_unlock(&rdma->sc_send_lock);
	if (!node)
		goto out_empty;

	ctxt = llist_entry(node, struct svc_rdma_send_ctxt, sc_node);

out:
	rpcrdma_set_xdrlen(&ctxt->sc_hdrbuf, 0);
	xdr_init_encode(&ctxt->sc_stream, &ctxt->sc_hdrbuf,
			ctxt->sc_xprt_buf, NULL);

	svc_rdma_cc_init(rdma, &ctxt->sc_reply_info.wi_cc);
	ctxt->sc_send_wr.num_sge = 0;
	ctxt->sc_cur_sge_no = 0;
	ctxt->sc_page_count = 0;
	ctxt->sc_wr_chain = &ctxt->sc_send_wr;
	ctxt->sc_sqecount = 1;

	return ctxt;

out_empty:
	ctxt = svc_rdma_send_ctxt_alloc(rdma);
	if (!ctxt)
		return NULL;
	goto out;
}

static void svc_rdma_send_ctxt_release(struct svcxprt_rdma *rdma,
				       struct svc_rdma_send_ctxt *ctxt)
{
	struct ib_device *device = rdma->sc_cm_id->device;
	unsigned int i;

	svc_rdma_reply_chunk_release(rdma, ctxt);

	if (ctxt->sc_page_count)
		release_pages(ctxt->sc_pages, ctxt->sc_page_count);

	/* The first SGE contains the transport header, which
	 * remains mapped until @ctxt is destroyed.
	 */
	for (i = 1; i < ctxt->sc_send_wr.num_sge; i++) {
		trace_svcrdma_dma_unmap_page(&ctxt->sc_cid,
					     ctxt->sc_sges[i].addr,
					     ctxt->sc_sges[i].length);
		ib_dma_unmap_page(device,
				  ctxt->sc_sges[i].addr,
				  ctxt->sc_sges[i].length,
				  DMA_TO_DEVICE);
	}

	llist_add(&ctxt->sc_node, &rdma->sc_send_ctxts);
}

static void svc_rdma_send_ctxt_put_async(struct work_struct *work)
{
	struct svc_rdma_send_ctxt *ctxt;

	ctxt = container_of(work, struct svc_rdma_send_ctxt, sc_work);
	svc_rdma_send_ctxt_release(ctxt->sc_rdma, ctxt);
}

/**
 * svc_rdma_send_ctxt_put - Return send_ctxt to free list
 * @rdma: controlling svcxprt_rdma
 * @ctxt: object to return to the free list
 *
 * Pages left in sc_pages are DMA unmapped and released.
 */
void svc_rdma_send_ctxt_put(struct svcxprt_rdma *rdma,
			    struct svc_rdma_send_ctxt *ctxt)
{
	INIT_WORK(&ctxt->sc_work, svc_rdma_send_ctxt_put_async);
	queue_work(svcrdma_wq, &ctxt->sc_work);
}

/**
 * svc_rdma_wake_send_waiters - manage Send Queue accounting
 * @rdma: controlling transport
 * @avail: Number of additional SQEs that are now available
 *
 */
void svc_rdma_wake_send_waiters(struct svcxprt_rdma *rdma, int avail)
{
	atomic_add(avail, &rdma->sc_sq_avail);
	smp_mb__after_atomic();
	if (unlikely(waitqueue_active(&rdma->sc_send_wait)))
		wake_up(&rdma->sc_send_wait);
}

/**
 * svc_rdma_wc_send - Invoked by RDMA provider for each polled Send WC
 * @cq: Completion Queue context
 * @wc: Work Completion object
 *
 * NB: The svc_xprt/svcxprt_rdma is pinned whenever it's possible that
 * the Send completion handler could be running.
 */
static void svc_rdma_wc_send(struct ib_cq *cq, struct ib_wc *wc)
{
	struct svcxprt_rdma *rdma = cq->cq_context;
	struct ib_cqe *cqe = wc->wr_cqe;
	struct svc_rdma_send_ctxt *ctxt =
		container_of(cqe, struct svc_rdma_send_ctxt, sc_cqe);

	svc_rdma_wake_send_waiters(rdma, ctxt->sc_sqecount);

	if (unlikely(wc->status != IB_WC_SUCCESS))
		goto flushed;

	trace_svcrdma_wc_send(&ctxt->sc_cid);
	svc_rdma_send_ctxt_put(rdma, ctxt);
	return;

flushed:
	if (wc->status != IB_WC_WR_FLUSH_ERR)
		trace_svcrdma_wc_send_err(wc, &ctxt->sc_cid);
	else
		trace_svcrdma_wc_send_flush(wc, &ctxt->sc_cid);
	svc_rdma_send_ctxt_put(rdma, ctxt);
	svc_xprt_deferred_close(&rdma->sc_xprt);
}

/**
 * svc_rdma_post_send - Post a WR chain to the Send Queue
 * @rdma: transport context
 * @ctxt: WR chain to post
 *
 * Copy fields in @ctxt to stack variables in order to guarantee
 * that these values remain available after the ib_post_send() call.
 * In some error flow cases, svc_rdma_wc_send() releases @ctxt.
 *
 * Note there is potential for starvation when the Send Queue is
 * full because there is no order to when waiting threads are
 * awoken. The transport is typically provisioned with a deep
 * enough Send Queue that SQ exhaustion should be a rare event.
 *
 * Return values:
 *   %0: @ctxt's WR chain was posted successfully
 *   %-ENOTCONN: The connection was lost
 */
int svc_rdma_post_send(struct svcxprt_rdma *rdma,
		       struct svc_rdma_send_ctxt *ctxt)
{
	struct ib_send_wr *first_wr = ctxt->sc_wr_chain;
	struct ib_send_wr *send_wr = &ctxt->sc_send_wr;
	const struct ib_send_wr *bad_wr = first_wr;
	struct rpc_rdma_cid cid = ctxt->sc_cid;
	int ret, sqecount = ctxt->sc_sqecount;

	might_sleep();

	/* Sync the transport header buffer */
	ib_dma_sync_single_for_device(rdma->sc_pd->device,
				      send_wr->sg_list[0].addr,
				      send_wr->sg_list[0].length,
				      DMA_TO_DEVICE);

	/* If the SQ is full, wait until an SQ entry is available */
	while (!test_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags)) {
		if (atomic_sub_return(sqecount, &rdma->sc_sq_avail) < 0) {
			svc_rdma_wake_send_waiters(rdma, sqecount);

			/* When the transport is torn down, assume
			 * ib_drain_sq() will trigger enough Send
			 * completions to wake us. The XPT_CLOSE test
			 * above should then cause the while loop to
			 * exit.
			 */
			percpu_counter_inc(&svcrdma_stat_sq_starve);
			trace_svcrdma_sq_full(rdma, &cid);
			wait_event(rdma->sc_send_wait,
				   atomic_read(&rdma->sc_sq_avail) > 0);
			trace_svcrdma_sq_retry(rdma, &cid);
			continue;
		}

		trace_svcrdma_post_send(ctxt);
		ret = ib_post_send(rdma->sc_qp, first_wr, &bad_wr);
		if (ret) {
			trace_svcrdma_sq_post_err(rdma, &cid, ret);
			svc_xprt_deferred_close(&rdma->sc_xprt);

			/* If even one WR was posted, there will be a
			 * Send completion that bumps sc_sq_avail.
			 */
			if (bad_wr == first_wr) {
				svc_rdma_wake_send_waiters(rdma, sqecount);
				break;
			}
		}
		return 0;
	}
	return -ENOTCONN;
}
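
/* Example of the Send Queue accounting above (illustrative only): a
 * ctxt whose sc_wr_chain holds two Write WRs followed by the Send WR
 * would have sc_sqecount == 3. svc_rdma_post_send() subtracts 3 from
 * rdma->sc_sq_avail before posting the chain, and svc_rdma_wc_send()
 * later returns those three entries via svc_rdma_wake_send_waiters().
 */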

/**
 * svc_rdma_encode_read_list - Encode RPC Reply's Read chunk list
 * @sctxt: Send context for the RPC Reply
 *
 * Return values:
 *   On success, returns length in bytes of the Reply XDR buffer
 *   that was consumed by the Reply Read list
 *   %-EMSGSIZE on XDR buffer overflow
 */
static ssize_t svc_rdma_encode_read_list(struct svc_rdma_send_ctxt *sctxt)
{
	/* RPC-over-RDMA version 1 replies never have a Read list. */
	return xdr_stream_encode_item_absent(&sctxt->sc_stream);
}

/**
 * svc_rdma_encode_write_segment - Encode one Write segment
 * @sctxt: Send context for the RPC Reply
 * @chunk: Write chunk to push
 * @remaining: remaining bytes of the payload left in the Write chunk
 * @segno: which segment in the chunk
 *
 * Return values:
 *   On success, returns length in bytes of the Reply XDR buffer
 *   that was consumed by the Write segment, and updates @remaining
 *   %-EMSGSIZE on XDR buffer overflow
 */
static ssize_t svc_rdma_encode_write_segment(struct svc_rdma_send_ctxt *sctxt,
					     const struct svc_rdma_chunk *chunk,
					     u32 *remaining, unsigned int segno)
{
	const struct svc_rdma_segment *segment = &chunk->ch_segments[segno];
	const size_t len = rpcrdma_segment_maxsz * sizeof(__be32);
	u32 length;
	__be32 *p;

	p = xdr_reserve_space(&sctxt->sc_stream, len);
	if (!p)
		return -EMSGSIZE;

	length = min_t(u32, *remaining, segment->rs_length);
	*remaining -= length;
	xdr_encode_rdma_segment(p, segment->rs_handle, length,
				segment->rs_offset);
	trace_svcrdma_encode_wseg(sctxt, segno, segment->rs_handle, length,
				  segment->rs_offset);
	return len;
}
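
/* For reference, each segment encoded above occupies
 * rpcrdma_segment_maxsz (4) XDR words in the Reply's transport header:
 * a 32-bit handle, a 32-bit length, and a 64-bit offset -- 16 bytes
 * per segment.
 */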

/**
 * svc_rdma_encode_write_chunk - Encode one Write chunk
 * @sctxt: Send context for the RPC Reply
 * @chunk: Write chunk to push
 *
 * Copy a Write chunk from the Call transport header to the
 * Reply transport header. Update each segment's length field
 * to reflect the number of bytes written in that segment.
 *
 * Return values:
 *   On success, returns length in bytes of the Reply XDR buffer
 *   that was consumed by the Write chunk
 *   %-EMSGSIZE on XDR buffer overflow
 */
static ssize_t svc_rdma_encode_write_chunk(struct svc_rdma_send_ctxt *sctxt,
					   const struct svc_rdma_chunk *chunk)
{
	u32 remaining = chunk->ch_payload_length;
	unsigned int segno;
	ssize_t len, ret;

	len = 0;
	ret = xdr_stream_encode_item_present(&sctxt->sc_stream);
	if (ret < 0)
		return ret;
	len += ret;

	ret = xdr_stream_encode_u32(&sctxt->sc_stream, chunk->ch_segcount);
	if (ret < 0)
		return ret;
	len += ret;

	for (segno = 0; segno < chunk->ch_segcount; segno++) {
		ret = svc_rdma_encode_write_segment(sctxt, chunk, &remaining, segno);
		if (ret < 0)
			return ret;
		len += ret;
	}

	return len;
}
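
/* Worked example: a Write chunk containing two segments consumes
 * 1 word ("present" discriminator) + 1 word (segment count) +
 * 2 * rpcrdma_segment_maxsz words = 10 XDR words, or 40 bytes, of
 * the Reply's transport header.
 */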

/**
 * svc_rdma_encode_write_list - Encode RPC Reply's Write chunk list
 * @rctxt: Reply context with information about the RPC Call
 * @sctxt: Send context for the RPC Reply
 *
 * Return values:
 *   On success, returns length in bytes of the Reply XDR buffer
 *   that was consumed by the Reply's Write list
 *   %-EMSGSIZE on XDR buffer overflow
 */
static ssize_t svc_rdma_encode_write_list(struct svc_rdma_recv_ctxt *rctxt,
					  struct svc_rdma_send_ctxt *sctxt)
{
	struct svc_rdma_chunk *chunk;
	ssize_t len, ret;

	len = 0;
	pcl_for_each_chunk(chunk, &rctxt->rc_write_pcl) {
		ret = svc_rdma_encode_write_chunk(sctxt, chunk);
		if (ret < 0)
			return ret;
		len += ret;
	}

	/* Terminate the Write list */
	ret = xdr_stream_encode_item_absent(&sctxt->sc_stream);
	if (ret < 0)
		return ret;

	return len + ret;
}

/**
 * svc_rdma_encode_reply_chunk - Encode RPC Reply's Reply chunk
 * @rctxt: Reply context with information about the RPC Call
 * @sctxt: Send context for the RPC Reply
 * @length: size in bytes of the payload in the Reply chunk
 *
 * Return values:
 *   On success, returns length in bytes of the Reply XDR buffer
 *   that was consumed by the Reply's Reply chunk
 *   %-EMSGSIZE on XDR buffer overflow
 *   %-E2BIG if the RPC message is larger than the Reply chunk
 */
static ssize_t
svc_rdma_encode_reply_chunk(struct svc_rdma_recv_ctxt *rctxt,
			    struct svc_rdma_send_ctxt *sctxt,
			    unsigned int length)
{
	struct svc_rdma_chunk *chunk;

	if (pcl_is_empty(&rctxt->rc_reply_pcl))
		return xdr_stream_encode_item_absent(&sctxt->sc_stream);

	chunk = pcl_first_chunk(&rctxt->rc_reply_pcl);
	if (length > chunk->ch_length)
		return -E2BIG;

	chunk->ch_payload_length = length;
	return svc_rdma_encode_write_chunk(sctxt, chunk);
}

struct svc_rdma_map_data {
	struct svcxprt_rdma *md_rdma;
	struct svc_rdma_send_ctxt *md_ctxt;
};

/**
 * svc_rdma_page_dma_map - DMA map one page
 * @data: pointer to arguments
 * @page: struct page to DMA map
 * @offset: offset into the page
 * @len: number of bytes to map
 *
 * Returns:
 *   %0 if DMA mapping was successful
 *   %-EIO if the page cannot be DMA mapped
 */
static int svc_rdma_page_dma_map(void *data, struct page *page,
				 unsigned long offset, unsigned int len)
{
	struct svc_rdma_map_data *args = data;
	struct svcxprt_rdma *rdma = args->md_rdma;
	struct svc_rdma_send_ctxt *ctxt = args->md_ctxt;
	struct ib_device *dev = rdma->sc_cm_id->device;
	dma_addr_t dma_addr;

	++ctxt->sc_cur_sge_no;

	dma_addr = ib_dma_map_page(dev, page, offset, len, DMA_TO_DEVICE);
	if (ib_dma_mapping_error(dev, dma_addr))
		goto out_maperr;

	trace_svcrdma_dma_map_page(&ctxt->sc_cid, dma_addr, len);
	ctxt->sc_sges[ctxt->sc_cur_sge_no].addr = dma_addr;
	ctxt->sc_sges[ctxt->sc_cur_sge_no].length = len;
	ctxt->sc_send_wr.num_sge++;
	return 0;

out_maperr:
	trace_svcrdma_dma_map_err(&ctxt->sc_cid, dma_addr, len);
	return -EIO;
}

/**
 * svc_rdma_iov_dma_map - DMA map an iovec
 * @data: pointer to arguments
 * @iov: kvec to DMA map
 *
 * ib_dma_map_page() is used here because svc_rdma_dma_unmap()
 * handles DMA-unmap and it uses ib_dma_unmap_page() exclusively.
 *
 * Returns:
 *   %0 if DMA mapping was successful
 *   %-EIO if the iovec cannot be DMA mapped
 */
static int svc_rdma_iov_dma_map(void *data, const struct kvec *iov)
{
	if (!iov->iov_len)
		return 0;
	return svc_rdma_page_dma_map(data, virt_to_page(iov->iov_base),
				     offset_in_page(iov->iov_base),
				     iov->iov_len);
}
/**
 * svc_rdma_xb_dma_map - DMA map all segments of an xdr_buf
 * @xdr: xdr_buf containing portion of an RPC message to transmit
 * @data: pointer to arguments
 *
 * Returns:
 *   The length in bytes of @xdr if DMA mapping was successful
 *   %-EIO if DMA mapping failed
 *
 * On failure, any DMA mappings that have already been done must be
 * unmapped by the caller.
 */
static int svc_rdma_xb_dma_map(const struct xdr_buf *xdr, void *data)
{
	unsigned int len, remaining;
	unsigned long pageoff;
	struct page **ppages;
	int ret;

	ret = svc_rdma_iov_dma_map(data, &xdr->head[0]);
	if (ret < 0)
		return ret;

	ppages = xdr->pages + (xdr->page_base >> PAGE_SHIFT);
	pageoff = offset_in_page(xdr->page_base);
	remaining = xdr->page_len;
	while (remaining) {
		len = min_t(u32, PAGE_SIZE - pageoff, remaining);

		ret = svc_rdma_page_dma_map(data, *ppages++, pageoff, len);
		if (ret < 0)
			return ret;

		remaining -= len;
		pageoff = 0;
	}

	ret = svc_rdma_iov_dma_map(data, &xdr->tail[0]);
	if (ret < 0)
		return ret;

	return xdr->len;
}

struct svc_rdma_pullup_data {
	u8 *pd_dest;
	unsigned int pd_length;
	unsigned int pd_num_sges;
};

/**
 * svc_rdma_xb_count_sges - Count how many SGEs will be needed
 * @xdr: xdr_buf containing portion of an RPC message to transmit
 * @data: pointer to arguments
 *
 * Returns:
 *   Always zero. The number of SGEs needed to Send the contents of
 *   @xdr inline is accumulated in @data.
 */
static int svc_rdma_xb_count_sges(const struct xdr_buf *xdr,
				  void *data)
{
	struct svc_rdma_pullup_data *args = data;
	unsigned int remaining;
	unsigned long offset;

	if (xdr->head[0].iov_len)
		++args->pd_num_sges;

	offset = offset_in_page(xdr->page_base);
	remaining = xdr->page_len;
	while (remaining) {
		++args->pd_num_sges;
		remaining -= min_t(u32, PAGE_SIZE - offset, remaining);
		offset = 0;
	}

	if (xdr->tail[0].iov_len)
		++args->pd_num_sges;

	args->pd_length += xdr->len;
	return 0;
}

/**
 * svc_rdma_pull_up_needed - Determine whether to use pull-up
 * @rdma: controlling transport
 * @sctxt: send_ctxt for the Send WR
 * @write_pcl: Write chunk list provided by client
 * @xdr: xdr_buf containing RPC message to transmit
 *
 * Returns:
 *   %true if pull-up must be used
 *   %false otherwise
 */
static bool svc_rdma_pull_up_needed(const struct svcxprt_rdma *rdma,
				    const struct svc_rdma_send_ctxt *sctxt,
				    const struct svc_rdma_pcl *write_pcl,
				    const struct xdr_buf *xdr)
{
	/* Resources needed for the transport header */
	struct svc_rdma_pullup_data args = {
		.pd_length = sctxt->sc_hdrbuf.len,
		.pd_num_sges = 1,
	};
	int ret;

	ret = pcl_process_nonpayloads(write_pcl, xdr,
				      svc_rdma_xb_count_sges, &args);
	if (ret < 0)
		return false;

	if (args.pd_length < RPCRDMA_PULLUP_THRESH)
		return true;
	return args.pd_num_sges >= rdma->sc_max_send_sges;
}
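
/* Worked example of the SGE count above: an xdr_buf with a head kvec,
 * a tail kvec, and page content that spans three pages needs
 * 1 (transport header) + 1 (head) + 3 (pages) + 1 (tail) = 6 SGEs.
 * Even a message larger than RPCRDMA_PULLUP_THRESH is pulled up when
 * the device's sc_max_send_sges is not larger than that count.
 */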

/**
 * svc_rdma_xb_linearize - Copy region of xdr_buf to flat buffer
 * @xdr: xdr_buf containing portion of an RPC message to copy
 * @data: pointer to arguments
 *
 * Returns:
 *   Always zero.
 */
static int svc_rdma_xb_linearize(const struct xdr_buf *xdr,
				 void *data)
{
	struct svc_rdma_pullup_data *args = data;
	unsigned int len, remaining;
	unsigned long pageoff;
	struct page **ppages;

	if (xdr->head[0].iov_len) {
		memcpy(args->pd_dest, xdr->head[0].iov_base, xdr->head[0].iov_len);
		args->pd_dest += xdr->head[0].iov_len;
	}

	ppages = xdr->pages + (xdr->page_base >> PAGE_SHIFT);
	pageoff = offset_in_page(xdr->page_base);
	remaining = xdr->page_len;
	while (remaining) {
		len = min_t(u32, PAGE_SIZE - pageoff, remaining);
		memcpy(args->pd_dest, page_address(*ppages) + pageoff, len);
		remaining -= len;
		args->pd_dest += len;
		pageoff = 0;
		ppages++;
	}

	if (xdr->tail[0].iov_len) {
		memcpy(args->pd_dest, xdr->tail[0].iov_base, xdr->tail[0].iov_len);
		args->pd_dest += xdr->tail[0].iov_len;
	}

	args->pd_length += xdr->len;
	return 0;
}

/**
 * svc_rdma_pull_up_reply_msg - Copy Reply into a single buffer
 * @rdma: controlling transport
 * @sctxt: send_ctxt for the Send WR; xprt hdr is already prepared
 * @write_pcl: Write chunk list provided by client
 * @xdr: prepared xdr_buf containing RPC message
 *
 * The device is not capable of sending the reply directly.
 * Assemble the elements of @xdr into the transport header buffer.
 *
 * Assumptions:
 *   pull_up_needed has determined that @xdr will fit in the buffer.
 *
 * Returns:
 *   %0 if pull-up was successful
 *   %-EMSGSIZE if a buffer manipulation problem occurred
 */
static int svc_rdma_pull_up_reply_msg(const struct svcxprt_rdma *rdma,
				      struct svc_rdma_send_ctxt *sctxt,
				      const struct svc_rdma_pcl *write_pcl,
				      const struct xdr_buf *xdr)
{
	struct svc_rdma_pullup_data args = {
		.pd_dest = sctxt->sc_xprt_buf + sctxt->sc_hdrbuf.len,
	};
	int ret;

	ret = pcl_process_nonpayloads(write_pcl, xdr,
				      svc_rdma_xb_linearize, &args);
	if (ret < 0)
		return ret;

	sctxt->sc_sges[0].length = sctxt->sc_hdrbuf.len + args.pd_length;
	trace_svcrdma_send_pullup(sctxt, args.pd_length);
	return 0;
}
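
/* After a successful pull-up, the Reply goes out in a single SGE:
 * sc_sges[0] already points at the persistently-mapped transport
 * header buffer, and its length now covers the header plus the
 * linearized RPC message, so sc_send_wr.num_sge remains 1.
 */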

/**
 * svc_rdma_map_reply_msg - DMA map the buffer holding RPC message
 * @rdma: controlling transport
 * @sctxt: send_ctxt for the Send WR
 * @write_pcl: Write chunk list provided by client
 * @reply_pcl: Reply chunk provided by client
 * @xdr: prepared xdr_buf containing RPC message
 *
 * Returns:
 *   %0 if DMA mapping was successful.
 *   %-EMSGSIZE if a buffer manipulation problem occurred
 *   %-EIO if DMA mapping failed
 *
 * The Send WR's num_sge field is set in all cases.
 */
int svc_rdma_map_reply_msg(struct svcxprt_rdma *rdma,
			   struct svc_rdma_send_ctxt *sctxt,
			   const struct svc_rdma_pcl *write_pcl,
			   const struct svc_rdma_pcl *reply_pcl,
			   const struct xdr_buf *xdr)
{
	struct svc_rdma_map_data args = {
		.md_rdma = rdma,
		.md_ctxt = sctxt,
	};

	/* Set up the (persistently-mapped) transport header SGE. */
	sctxt->sc_send_wr.num_sge = 1;
	sctxt->sc_sges[0].length = sctxt->sc_hdrbuf.len;

	/* If there is a Reply chunk, nothing follows the transport
	 * header, so there is nothing to map.
	 */
	if (!pcl_is_empty(reply_pcl))
		return 0;

	/* For pull-up, svc_rdma_send() will sync the transport header.
	 * No additional DMA mapping is necessary.
	 */
	if (svc_rdma_pull_up_needed(rdma, sctxt, write_pcl, xdr))
		return svc_rdma_pull_up_reply_msg(rdma, sctxt, write_pcl, xdr);

	return pcl_process_nonpayloads(write_pcl, xdr,
				       svc_rdma_xb_dma_map, &args);
}

/* The svc_rqst and all resources it owns are released as soon as
 * svc_rdma_sendto returns. Transfer pages under I/O to the ctxt
 * so they are released by the Send completion handler.
 */
static void svc_rdma_save_io_pages(struct svc_rqst *rqstp,
				   struct svc_rdma_send_ctxt *ctxt)
{
	int i, pages = rqstp->rq_next_page - rqstp->rq_respages;

	ctxt->sc_page_count += pages;
	for (i = 0; i < pages; i++) {
		ctxt->sc_pages[i] = rqstp->rq_respages[i];
		rqstp->rq_respages[i] = NULL;
	}

	/* Prevent svc_xprt_release from releasing pages in rq_pages */
	rqstp->rq_next_page = rqstp->rq_respages;
}
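
/* For instance, if the Reply used two respages, both page pointers are
 * moved into ctxt->sc_pages[0..1] and cleared in rq_respages, so
 * svc_xprt_release() finds nothing to free; svc_rdma_send_ctxt_release()
 * later hands sc_pages to release_pages().
 */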

/* Prepare the portion of the RPC Reply that will be transmitted
 * via RDMA Send. The RPC-over-RDMA transport header is prepared
 * in sc_sges[0], and the RPC xdr_buf is prepared in following sges.
 *
 * Depending on whether a Write list or Reply chunk is present,
 * the server may Send all, a portion of, or none of the xdr_buf.
 * In the latter case, only the transport header (sc_sges[0]) is
 * transmitted.
 *
 * Assumptions:
 * - The Reply's transport header will never be larger than a page.
 */
static int svc_rdma_send_reply_msg(struct svcxprt_rdma *rdma,
				   struct svc_rdma_send_ctxt *sctxt,
				   const struct svc_rdma_recv_ctxt *rctxt,
				   struct svc_rqst *rqstp)
{
	struct ib_send_wr *send_wr = &sctxt->sc_send_wr;
	int ret;

	ret = svc_rdma_map_reply_msg(rdma, sctxt, &rctxt->rc_write_pcl,
				     &rctxt->rc_reply_pcl, &rqstp->rq_res);
	if (ret < 0)
		return ret;

	/* Transfer pages involved in RDMA Writes to the sctxt's
	 * page array. Completion handling releases these pages.
	 */
	svc_rdma_save_io_pages(rqstp, sctxt);

	if (rctxt->rc_inv_rkey) {
		send_wr->opcode = IB_WR_SEND_WITH_INV;
		send_wr->ex.invalidate_rkey = rctxt->rc_inv_rkey;
	} else {
		send_wr->opcode = IB_WR_SEND;
	}

	return svc_rdma_post_send(rdma, sctxt);
}

/**
 * svc_rdma_send_error_msg - Send an RPC/RDMA v1 error response
 * @rdma: controlling transport context
 * @sctxt: Send context for the response
 * @rctxt: Receive context for incoming bad message
 * @status: negative errno indicating error that occurred
 *
 * Given the client-provided Read, Write, and Reply chunks, the
 * server was not able to parse the Call or form a complete Reply.
 * Return an RDMA_ERROR message so the client can retire the RPC
 * transaction.
 *
 * The caller does not have to release @sctxt. It is released by
 * Send completion, or by this function on error.
 */
void svc_rdma_send_error_msg(struct svcxprt_rdma *rdma,
			     struct svc_rdma_send_ctxt *sctxt,
			     struct svc_rdma_recv_ctxt *rctxt,
			     int status)
{
	__be32 *rdma_argp = rctxt->rc_recv_buf;
	__be32 *p;

	rpcrdma_set_xdrlen(&sctxt->sc_hdrbuf, 0);
	xdr_init_encode(&sctxt->sc_stream, &sctxt->sc_hdrbuf,
			sctxt->sc_xprt_buf, NULL);

	p = xdr_reserve_space(&sctxt->sc_stream,
			      rpcrdma_fixed_maxsz * sizeof(*p));
	if (!p)
		goto put_ctxt;

	*p++ = *rdma_argp;
	*p++ = *(rdma_argp + 1);
	*p++ = rdma->sc_fc_credits;
	*p = rdma_error;

	switch (status) {
	case -EPROTONOSUPPORT:
		p = xdr_reserve_space(&sctxt->sc_stream, 3 * sizeof(*p));
		if (!p)
			goto put_ctxt;

		*p++ = err_vers;
		*p++ = rpcrdma_version;
		*p = rpcrdma_version;
		trace_svcrdma_err_vers(*rdma_argp);
		break;
	default:
		p = xdr_reserve_space(&sctxt->sc_stream, sizeof(*p));
		if (!p)
			goto put_ctxt;

		*p = err_chunk;
		trace_svcrdma_err_chunk(*rdma_argp);
	}

	/* Remote Invalidation is skipped for simplicity. */
	sctxt->sc_send_wr.num_sge = 1;
	sctxt->sc_send_wr.opcode = IB_WR_SEND;
	sctxt->sc_sges[0].length = sctxt->sc_hdrbuf.len;
	if (svc_rdma_post_send(rdma, sctxt))
		goto put_ctxt;
	return;

put_ctxt:
	svc_rdma_send_ctxt_put(rdma, sctxt);
}
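
/* Example: an ERR_VERS response encoded above is seven XDR words
 * (28 bytes): XID, vers, credits, rdma_error, err_vers, and the low
 * and high supported protocol versions.
 */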

/**
 * svc_rdma_sendto - Transmit an RPC reply
 * @rqstp: processed RPC request, reply XDR already in ::rq_res
 *
 * Any resources still associated with @rqstp are released upon return.
 * If no reply message was possible, the connection is closed.
 *
 * Returns:
 *   %0 if an RPC reply has been successfully posted,
 *   %-ENOMEM if a resource shortage occurred (connection is lost),
 *   %-ENOTCONN if posting failed (connection is lost).
 */
int svc_rdma_sendto(struct svc_rqst *rqstp)
{
	struct svc_xprt *xprt = rqstp->rq_xprt;
	struct svcxprt_rdma *rdma =
		container_of(xprt, struct svcxprt_rdma, sc_xprt);
	struct svc_rdma_recv_ctxt *rctxt = rqstp->rq_xprt_ctxt;
	__be32 *rdma_argp = rctxt->rc_recv_buf;
	struct svc_rdma_send_ctxt *sctxt;
	unsigned int rc_size;
	__be32 *p;
	int ret;

	ret = -ENOTCONN;
	if (svc_xprt_is_dead(xprt))
		goto drop_connection;

	ret = -ENOMEM;
	sctxt = svc_rdma_send_ctxt_get(rdma);
	if (!sctxt)
		goto drop_connection;

	ret = -EMSGSIZE;
	p = xdr_reserve_space(&sctxt->sc_stream,
			      rpcrdma_fixed_maxsz * sizeof(*p));
	if (!p)
		goto put_ctxt;

	ret = svc_rdma_send_write_list(rdma, rctxt, &rqstp->rq_res);
	if (ret < 0)
		goto put_ctxt;

	rc_size = 0;
	if (!pcl_is_empty(&rctxt->rc_reply_pcl)) {
		ret = svc_rdma_prepare_reply_chunk(rdma, &rctxt->rc_write_pcl,
						   &rctxt->rc_reply_pcl, sctxt,
						   &rqstp->rq_res);
		if (ret < 0)
			goto reply_chunk;
		rc_size = ret;
	}

	*p++ = *rdma_argp;
	*p++ = *(rdma_argp + 1);
	*p++ = rdma->sc_fc_credits;
	*p = pcl_is_empty(&rctxt->rc_reply_pcl) ? rdma_msg : rdma_nomsg;

	ret = svc_rdma_encode_read_list(sctxt);
	if (ret < 0)
		goto put_ctxt;
	ret = svc_rdma_encode_write_list(rctxt, sctxt);
	if (ret < 0)
		goto put_ctxt;
	ret = svc_rdma_encode_reply_chunk(rctxt, sctxt, rc_size);
	if (ret < 0)
		goto put_ctxt;

	ret = svc_rdma_send_reply_msg(rdma, sctxt, rctxt, rqstp);
	if (ret < 0)
		goto put_ctxt;
	return 0;

reply_chunk:
	if (ret != -E2BIG && ret != -EINVAL)
		goto put_ctxt;

	/* Send completion releases payload pages that were part
	 * of previously posted RDMA Writes.
	 */
	svc_rdma_save_io_pages(rqstp, sctxt);
	svc_rdma_send_error_msg(rdma, sctxt, rctxt, ret);
	return 0;

put_ctxt:
	svc_rdma_send_ctxt_put(rdma, sctxt);
drop_connection:
	trace_svcrdma_send_err(rqstp, ret);
	svc_xprt_deferred_close(&rdma->sc_xprt);
	return -ENOTCONN;
}

/**
 * svc_rdma_result_payload - special processing for a result payload
 * @rqstp: RPC transaction context
 * @offset: payload's byte offset in @rqstp->rq_res
 * @length: size of payload, in bytes
 *
 * Assign the passed-in result payload to the current Write chunk,
 * and advance cur_result_payload to the next Write chunk, if
 * there is one.
 *
 * Return values:
 *   %0 if successful or nothing needed to be done
 *   %-E2BIG if the payload was larger than the Write chunk
 */
int svc_rdma_result_payload(struct svc_rqst *rqstp, unsigned int offset,
			    unsigned int length)
{
	struct svc_rdma_recv_ctxt *rctxt = rqstp->rq_xprt_ctxt;
	struct svc_rdma_chunk *chunk;

	chunk = rctxt->rc_cur_result_payload;
	if (!length || !chunk)
		return 0;
	rctxt->rc_cur_result_payload =
		pcl_next_chunk(&rctxt->rc_write_pcl, chunk);

	if (length > chunk->ch_length)
		return -E2BIG;
	chunk->ch_position = offset;
	chunk->ch_payload_length = length;
	return 0;
}