// SPDX-License-Identifier: GPL-2.0-or-later
/* RxRPC recvmsg() implementation
 *
 * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
 * Written by David Howells (dhowells@redhat.com)
 */
7 | |
8 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt |
9 | |
10 | #include <linux/net.h> |
11 | #include <linux/skbuff.h> |
12 | #include <linux/export.h> |
13 | #include <linux/sched/signal.h> |
14 | |
15 | #include <net/sock.h> |
16 | #include <net/af_rxrpc.h> |
17 | #include "ar-internal.h" |
18 | |
19 | /* |
20 | * Post a call for attention by the socket or kernel service. Further |
21 | * notifications are suppressed by putting recvmsg_link on a dummy queue. |
22 | */ |
23 | void rxrpc_notify_socket(struct rxrpc_call *call) |
24 | { |
25 | struct rxrpc_sock *rx; |
26 | struct sock *sk; |
27 | |
28 | _enter("%d" , call->debug_id); |
29 | |
30 | if (!list_empty(head: &call->recvmsg_link)) |
31 | return; |
32 | |
33 | rcu_read_lock(); |
34 | |
35 | rx = rcu_dereference(call->socket); |
36 | sk = &rx->sk; |
37 | if (rx && sk->sk_state < RXRPC_CLOSE) { |
38 | if (call->notify_rx) { |
39 | spin_lock(lock: &call->notify_lock); |
40 | call->notify_rx(sk, call, call->user_call_ID); |
41 | spin_unlock(lock: &call->notify_lock); |
42 | } else { |
43 | spin_lock(lock: &rx->recvmsg_lock); |
44 | if (list_empty(head: &call->recvmsg_link)) { |
45 | rxrpc_get_call(call, rxrpc_call_get_notify_socket); |
46 | list_add_tail(new: &call->recvmsg_link, head: &rx->recvmsg_q); |
47 | } |
48 | spin_unlock(lock: &rx->recvmsg_lock); |
49 | |
50 | if (!sock_flag(sk, flag: SOCK_DEAD)) { |
51 | _debug("call %ps" , sk->sk_data_ready); |
52 | sk->sk_data_ready(sk); |
53 | } |
54 | } |
55 | } |
56 | |
57 | rcu_read_unlock(); |
58 | _leave("" ); |
59 | } |
60 | |
61 | /* |
62 | * Pass a call terminating message to userspace. |
63 | */ |
64 | static int rxrpc_recvmsg_term(struct rxrpc_call *call, struct msghdr *msg) |
65 | { |
66 | u32 tmp = 0; |
67 | int ret; |
68 | |
69 | switch (call->completion) { |
70 | case RXRPC_CALL_SUCCEEDED: |
71 | ret = 0; |
72 | if (rxrpc_is_service_call(call)) |
73 | ret = put_cmsg(msg, SOL_RXRPC, type: RXRPC_ACK, len: 0, data: &tmp); |
74 | break; |
75 | case RXRPC_CALL_REMOTELY_ABORTED: |
76 | tmp = call->abort_code; |
77 | ret = put_cmsg(msg, SOL_RXRPC, type: RXRPC_ABORT, len: 4, data: &tmp); |
78 | break; |
79 | case RXRPC_CALL_LOCALLY_ABORTED: |
80 | tmp = call->abort_code; |
81 | ret = put_cmsg(msg, SOL_RXRPC, type: RXRPC_ABORT, len: 4, data: &tmp); |
82 | break; |
83 | case RXRPC_CALL_NETWORK_ERROR: |
84 | tmp = -call->error; |
85 | ret = put_cmsg(msg, SOL_RXRPC, type: RXRPC_NET_ERROR, len: 4, data: &tmp); |
86 | break; |
87 | case RXRPC_CALL_LOCAL_ERROR: |
88 | tmp = -call->error; |
89 | ret = put_cmsg(msg, SOL_RXRPC, type: RXRPC_LOCAL_ERROR, len: 4, data: &tmp); |
90 | break; |
91 | default: |
92 | pr_err("Invalid terminal call state %u\n" , call->completion); |
93 | BUG(); |
94 | break; |
95 | } |
96 | |
97 | trace_rxrpc_recvdata(call, why: rxrpc_recvmsg_terminal, |
98 | seq: call->ackr_window - 1, |
99 | offset: call->rx_pkt_offset, len: call->rx_pkt_len, ret); |
100 | return ret; |
101 | } |
102 | |
103 | /* |
104 | * Discard a packet we've used up and advance the Rx window by one. |
105 | */ |
106 | static void rxrpc_rotate_rx_window(struct rxrpc_call *call) |
107 | { |
108 | struct rxrpc_skb_priv *sp; |
109 | struct sk_buff *skb; |
110 | rxrpc_serial_t serial; |
111 | rxrpc_seq_t old_consumed = call->rx_consumed, tseq; |
112 | bool last; |
113 | int acked; |
114 | |
115 | _enter("%d" , call->debug_id); |
116 | |
117 | skb = skb_dequeue(list: &call->recvmsg_queue); |
118 | rxrpc_see_skb(skb, rxrpc_skb_see_rotate); |
119 | |
120 | sp = rxrpc_skb(skb); |
121 | tseq = sp->hdr.seq; |
122 | serial = sp->hdr.serial; |
123 | last = sp->hdr.flags & RXRPC_LAST_PACKET; |
124 | |
125 | /* Barrier against rxrpc_input_data(). */ |
126 | if (after(seq1: tseq, seq2: call->rx_consumed)) |
127 | smp_store_release(&call->rx_consumed, tseq); |
128 | |
129 | rxrpc_free_skb(skb, rxrpc_skb_put_rotate); |
130 | |
131 | trace_rxrpc_receive(call, why: last ? rxrpc_receive_rotate_last : rxrpc_receive_rotate, |
132 | serial, seq: call->rx_consumed); |
133 | |
134 | if (last) |
135 | set_bit(nr: RXRPC_CALL_RECVMSG_READ_ALL, addr: &call->flags); |
136 | |
137 | /* Check to see if there's an ACK that needs sending. */ |
138 | acked = atomic_add_return(i: call->rx_consumed - old_consumed, |
139 | v: &call->ackr_nr_consumed); |
140 | if (acked > 8 && |
141 | !test_and_set_bit(nr: RXRPC_CALL_RX_IS_IDLE, addr: &call->flags)) |
142 | rxrpc_poke_call(call, what: rxrpc_call_poke_idle); |
143 | } |
144 | |
145 | /* |
146 | * Decrypt and verify a DATA packet. |
147 | */ |
148 | static int rxrpc_verify_data(struct rxrpc_call *call, struct sk_buff *skb) |
149 | { |
150 | struct rxrpc_skb_priv *sp = rxrpc_skb(skb); |
151 | |
152 | if (sp->flags & RXRPC_RX_VERIFIED) |
153 | return 0; |
154 | return call->security->verify_packet(call, skb); |
155 | } |
156 | |
157 | /* |
158 | * Deliver messages to a call. This keeps processing packets until the buffer |
159 | * is filled and we find either more DATA (returns 0) or the end of the DATA |
160 | * (returns 1). If more packets are required, it returns -EAGAIN and if the |
161 | * call has failed it returns -EIO. |
162 | */ |
163 | static int rxrpc_recvmsg_data(struct socket *sock, struct rxrpc_call *call, |
164 | struct msghdr *msg, struct iov_iter *iter, |
165 | size_t len, int flags, size_t *_offset) |
166 | { |
167 | struct rxrpc_skb_priv *sp; |
168 | struct sk_buff *skb; |
169 | rxrpc_seq_t seq = 0; |
170 | size_t remain; |
171 | unsigned int rx_pkt_offset, rx_pkt_len; |
172 | int copy, ret = -EAGAIN, ret2; |
173 | |
174 | rx_pkt_offset = call->rx_pkt_offset; |
175 | rx_pkt_len = call->rx_pkt_len; |
176 | |
177 | if (rxrpc_call_has_failed(call)) { |
178 | seq = call->ackr_window - 1; |
179 | ret = -EIO; |
180 | goto done; |
181 | } |
182 | |
183 | if (test_bit(RXRPC_CALL_RECVMSG_READ_ALL, &call->flags)) { |
184 | seq = call->ackr_window - 1; |
185 | ret = 1; |
186 | goto done; |
187 | } |
188 | |
189 | /* No one else can be removing stuff from the queue, so we shouldn't |
190 | * need the Rx lock to walk it. |
191 | */ |
192 | skb = skb_peek(list_: &call->recvmsg_queue); |
193 | while (skb) { |
194 | rxrpc_see_skb(skb, rxrpc_skb_see_recvmsg); |
195 | sp = rxrpc_skb(skb); |
196 | seq = sp->hdr.seq; |
197 | |
198 | if (!(flags & MSG_PEEK)) |
199 | trace_rxrpc_receive(call, why: rxrpc_receive_front, |
200 | serial: sp->hdr.serial, seq); |
201 | |
202 | if (msg) |
203 | sock_recv_timestamp(msg, sk: sock->sk, skb); |
204 | |
205 | if (rx_pkt_offset == 0) { |
206 | ret2 = rxrpc_verify_data(call, skb); |
207 | trace_rxrpc_recvdata(call, why: rxrpc_recvmsg_next, seq, |
208 | offset: sp->offset, len: sp->len, ret: ret2); |
209 | if (ret2 < 0) { |
210 | kdebug("verify = %d" , ret2); |
211 | ret = ret2; |
212 | goto out; |
213 | } |
214 | rx_pkt_offset = sp->offset; |
215 | rx_pkt_len = sp->len; |
216 | } else { |
217 | trace_rxrpc_recvdata(call, why: rxrpc_recvmsg_cont, seq, |
218 | offset: rx_pkt_offset, len: rx_pkt_len, ret: 0); |
219 | } |
220 | |
221 | /* We have to handle short, empty and used-up DATA packets. */ |
222 | remain = len - *_offset; |
223 | copy = rx_pkt_len; |
224 | if (copy > remain) |
225 | copy = remain; |
226 | if (copy > 0) { |
227 | ret2 = skb_copy_datagram_iter(from: skb, offset: rx_pkt_offset, to: iter, |
228 | size: copy); |
229 | if (ret2 < 0) { |
230 | ret = ret2; |
231 | goto out; |
232 | } |
233 | |
234 | /* handle piecemeal consumption of data packets */ |
235 | rx_pkt_offset += copy; |
236 | rx_pkt_len -= copy; |
237 | *_offset += copy; |
238 | } |
239 | |
240 | if (rx_pkt_len > 0) { |
241 | trace_rxrpc_recvdata(call, why: rxrpc_recvmsg_full, seq, |
242 | offset: rx_pkt_offset, len: rx_pkt_len, ret: 0); |
243 | ASSERTCMP(*_offset, ==, len); |
244 | ret = 0; |
245 | break; |
246 | } |
247 | |
248 | /* The whole packet has been transferred. */ |
249 | if (sp->hdr.flags & RXRPC_LAST_PACKET) |
250 | ret = 1; |
251 | rx_pkt_offset = 0; |
252 | rx_pkt_len = 0; |
253 | |
254 | skb = skb_peek_next(skb, list_: &call->recvmsg_queue); |
255 | |
256 | if (!(flags & MSG_PEEK)) |
257 | rxrpc_rotate_rx_window(call); |
258 | } |
259 | |
260 | out: |
261 | if (!(flags & MSG_PEEK)) { |
262 | call->rx_pkt_offset = rx_pkt_offset; |
263 | call->rx_pkt_len = rx_pkt_len; |
264 | } |
265 | done: |
266 | trace_rxrpc_recvdata(call, why: rxrpc_recvmsg_data_return, seq, |
267 | offset: rx_pkt_offset, len: rx_pkt_len, ret); |
268 | if (ret == -EAGAIN) |
269 | set_bit(nr: RXRPC_CALL_RX_IS_IDLE, addr: &call->flags); |
270 | return ret; |
271 | } |
272 | |
273 | /* |
274 | * Receive a message from an RxRPC socket |
275 | * - we need to be careful about two or more threads calling recvmsg |
276 | * simultaneously |
277 | */ |
278 | int rxrpc_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, |
279 | int flags) |
280 | { |
281 | struct rxrpc_call *call; |
282 | struct rxrpc_sock *rx = rxrpc_sk(sock->sk); |
283 | struct list_head *l; |
284 | unsigned int call_debug_id = 0; |
285 | size_t copied = 0; |
286 | long timeo; |
287 | int ret; |
288 | |
289 | DEFINE_WAIT(wait); |
290 | |
291 | trace_rxrpc_recvmsg(call_debug_id: 0, why: rxrpc_recvmsg_enter, ret: 0); |
292 | |
293 | if (flags & (MSG_OOB | MSG_TRUNC)) |
294 | return -EOPNOTSUPP; |
295 | |
296 | timeo = sock_rcvtimeo(sk: &rx->sk, noblock: flags & MSG_DONTWAIT); |
297 | |
298 | try_again: |
299 | lock_sock(sk: &rx->sk); |
300 | |
301 | /* Return immediately if a client socket has no outstanding calls */ |
302 | if (RB_EMPTY_ROOT(&rx->calls) && |
303 | list_empty(head: &rx->recvmsg_q) && |
304 | rx->sk.sk_state != RXRPC_SERVER_LISTENING) { |
305 | release_sock(sk: &rx->sk); |
306 | return -EAGAIN; |
307 | } |
308 | |
309 | if (list_empty(head: &rx->recvmsg_q)) { |
310 | ret = -EWOULDBLOCK; |
311 | if (timeo == 0) { |
312 | call = NULL; |
313 | goto error_no_call; |
314 | } |
315 | |
316 | release_sock(sk: &rx->sk); |
317 | |
318 | /* Wait for something to happen */ |
319 | prepare_to_wait_exclusive(wq_head: sk_sleep(sk: &rx->sk), wq_entry: &wait, |
320 | TASK_INTERRUPTIBLE); |
321 | ret = sock_error(sk: &rx->sk); |
322 | if (ret) |
323 | goto wait_error; |
324 | |
325 | if (list_empty(head: &rx->recvmsg_q)) { |
326 | if (signal_pending(current)) |
327 | goto wait_interrupted; |
328 | trace_rxrpc_recvmsg(call_debug_id: 0, why: rxrpc_recvmsg_wait, ret: 0); |
329 | timeo = schedule_timeout(timeout: timeo); |
330 | } |
331 | finish_wait(wq_head: sk_sleep(sk: &rx->sk), wq_entry: &wait); |
332 | goto try_again; |
333 | } |
334 | |
335 | /* Find the next call and dequeue it if we're not just peeking. If we |
336 | * do dequeue it, that comes with a ref that we will need to release. |
337 | * We also want to weed out calls that got requeued whilst we were |
338 | * shovelling data out. |
339 | */ |
340 | spin_lock(lock: &rx->recvmsg_lock); |
341 | l = rx->recvmsg_q.next; |
342 | call = list_entry(l, struct rxrpc_call, recvmsg_link); |
343 | |
344 | if (!rxrpc_call_is_complete(call) && |
345 | skb_queue_empty(list: &call->recvmsg_queue)) { |
346 | list_del_init(entry: &call->recvmsg_link); |
347 | spin_unlock(lock: &rx->recvmsg_lock); |
348 | release_sock(sk: &rx->sk); |
349 | trace_rxrpc_recvmsg(call_debug_id: call->debug_id, why: rxrpc_recvmsg_unqueue, ret: 0); |
350 | rxrpc_put_call(call, rxrpc_call_put_recvmsg); |
351 | goto try_again; |
352 | } |
353 | |
354 | if (!(flags & MSG_PEEK)) |
355 | list_del_init(entry: &call->recvmsg_link); |
356 | else |
357 | rxrpc_get_call(call, rxrpc_call_get_recvmsg); |
358 | spin_unlock(lock: &rx->recvmsg_lock); |
359 | |
360 | call_debug_id = call->debug_id; |
361 | trace_rxrpc_recvmsg(call_debug_id, why: rxrpc_recvmsg_dequeue, ret: 0); |
362 | |
363 | /* We're going to drop the socket lock, so we need to lock the call |
364 | * against interference by sendmsg. |
365 | */ |
366 | if (!mutex_trylock(lock: &call->user_mutex)) { |
367 | ret = -EWOULDBLOCK; |
368 | if (flags & MSG_DONTWAIT) |
369 | goto error_requeue_call; |
370 | ret = -ERESTARTSYS; |
371 | if (mutex_lock_interruptible(&call->user_mutex) < 0) |
372 | goto error_requeue_call; |
373 | } |
374 | |
375 | release_sock(sk: &rx->sk); |
376 | |
377 | if (test_bit(RXRPC_CALL_RELEASED, &call->flags)) |
378 | BUG(); |
379 | |
380 | if (test_bit(RXRPC_CALL_HAS_USERID, &call->flags)) { |
381 | if (flags & MSG_CMSG_COMPAT) { |
382 | unsigned int id32 = call->user_call_ID; |
383 | |
384 | ret = put_cmsg(msg, SOL_RXRPC, type: RXRPC_USER_CALL_ID, |
385 | len: sizeof(unsigned int), data: &id32); |
386 | } else { |
387 | unsigned long idl = call->user_call_ID; |
388 | |
389 | ret = put_cmsg(msg, SOL_RXRPC, type: RXRPC_USER_CALL_ID, |
390 | len: sizeof(unsigned long), data: &idl); |
391 | } |
392 | if (ret < 0) |
393 | goto error_unlock_call; |
394 | } |
395 | |
396 | if (msg->msg_name && call->peer) { |
397 | size_t len = sizeof(call->dest_srx); |
398 | |
399 | memcpy(msg->msg_name, &call->dest_srx, len); |
400 | msg->msg_namelen = len; |
401 | } |
402 | |
403 | ret = rxrpc_recvmsg_data(sock, call, msg, iter: &msg->msg_iter, len, |
404 | flags, offset: &copied); |
405 | if (ret == -EAGAIN) |
406 | ret = 0; |
407 | if (ret == -EIO) |
408 | goto call_failed; |
409 | if (ret < 0) |
410 | goto error_unlock_call; |
411 | |
412 | if (rxrpc_call_is_complete(call) && |
413 | skb_queue_empty(list: &call->recvmsg_queue)) |
414 | goto call_complete; |
415 | if (rxrpc_call_has_failed(call)) |
416 | goto call_failed; |
417 | |
418 | if (!skb_queue_empty(list: &call->recvmsg_queue)) |
419 | rxrpc_notify_socket(call); |
420 | goto not_yet_complete; |
421 | |
422 | call_failed: |
423 | rxrpc_purge_queue(&call->recvmsg_queue); |
424 | call_complete: |
425 | ret = rxrpc_recvmsg_term(call, msg); |
426 | if (ret < 0) |
427 | goto error_unlock_call; |
428 | if (!(flags & MSG_PEEK)) |
429 | rxrpc_release_call(rx, call); |
430 | msg->msg_flags |= MSG_EOR; |
431 | ret = 1; |
432 | |
433 | not_yet_complete: |
434 | if (ret == 0) |
435 | msg->msg_flags |= MSG_MORE; |
436 | else |
437 | msg->msg_flags &= ~MSG_MORE; |
438 | ret = copied; |
439 | |
440 | error_unlock_call: |
441 | mutex_unlock(lock: &call->user_mutex); |
442 | rxrpc_put_call(call, rxrpc_call_put_recvmsg); |
443 | trace_rxrpc_recvmsg(call_debug_id, why: rxrpc_recvmsg_return, ret); |
444 | return ret; |
445 | |
446 | error_requeue_call: |
447 | if (!(flags & MSG_PEEK)) { |
448 | spin_lock(lock: &rx->recvmsg_lock); |
449 | list_add(new: &call->recvmsg_link, head: &rx->recvmsg_q); |
450 | spin_unlock(lock: &rx->recvmsg_lock); |
451 | trace_rxrpc_recvmsg(call_debug_id, why: rxrpc_recvmsg_requeue, ret: 0); |
452 | } else { |
453 | rxrpc_put_call(call, rxrpc_call_put_recvmsg); |
454 | } |
455 | error_no_call: |
456 | release_sock(sk: &rx->sk); |
457 | error_trace: |
458 | trace_rxrpc_recvmsg(call_debug_id, why: rxrpc_recvmsg_return, ret); |
459 | return ret; |
460 | |
461 | wait_interrupted: |
462 | ret = sock_intr_errno(timeo); |
463 | wait_error: |
464 | finish_wait(wq_head: sk_sleep(sk: &rx->sk), wq_entry: &wait); |
465 | call = NULL; |
466 | goto error_trace; |
467 | } |
468 | |
469 | /** |
470 | * rxrpc_kernel_recv_data - Allow a kernel service to receive data/info |
471 | * @sock: The socket that the call exists on |
472 | * @call: The call to send data through |
473 | * @iter: The buffer to receive into |
474 | * @_len: The amount of data we want to receive (decreased on return) |
475 | * @want_more: True if more data is expected to be read |
476 | * @_abort: Where the abort code is stored if -ECONNABORTED is returned |
477 | * @_service: Where to store the actual service ID (may be upgraded) |
478 | * |
479 | * Allow a kernel service to receive data and pick up information about the |
480 | * state of a call. Returns 0 if got what was asked for and there's more |
481 | * available, 1 if we got what was asked for and we're at the end of the data |
482 | * and -EAGAIN if we need more data. |
483 | * |
484 | * Note that we may return -EAGAIN to drain empty packets at the end of the |
485 | * data, even if we've already copied over the requested data. |
486 | * |
487 | * *_abort should also be initialised to 0. |
488 | */ |
489 | int rxrpc_kernel_recv_data(struct socket *sock, struct rxrpc_call *call, |
490 | struct iov_iter *iter, size_t *_len, |
491 | bool want_more, u32 *_abort, u16 *_service) |
492 | { |
493 | size_t offset = 0; |
494 | int ret; |
495 | |
496 | _enter("{%d},%zu,%d" , call->debug_id, *_len, want_more); |
497 | |
498 | mutex_lock(&call->user_mutex); |
499 | |
500 | ret = rxrpc_recvmsg_data(sock, call, NULL, iter, len: *_len, flags: 0, offset: &offset); |
501 | *_len -= offset; |
502 | if (ret == -EIO) |
503 | goto call_failed; |
504 | if (ret < 0) |
505 | goto out; |
506 | |
507 | /* We can only reach here with a partially full buffer if we have |
508 | * reached the end of the data. We must otherwise have a full buffer |
509 | * or have been given -EAGAIN. |
510 | */ |
511 | if (ret == 1) { |
512 | if (iov_iter_count(i: iter) > 0) |
513 | goto short_data; |
514 | if (!want_more) |
515 | goto read_phase_complete; |
516 | ret = 0; |
517 | goto out; |
518 | } |
519 | |
520 | if (!want_more) |
521 | goto excess_data; |
522 | goto out; |
523 | |
524 | read_phase_complete: |
525 | ret = 1; |
526 | out: |
527 | if (_service) |
528 | *_service = call->dest_srx.srx_service; |
529 | mutex_unlock(lock: &call->user_mutex); |
530 | _leave(" = %d [%zu,%d]" , ret, iov_iter_count(iter), *_abort); |
531 | return ret; |
532 | |
533 | short_data: |
534 | trace_rxrpc_abort(call_nr: call->debug_id, why: rxrpc_recvmsg_short_data, |
535 | cid: call->cid, call_id: call->call_id, seq: call->rx_consumed, |
536 | abort_code: 0, error: -EBADMSG); |
537 | ret = -EBADMSG; |
538 | goto out; |
539 | excess_data: |
540 | trace_rxrpc_abort(call_nr: call->debug_id, why: rxrpc_recvmsg_excess_data, |
541 | cid: call->cid, call_id: call->call_id, seq: call->rx_consumed, |
542 | abort_code: 0, error: -EMSGSIZE); |
543 | ret = -EMSGSIZE; |
544 | goto out; |
545 | call_failed: |
546 | *_abort = call->abort_code; |
547 | ret = call->error; |
548 | if (call->completion == RXRPC_CALL_SUCCEEDED) { |
549 | ret = 1; |
550 | if (iov_iter_count(i: iter) > 0) |
551 | ret = -ECONNRESET; |
552 | } |
553 | goto out; |
554 | } |
555 | EXPORT_SYMBOL(rxrpc_kernel_recv_data); |
556 | |