// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2017 - 2018 Covalent IO, Inc. http://covalent.io */

#include <linux/skmsg.h>
#include <linux/skbuff.h>
#include <linux/scatterlist.h>

#include <net/sock.h>
#include <net/tcp.h>
#include <net/tls.h>
#include <trace/events/sock.h>

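/* The sg ring wraps, so end < start is a valid state. Coalescing into the
 * tail element is only OK while that element lies at or after
 * elem_first_coalesce in ring order; earlier elements must not be grown.
 */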
static bool sk_msg_try_coalesce_ok(struct sk_msg *msg, int elem_first_coalesce)
{
	if (msg->sg.end > msg->sg.start &&
	    elem_first_coalesce < msg->sg.end)
		return true;

	if (msg->sg.end < msg->sg.start &&
	    (elem_first_coalesce > msg->sg.start ||
	     elem_first_coalesce < msg->sg.end))
		return true;

	return false;
}

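/* Grow @msg to a total of @len bytes using the socket's page frag allocator,
 * coalescing with the tail element when the new chunk is contiguous with it.
 * On allocation or accounting failure the msg is trimmed back to its original
 * size and -ENOMEM is returned; a full sg ring yields -ENOSPC.
 */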
int sk_msg_alloc(struct sock *sk, struct sk_msg *msg, int len,
		 int elem_first_coalesce)
{
	struct page_frag *pfrag = sk_page_frag(sk);
	u32 osize = msg->sg.size;
	int ret = 0;

	len -= msg->sg.size;
	while (len > 0) {
		struct scatterlist *sge;
		u32 orig_offset;
		int use, i;

		if (!sk_page_frag_refill(sk, pfrag)) {
			ret = -ENOMEM;
			goto msg_trim;
		}

		orig_offset = pfrag->offset;
		use = min_t(int, len, pfrag->size - orig_offset);
		if (!sk_wmem_schedule(sk, use)) {
			ret = -ENOMEM;
			goto msg_trim;
		}

		i = msg->sg.end;
		sk_msg_iter_var_prev(i);
		sge = &msg->sg.data[i];

		if (sk_msg_try_coalesce_ok(msg, elem_first_coalesce) &&
		    sg_page(sge) == pfrag->page &&
		    sge->offset + sge->length == orig_offset) {
			sge->length += use;
		} else {
			if (sk_msg_full(msg)) {
				ret = -ENOSPC;
				break;
			}

			sge = &msg->sg.data[msg->sg.end];
			sg_unmark_end(sge);
			sg_set_page(sge, pfrag->page, use, orig_offset);
			get_page(pfrag->page);
			sk_msg_iter_next(msg, end);
		}

		sk_mem_charge(sk, use);
		msg->sg.size += use;
		pfrag->offset += use;
		len -= use;
	}

	return ret;

msg_trim:
	sk_msg_trim(sk, msg, osize);
	return ret;
}
EXPORT_SYMBOL_GPL(sk_msg_alloc);

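/* Clone @len bytes starting at @off from @src into @dst without copying:
 * page references are shared and adjacent regions on the same page are
 * merged into the tail element of @dst. Returns -ENOSPC if @src runs out
 * of data or @dst runs out of sg slots.
 */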
int sk_msg_clone(struct sock *sk, struct sk_msg *dst, struct sk_msg *src,
		 u32 off, u32 len)
{
	int i = src->sg.start;
	struct scatterlist *sge = sk_msg_elem(src, i);
	struct scatterlist *sgd = NULL;
	u32 sge_len, sge_off;

	while (off) {
		if (sge->length > off)
			break;
		off -= sge->length;
		sk_msg_iter_var_next(i);
		if (i == src->sg.end && off)
			return -ENOSPC;
		sge = sk_msg_elem(src, i);
	}

	while (len) {
		sge_len = sge->length - off;
		if (sge_len > len)
			sge_len = len;

		if (dst->sg.end)
			sgd = sk_msg_elem(dst, dst->sg.end - 1);

		if (sgd &&
		    (sg_page(sge) == sg_page(sgd)) &&
		    (sg_virt(sge) + off == sg_virt(sgd) + sgd->length)) {
			sgd->length += sge_len;
			dst->sg.size += sge_len;
		} else if (!sk_msg_full(dst)) {
			sge_off = sge->offset + off;
			sk_msg_page_add(dst, sg_page(sge), sge_len, sge_off);
		} else {
			return -ENOSPC;
		}

		off = 0;
		len -= sge_len;
		sk_mem_charge(sk, sge_len);
		sk_msg_iter_var_next(i);
		if (i == src->sg.end && len)
			return -ENOSPC;
		sge = sk_msg_elem(src, i);
	}

	return 0;
}
EXPORT_SYMBOL_GPL(sk_msg_clone);

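/* Consume @bytes from the front of @msg: uncharge them from the socket,
 * zero out fully consumed elements and advance sg.start past them.
 */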
void sk_msg_return_zero(struct sock *sk, struct sk_msg *msg, int bytes)
{
	int i = msg->sg.start;

	do {
		struct scatterlist *sge = sk_msg_elem(msg, i);

		if (bytes < sge->length) {
			sge->length -= bytes;
			sge->offset += bytes;
			sk_mem_uncharge(sk, bytes);
			break;
		}

		sk_mem_uncharge(sk, sge->length);
		bytes -= sge->length;
		sge->length = 0;
		sge->offset = 0;
		sk_msg_iter_var_next(i);
	} while (bytes && i != msg->sg.end);
	msg->sg.start = i;
}
EXPORT_SYMBOL_GPL(sk_msg_return_zero);

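/* Uncharge @bytes of @msg from the socket's memory accounting, walking the
 * elements from sg.start but leaving the scatterlist itself untouched.
 */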
void sk_msg_return(struct sock *sk, struct sk_msg *msg, int bytes)
{
	int i = msg->sg.start;

	do {
		struct scatterlist *sge = &msg->sg.data[i];
		int uncharge = (bytes < sge->length) ? bytes : sge->length;

		sk_mem_uncharge(sk, uncharge);
		bytes -= uncharge;
		sk_msg_iter_var_next(i);
	} while (i != msg->sg.end);
}
EXPORT_SYMBOL_GPL(sk_msg_return);

static int sk_msg_free_elem(struct sock *sk, struct sk_msg *msg, u32 i,
			    bool charge)
{
	struct scatterlist *sge = sk_msg_elem(msg, i);
	u32 len = sge->length;

	/* When the skb owns the memory we free it from the consume_skb() path. */
	if (!msg->skb) {
		if (charge)
			sk_mem_uncharge(sk, len);
		put_page(sg_page(sge));
	}
	memset(sge, 0, sizeof(*sge));
	return len;
}

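/* Free every element of @msg starting at @i, optionally uncharging the
 * memory, then release any owning skb and reinitialize the msg. Returns
 * the number of bytes freed.
 */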
static int __sk_msg_free(struct sock *sk, struct sk_msg *msg, u32 i,
			 bool charge)
{
	struct scatterlist *sge = sk_msg_elem(msg, i);
	int freed = 0;

	while (msg->sg.size) {
		msg->sg.size -= sge->length;
		freed += sk_msg_free_elem(sk, msg, i, charge);
		sk_msg_iter_var_next(i);
		sk_msg_check_to_free(msg, i, msg->sg.size);
		sge = sk_msg_elem(msg, i);
	}
	consume_skb(msg->skb);
	sk_msg_init(msg);
	return freed;
}

int sk_msg_free_nocharge(struct sock *sk, struct sk_msg *msg)
{
	return __sk_msg_free(sk, msg, msg->sg.start, false);
}
EXPORT_SYMBOL_GPL(sk_msg_free_nocharge);

int sk_msg_free(struct sock *sk, struct sk_msg *msg)
{
	return __sk_msg_free(sk, msg, msg->sg.start, true);
}
EXPORT_SYMBOL_GPL(sk_msg_free);

static void __sk_msg_free_partial(struct sock *sk, struct sk_msg *msg,
				  u32 bytes, bool charge)
{
	struct scatterlist *sge;
	u32 i = msg->sg.start;

	while (bytes) {
		sge = sk_msg_elem(msg, i);
		if (!sge->length)
			break;
		if (bytes < sge->length) {
			if (charge)
				sk_mem_uncharge(sk, bytes);
			sge->length -= bytes;
			sge->offset += bytes;
			msg->sg.size -= bytes;
			break;
		}

		msg->sg.size -= sge->length;
		bytes -= sge->length;
		sk_msg_free_elem(sk, msg, i, charge);
		sk_msg_iter_var_next(i);
		sk_msg_check_to_free(msg, i, bytes);
	}
	msg->sg.start = i;
}

void sk_msg_free_partial(struct sock *sk, struct sk_msg *msg, u32 bytes)
{
	__sk_msg_free_partial(sk, msg, bytes, true);
}
EXPORT_SYMBOL_GPL(sk_msg_free_partial);

void sk_msg_free_partial_nocharge(struct sock *sk, struct sk_msg *msg,
				  u32 bytes)
{
	__sk_msg_free_partial(sk, msg, bytes, false);
}

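/* Trim @msg down to @len bytes by freeing whole elements from the tail and
 * shortening the last remaining one, uncharging what was trimmed. The
 * curr/copybreak cursors are pulled back if they now point past the end.
 */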
void sk_msg_trim(struct sock *sk, struct sk_msg *msg, int len)
{
	int trim = msg->sg.size - len;
	u32 i = msg->sg.end;

	if (trim <= 0) {
		WARN_ON(trim < 0);
		return;
	}

	sk_msg_iter_var_prev(i);
	msg->sg.size = len;
	while (msg->sg.data[i].length &&
	       trim >= msg->sg.data[i].length) {
		trim -= msg->sg.data[i].length;
		sk_msg_free_elem(sk, msg, i, true);
		sk_msg_iter_var_prev(i);
		if (!trim)
			goto out;
	}

	msg->sg.data[i].length -= trim;
	sk_mem_uncharge(sk, trim);
	/* Adjust copybreak if it falls into the trimmed part of the last buf */
	if (msg->sg.curr == i && msg->sg.copybreak > msg->sg.data[i].length)
		msg->sg.copybreak = msg->sg.data[i].length;
out:
	sk_msg_iter_var_next(i);
	msg->sg.end = i;

	/* If we trim data a full sg elem before the curr pointer, update
	 * copybreak and curr so that any future copy operations start at
	 * the new copy location.
	 * However, trimmed data that has not yet been used in a copy op
	 * does not require an update.
	 */
	if (!msg->sg.size) {
		msg->sg.curr = msg->sg.start;
		msg->sg.copybreak = 0;
	} else if (sk_msg_iter_dist(msg->sg.start, msg->sg.curr) >=
		   sk_msg_iter_dist(msg->sg.start, msg->sg.end)) {
		sk_msg_iter_var_prev(i);
		msg->sg.curr = i;
		msg->sg.copybreak = msg->sg.data[i].length;
	}
}
EXPORT_SYMBOL_GPL(sk_msg_trim);

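/* Pin up to @bytes of user pages from @from and link them into @msg without
 * copying. On error the iterator is reverted; the caller must additionally
 * trim @msg if the partial progress should be undone as well.
 */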
int sk_msg_zerocopy_from_iter(struct sock *sk, struct iov_iter *from,
			      struct sk_msg *msg, u32 bytes)
{
	int i, maxpages, ret = 0, num_elems = sk_msg_elem_used(msg);
	const int to_max_pages = MAX_MSG_FRAGS;
	struct page *pages[MAX_MSG_FRAGS];
	ssize_t orig, copied, use, offset;

	orig = msg->sg.size;
	while (bytes > 0) {
		i = 0;
		maxpages = to_max_pages - num_elems;
		if (maxpages == 0) {
			ret = -EFAULT;
			goto out;
		}

		copied = iov_iter_get_pages2(from, pages, bytes, maxpages,
					     &offset);
		if (copied <= 0) {
			ret = -EFAULT;
			goto out;
		}

		bytes -= copied;
		msg->sg.size += copied;

		while (copied) {
			use = min_t(int, copied, PAGE_SIZE - offset);
			sg_set_page(&msg->sg.data[msg->sg.end],
				    pages[i], use, offset);
			sg_unmark_end(&msg->sg.data[msg->sg.end]);
			sk_mem_charge(sk, use);

			offset = 0;
			copied -= use;
			sk_msg_iter_next(msg, end);
			num_elems++;
			i++;
		}
		/* When zerocopy is mixed with sk_msg_*copy* operations we
		 * may have a copybreak set. In that case clear it and prefer
		 * the zerocopy remainder when possible.
		 */
		msg->sg.copybreak = 0;
		msg->sg.curr = msg->sg.end;
	}
out:
	/* Revert iov_iter updates; the msg will need to use 'trim' later if
	 * it also needs to be cleared.
	 */
	if (ret)
		iov_iter_revert(from, msg->sg.size - orig);
	return ret;
}
EXPORT_SYMBOL_GPL(sk_msg_zerocopy_from_iter);

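/* Copy up to @bytes from @from into the already allocated buffers of @msg,
 * starting at the curr/copybreak position and advancing it as data lands.
 */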
int sk_msg_memcopy_from_iter(struct sock *sk, struct iov_iter *from,
			     struct sk_msg *msg, u32 bytes)
{
	int ret = -ENOSPC, i = msg->sg.curr;
	struct scatterlist *sge;
	u32 copy, buf_size;
	void *to;

	do {
		sge = sk_msg_elem(msg, i);
		/* This is possible if a trim operation shrunk the buffer */
		if (msg->sg.copybreak >= sge->length) {
			msg->sg.copybreak = 0;
			sk_msg_iter_var_next(i);
			if (i == msg->sg.end)
				break;
			sge = sk_msg_elem(msg, i);
		}

		buf_size = sge->length - msg->sg.copybreak;
		copy = (buf_size > bytes) ? bytes : buf_size;
		to = sg_virt(sge) + msg->sg.copybreak;
		msg->sg.copybreak += copy;
		if (sk->sk_route_caps & NETIF_F_NOCACHE_COPY)
			ret = copy_from_iter_nocache(to, copy, from);
		else
			ret = copy_from_iter(to, copy, from);
		if (ret != copy) {
			ret = -EFAULT;
			goto out;
		}
		bytes -= copy;
		if (!bytes)
			break;
		msg->sg.copybreak = 0;
		sk_msg_iter_var_next(i);
	} while (i != msg->sg.end);
out:
	msg->sg.curr = i;
	return ret;
}
EXPORT_SYMBOL_GPL(sk_msg_memcopy_from_iter);

/* Receive sk_msg from psock->ingress_msg to @msg. */
int sk_msg_recvmsg(struct sock *sk, struct sk_psock *psock, struct msghdr *msg,
		   int len, int flags)
{
	struct iov_iter *iter = &msg->msg_iter;
	int peek = flags & MSG_PEEK;
	struct sk_msg *msg_rx;
	int i, copied = 0;

	msg_rx = sk_psock_peek_msg(psock);
	while (copied != len) {
		struct scatterlist *sge;

		if (unlikely(!msg_rx))
			break;

		i = msg_rx->sg.start;
		do {
			struct page *page;
			int copy;

			sge = sk_msg_elem(msg_rx, i);
			copy = sge->length;
			page = sg_page(sge);
			if (copied + copy > len)
				copy = len - copied;
			copy = copy_page_to_iter(page, sge->offset, copy, iter);
			if (!copy) {
				copied = copied ? copied : -EFAULT;
				goto out;
			}

			copied += copy;
			if (likely(!peek)) {
				sge->offset += copy;
				sge->length -= copy;
				if (!msg_rx->skb)
					sk_mem_uncharge(sk, copy);
				msg_rx->sg.size -= copy;

				if (!sge->length) {
					sk_msg_iter_var_next(i);
					if (!msg_rx->skb)
						put_page(page);
				}
			} else {
				/* Let's not optimize the peek case: if
				 * copy_page_to_iter() didn't copy the entire
				 * length, just break.
				 */
				if (copy != sge->length)
					goto out;
				sk_msg_iter_var_next(i);
			}

			if (copied == len)
				break;
		} while ((i != msg_rx->sg.end) && !sg_is_last(sge));

		if (unlikely(peek)) {
			msg_rx = sk_psock_next_msg(psock, msg_rx);
			if (!msg_rx)
				break;
			continue;
		}

		msg_rx->sg.start = i;
		if (!sge->length && (i == msg_rx->sg.end || sg_is_last(sge))) {
			msg_rx = sk_psock_dequeue_msg(psock);
			kfree_sk_msg(msg_rx);
		}
		msg_rx = sk_psock_peek_msg(psock);
	}
out:
	return copied;
}
EXPORT_SYMBOL_GPL(sk_msg_recvmsg);

bool sk_msg_is_readable(struct sock *sk)
{
	struct sk_psock *psock;
	bool empty = true;

	rcu_read_lock();
	psock = sk_psock(sk);
	if (likely(psock))
		empty = list_empty(&psock->ingress_msg);
	rcu_read_unlock();
	return !empty;
}
EXPORT_SYMBOL_GPL(sk_msg_is_readable);

static struct sk_msg *alloc_sk_msg(gfp_t gfp)
{
	struct sk_msg *msg;

	msg = kzalloc(sizeof(*msg), gfp | __GFP_NOWARN);
	if (unlikely(!msg))
		return NULL;
	sg_init_marker(msg->sg.data, NR_MSG_FRAG_IDS);
	return msg;
}

static struct sk_msg *sk_psock_create_ingress_msg(struct sock *sk,
						  struct sk_buff *skb)
{
	if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf)
		return NULL;

	if (!sk_rmem_schedule(sk, skb, skb->truesize))
		return NULL;

	return alloc_sk_msg(GFP_KERNEL);
}

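/* Map @len bytes of @skb starting at @off into @msg's scatterlist, then
 * queue the msg on the psock's ingress list and wake the socket. If the
 * initial mapping fails the skb is linearized and mapped once more.
 */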
static int sk_psock_skb_ingress_enqueue(struct sk_buff *skb,
					u32 off, u32 len,
					struct sk_psock *psock,
					struct sock *sk,
					struct sk_msg *msg)
{
	int num_sge, copied;

	num_sge = skb_to_sgvec(skb, msg->sg.data, off, len);
	if (num_sge < 0) {
		/* skb linearize may fail with ENOMEM, but let's simply try
		 * again later if this happens. Under memory pressure we don't
		 * want to drop the skb. We need to linearize the skb so that
		 * the mapping in skb_to_sgvec() cannot fail.
		 */
		if (skb_linearize(skb))
			return -EAGAIN;

		num_sge = skb_to_sgvec(skb, msg->sg.data, off, len);
		if (unlikely(num_sge < 0))
			return num_sge;
	}

	copied = len;
	msg->sg.start = 0;
	msg->sg.size = copied;
	msg->sg.end = num_sge;
	msg->skb = skb;

	sk_psock_queue_msg(psock, msg);
	sk_psock_data_ready(sk, psock);
	return copied;
}

static int sk_psock_skb_ingress_self(struct sk_psock *psock, struct sk_buff *skb,
				     u32 off, u32 len);

static int sk_psock_skb_ingress(struct sk_psock *psock, struct sk_buff *skb,
				u32 off, u32 len)
{
	struct sock *sk = psock->sk;
	struct sk_msg *msg;
	int err;

	/* If we are receiving on the same sock skb->sk is already assigned,
	 * skip memory accounting and owner transition since it is already
	 * set correctly.
	 */
	if (unlikely(skb->sk == sk))
		return sk_psock_skb_ingress_self(psock, skb, off, len);
	msg = sk_psock_create_ingress_msg(sk, skb);
	if (!msg)
		return -EAGAIN;

	/* This will transition ownership of the data from the socket where
	 * the BPF program was run initiating the redirect to the socket
	 * we will eventually receive this data on. The data will be released
	 * via consume_skb() from __tcp_bpf_recvmsg() after it's been copied
	 * into user buffers.
	 */
	skb_set_owner_r(skb, sk);
	err = sk_psock_skb_ingress_enqueue(skb, off, len, psock, sk, msg);
	if (err < 0)
		kfree(msg);
	return err;
}

/* Puts an skb on the ingress queue of the socket already assigned to the
 * skb. In this case we do not need to check memory limits or skb_set_owner_r()
 * because the skb is already accounted for here.
 */
static int sk_psock_skb_ingress_self(struct sk_psock *psock, struct sk_buff *skb,
				     u32 off, u32 len)
{
	struct sk_msg *msg = alloc_sk_msg(GFP_ATOMIC);
	struct sock *sk = psock->sk;
	int err;

	if (unlikely(!msg))
		return -EAGAIN;
	skb_set_owner_r(skb, sk);
	err = sk_psock_skb_ingress_enqueue(skb, off, len, psock, sk, msg);
	if (err < 0)
		kfree(msg);
	return err;
}

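/* Dispatch one skb either to this psock's ingress path or, for egress, out
 * via skb_send_sock(). The ingress path takes its own skb reference since
 * the caller keeps the skb queued for potential retries.
 */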
static int sk_psock_handle_skb(struct sk_psock *psock, struct sk_buff *skb,
			       u32 off, u32 len, bool ingress)
{
	int err = 0;

	if (!ingress) {
		if (!sock_writeable(psock->sk))
			return -EAGAIN;
		return skb_send_sock(psock->sk, skb, off, len);
	}
	skb_get(skb);
	err = sk_psock_skb_ingress(psock, skb, off, len);
	if (err < 0)
		kfree_skb(skb);
	return err;
}

static void sk_psock_skb_state(struct sk_psock *psock,
			       struct sk_psock_work_state *state,
			       int len, int off)
{
	spin_lock_bh(&psock->ingress_lock);
	if (sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED)) {
		state->len = len;
		state->off = off;
	}
	spin_unlock_bh(&psock->ingress_lock);
}

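/* Workqueue handler that drains psock->ingress_skb, resuming from any
 * partial progress saved in work_state. -EAGAIN reschedules the work with
 * a small delay; any other error reports it and disables transmit.
 */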
static void sk_psock_backlog(struct work_struct *work)
{
	struct delayed_work *dwork = to_delayed_work(work);
	struct sk_psock *psock = container_of(dwork, struct sk_psock, work);
	struct sk_psock_work_state *state = &psock->work_state;
	struct sk_buff *skb = NULL;
	u32 len = 0, off = 0;
	bool ingress;
	int ret;

	mutex_lock(&psock->work_mutex);
	if (unlikely(state->len)) {
		len = state->len;
		off = state->off;
	}

	while ((skb = skb_peek(&psock->ingress_skb))) {
		len = skb->len;
		off = 0;
		if (skb_bpf_strparser(skb)) {
			struct strp_msg *stm = strp_msg(skb);

			off = stm->offset;
			len = stm->full_len;
		}
		ingress = skb_bpf_ingress(skb);
		skb_bpf_redirect_clear(skb);
		do {
			ret = -EIO;
			if (!sock_flag(psock->sk, SOCK_DEAD))
				ret = sk_psock_handle_skb(psock, skb, off,
							  len, ingress);
			if (ret <= 0) {
				if (ret == -EAGAIN) {
					sk_psock_skb_state(psock, state, len, off);

					/* Delay slightly to prioritize any
					 * other work that might be here.
					 */
					if (sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED))
						schedule_delayed_work(&psock->work, 1);
					goto end;
				}
				/* Hard errors break pipe and stop xmit. */
				sk_psock_report_error(psock, ret ? -ret : EPIPE);
				sk_psock_clear_state(psock, SK_PSOCK_TX_ENABLED);
				goto end;
			}
			off += ret;
			len -= ret;
		} while (len);

		skb = skb_dequeue(&psock->ingress_skb);
		kfree_skb(skb);
	}
end:
	mutex_unlock(&psock->work_mutex);
}

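/* Allocate a psock and attach it to @sk via sk_user_data, saving the
 * socket's original proto callbacks for restoration on teardown. Fails
 * with -EINVAL if a ULP is attached and -EBUSY if sk_user_data is taken.
 */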
struct sk_psock *sk_psock_init(struct sock *sk, int node)
{
	struct sk_psock *psock;
	struct proto *prot;

	write_lock_bh(&sk->sk_callback_lock);

	if (sk_is_inet(sk) && inet_csk_has_ulp(sk)) {
		psock = ERR_PTR(-EINVAL);
		goto out;
	}

	if (sk->sk_user_data) {
		psock = ERR_PTR(-EBUSY);
		goto out;
	}

	psock = kzalloc_node(sizeof(*psock), GFP_ATOMIC | __GFP_NOWARN, node);
	if (!psock) {
		psock = ERR_PTR(-ENOMEM);
		goto out;
	}

	prot = READ_ONCE(sk->sk_prot);
	psock->sk = sk;
	psock->eval = __SK_NONE;
	psock->sk_proto = prot;
	psock->saved_unhash = prot->unhash;
	psock->saved_destroy = prot->destroy;
	psock->saved_close = prot->close;
	psock->saved_write_space = sk->sk_write_space;

	INIT_LIST_HEAD(&psock->link);
	spin_lock_init(&psock->link_lock);

	INIT_DELAYED_WORK(&psock->work, sk_psock_backlog);
	mutex_init(&psock->work_mutex);
	INIT_LIST_HEAD(&psock->ingress_msg);
	spin_lock_init(&psock->ingress_lock);
	skb_queue_head_init(&psock->ingress_skb);

	sk_psock_set_state(psock, SK_PSOCK_TX_ENABLED);
	refcount_set(&psock->refcnt, 1);

	__rcu_assign_sk_user_data_with_flags(sk, psock,
					     SK_USER_DATA_NOCOPY |
					     SK_USER_DATA_PSOCK);
	sock_hold(sk);

out:
	write_unlock_bh(&sk->sk_callback_lock);
	return psock;
}
EXPORT_SYMBOL_GPL(sk_psock_init);

struct sk_psock_link *sk_psock_link_pop(struct sk_psock *psock)
{
	struct sk_psock_link *link;

	spin_lock_bh(&psock->link_lock);
	link = list_first_entry_or_null(&psock->link, struct sk_psock_link,
					list);
	if (link)
		list_del(&link->list);
	spin_unlock_bh(&psock->link_lock);
	return link;
}

static void __sk_psock_purge_ingress_msg(struct sk_psock *psock)
{
	struct sk_msg *msg, *tmp;

	list_for_each_entry_safe(msg, tmp, &psock->ingress_msg, list) {
		list_del(&msg->list);
		sk_msg_free(psock->sk, msg);
		kfree(msg);
	}
}

static void __sk_psock_zap_ingress(struct sk_psock *psock)
{
	struct sk_buff *skb;

	while ((skb = skb_dequeue(&psock->ingress_skb)) != NULL) {
		skb_bpf_redirect_clear(skb);
		sock_drop(psock->sk, skb);
	}
	__sk_psock_purge_ingress_msg(psock);
}

static void sk_psock_link_destroy(struct sk_psock *psock)
{
	struct sk_psock_link *link, *tmp;

	list_for_each_entry_safe(link, tmp, &psock->link, list) {
		list_del(&link->list);
		sk_psock_free_link(link);
	}
}

void sk_psock_stop(struct sk_psock *psock)
{
	spin_lock_bh(&psock->ingress_lock);
	sk_psock_clear_state(psock, SK_PSOCK_TX_ENABLED);
	sk_psock_cork_free(psock);
	spin_unlock_bh(&psock->ingress_lock);
}

static void sk_psock_done_strp(struct sk_psock *psock);

static void sk_psock_destroy(struct work_struct *work)
{
	struct sk_psock *psock = container_of(to_rcu_work(work),
					      struct sk_psock, rwork);
	/* No sk_callback_lock since already detached. */

	sk_psock_done_strp(psock);

	cancel_delayed_work_sync(&psock->work);
	__sk_psock_zap_ingress(psock);
	mutex_destroy(&psock->work_mutex);

	psock_progs_drop(&psock->progs);

	sk_psock_link_destroy(psock);
	sk_psock_cork_free(psock);

	if (psock->sk_redir)
		sock_put(psock->sk_redir);
	if (psock->sk_pair)
		sock_put(psock->sk_pair);
	sock_put(psock->sk);
	kfree(psock);
}

void sk_psock_drop(struct sock *sk, struct sk_psock *psock)
{
	write_lock_bh(&sk->sk_callback_lock);
	sk_psock_restore_proto(sk, psock);
	rcu_assign_sk_user_data(sk, NULL);
	if (psock->progs.stream_parser)
		sk_psock_stop_strp(sk, psock);
	else if (psock->progs.stream_verdict || psock->progs.skb_verdict)
		sk_psock_stop_verdict(sk, psock);
	write_unlock_bh(&sk->sk_callback_lock);

	sk_psock_stop(psock);

	INIT_RCU_WORK(&psock->rwork, sk_psock_destroy);
	queue_rcu_work(system_wq, &psock->rwork);
}
EXPORT_SYMBOL_GPL(sk_psock_drop);

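/* Map a BPF verdict (SK_PASS/SK_DROP) plus redirect state onto the internal
 * __SK_* action codes; SK_PASS with a redirect socket set becomes
 * __SK_REDIRECT, and anything unrecognized falls back to __SK_DROP.
 */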
static int sk_psock_map_verd(int verdict, bool redir)
{
	switch (verdict) {
	case SK_PASS:
		return redir ? __SK_REDIRECT : __SK_PASS;
	case SK_DROP:
	default:
		break;
	}

	return __SK_DROP;
}

int sk_psock_msg_verdict(struct sock *sk, struct sk_psock *psock,
			 struct sk_msg *msg)
{
	struct bpf_prog *prog;
	int ret;

	rcu_read_lock();
	prog = READ_ONCE(psock->progs.msg_parser);
	if (unlikely(!prog)) {
		ret = __SK_PASS;
		goto out;
	}

	sk_msg_compute_data_pointers(msg);
	msg->sk = sk;
	ret = bpf_prog_run_pin_on_cpu(prog, msg);
	ret = sk_psock_map_verd(ret, msg->sk_redir);
	psock->apply_bytes = msg->apply_bytes;
	if (ret == __SK_REDIRECT) {
		if (psock->sk_redir) {
			sock_put(psock->sk_redir);
			psock->sk_redir = NULL;
		}
		if (!msg->sk_redir) {
			ret = __SK_DROP;
			goto out;
		}
		psock->redir_ingress = sk_msg_to_ingress(msg);
		psock->sk_redir = msg->sk_redir;
		sock_hold(psock->sk_redir);
	}
out:
	rcu_read_unlock();
	return ret;
}
EXPORT_SYMBOL_GPL(sk_psock_msg_verdict);

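/* Move @skb onto the ingress queue of the psock chosen by the BPF redirect
 * verdict and kick that psock's backlog work. Drops the skb and returns
 * -EIO if the target is missing, dead or no longer accepting transmits.
 */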
static int sk_psock_skb_redirect(struct sk_psock *from, struct sk_buff *skb)
{
	struct sk_psock *psock_other;
	struct sock *sk_other;

	sk_other = skb_bpf_redirect_fetch(skb);
	/* This error indicates a buggy BPF program: it returned a redirect
	 * verdict but then didn't set a redirect socket.
	 */
	if (unlikely(!sk_other)) {
		skb_bpf_redirect_clear(skb);
		sock_drop(from->sk, skb);
		return -EIO;
	}
	psock_other = sk_psock(sk_other);
	/* This error indicates the socket is being torn down or had another
	 * error that caused the pipe to break. We can't send a packet on
	 * a socket that is in this state so we drop the skb.
	 */
	if (!psock_other || sock_flag(sk_other, SOCK_DEAD)) {
		skb_bpf_redirect_clear(skb);
		sock_drop(from->sk, skb);
		return -EIO;
	}
	spin_lock_bh(&psock_other->ingress_lock);
	if (!sk_psock_test_state(psock_other, SK_PSOCK_TX_ENABLED)) {
		spin_unlock_bh(&psock_other->ingress_lock);
		skb_bpf_redirect_clear(skb);
		sock_drop(from->sk, skb);
		return -EIO;
	}

	skb_queue_tail(&psock_other->ingress_skb, skb);
	schedule_delayed_work(&psock_other->work, 0);
	spin_unlock_bh(&psock_other->ingress_lock);
	return 0;
}

static void sk_psock_tls_verdict_apply(struct sk_buff *skb,
				       struct sk_psock *from, int verdict)
{
	switch (verdict) {
	case __SK_REDIRECT:
		sk_psock_skb_redirect(from, skb);
		break;
	case __SK_PASS:
	case __SK_DROP:
	default:
		break;
	}
}

int sk_psock_tls_strp_read(struct sk_psock *psock, struct sk_buff *skb)
{
	struct bpf_prog *prog;
	int ret = __SK_PASS;

	rcu_read_lock();
	prog = READ_ONCE(psock->progs.stream_verdict);
	if (likely(prog)) {
		skb->sk = psock->sk;
		skb_dst_drop(skb);
		skb_bpf_redirect_clear(skb);
		ret = bpf_prog_run_pin_on_cpu(prog, skb);
		ret = sk_psock_map_verd(ret, skb_bpf_redirect_fetch(skb));
		skb->sk = NULL;
	}
	sk_psock_tls_verdict_apply(skb, psock, ret);
	rcu_read_unlock();
	return ret;
}
EXPORT_SYMBOL_GPL(sk_psock_tls_strp_read);

static int sk_psock_verdict_apply(struct sk_psock *psock, struct sk_buff *skb,
				  int verdict)
{
	struct sock *sk_other;
	int err = 0;
	u32 len, off;

	switch (verdict) {
	case __SK_PASS:
		err = -EIO;
		sk_other = psock->sk;
		if (sock_flag(sk_other, SOCK_DEAD) ||
		    !sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED))
			goto out_free;

		skb_bpf_set_ingress(skb);

		/* If the queue is empty then we can submit directly
		 * into the msg queue. If it's not empty we have to
		 * queue work, otherwise we may get OOO data. Any
		 * error from sk_psock_skb_ingress() will be handled
		 * by retrying later from the workqueue.
		 */
		if (skb_queue_empty(&psock->ingress_skb)) {
			len = skb->len;
			off = 0;
			if (skb_bpf_strparser(skb)) {
				struct strp_msg *stm = strp_msg(skb);

				off = stm->offset;
				len = stm->full_len;
			}
			err = sk_psock_skb_ingress_self(psock, skb, off, len);
		}
		if (err < 0) {
			spin_lock_bh(&psock->ingress_lock);
			if (sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED)) {
				skb_queue_tail(&psock->ingress_skb, skb);
				schedule_delayed_work(&psock->work, 0);
				err = 0;
			}
			spin_unlock_bh(&psock->ingress_lock);
			if (err < 0)
				goto out_free;
		}
		break;
	case __SK_REDIRECT:
		tcp_eat_skb(psock->sk, skb);
		err = sk_psock_skb_redirect(psock, skb);
		break;
	case __SK_DROP:
	default:
out_free:
		skb_bpf_redirect_clear(skb);
		tcp_eat_skb(psock->sk, skb);
		sock_drop(psock->sk, skb);
	}

	return err;
}

static void sk_psock_write_space(struct sock *sk)
{
	struct sk_psock *psock;
	void (*write_space)(struct sock *sk) = NULL;

	rcu_read_lock();
	psock = sk_psock(sk);
	if (likely(psock)) {
		if (sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED))
			schedule_delayed_work(&psock->work, 0);
		write_space = psock->saved_write_space;
	}
	rcu_read_unlock();
	if (write_space)
		write_space(sk);
}

#if IS_ENABLED(CONFIG_BPF_STREAM_PARSER)
static void sk_psock_strp_read(struct strparser *strp, struct sk_buff *skb)
{
	struct sk_psock *psock;
	struct bpf_prog *prog;
	int ret = __SK_DROP;
	struct sock *sk;

	rcu_read_lock();
	sk = strp->sk;
	psock = sk_psock(sk);
	if (unlikely(!psock)) {
		sock_drop(sk, skb);
		goto out;
	}
	prog = READ_ONCE(psock->progs.stream_verdict);
	if (likely(prog)) {
		skb->sk = sk;
		skb_dst_drop(skb);
		skb_bpf_redirect_clear(skb);
		ret = bpf_prog_run_pin_on_cpu(prog, skb);
		skb_bpf_set_strparser(skb);
		ret = sk_psock_map_verd(ret, skb_bpf_redirect_fetch(skb));
		skb->sk = NULL;
	}
	sk_psock_verdict_apply(psock, skb, ret);
out:
	rcu_read_unlock();
}

static int sk_psock_strp_read_done(struct strparser *strp, int err)
{
	return err;
}

static int sk_psock_strp_parse(struct strparser *strp, struct sk_buff *skb)
{
	struct sk_psock *psock = container_of(strp, struct sk_psock, strp);
	struct bpf_prog *prog;
	int ret = skb->len;

	rcu_read_lock();
	prog = READ_ONCE(psock->progs.stream_parser);
	if (likely(prog)) {
		skb->sk = psock->sk;
		ret = bpf_prog_run_pin_on_cpu(prog, skb);
		skb->sk = NULL;
	}
	rcu_read_unlock();
	return ret;
}

/* Called with socket lock held. */
static void sk_psock_strp_data_ready(struct sock *sk)
{
	struct sk_psock *psock;

	trace_sk_data_ready(sk);

	rcu_read_lock();
	psock = sk_psock(sk);
	if (likely(psock)) {
		if (tls_sw_has_ctx_rx(sk)) {
			psock->saved_data_ready(sk);
		} else {
			write_lock_bh(&sk->sk_callback_lock);
			strp_data_ready(&psock->strp);
			write_unlock_bh(&sk->sk_callback_lock);
		}
	}
	rcu_read_unlock();
}

int sk_psock_init_strp(struct sock *sk, struct sk_psock *psock)
{
	int ret;

	static const struct strp_callbacks cb = {
		.rcv_msg	= sk_psock_strp_read,
		.read_sock_done	= sk_psock_strp_read_done,
		.parse_msg	= sk_psock_strp_parse,
	};

	ret = strp_init(&psock->strp, sk, &cb);
	if (!ret)
		sk_psock_set_state(psock, SK_PSOCK_RX_STRP_ENABLED);

	return ret;
}

void sk_psock_start_strp(struct sock *sk, struct sk_psock *psock)
{
	if (psock->saved_data_ready)
		return;

	psock->saved_data_ready = sk->sk_data_ready;
	sk->sk_data_ready = sk_psock_strp_data_ready;
	sk->sk_write_space = sk_psock_write_space;
}

void sk_psock_stop_strp(struct sock *sk, struct sk_psock *psock)
{
	psock_set_prog(&psock->progs.stream_parser, NULL);

	if (!psock->saved_data_ready)
		return;

	sk->sk_data_ready = psock->saved_data_ready;
	psock->saved_data_ready = NULL;
	strp_stop(&psock->strp);
}

static void sk_psock_done_strp(struct sk_psock *psock)
{
	/* Parser has been stopped */
	if (sk_psock_test_state(psock, SK_PSOCK_RX_STRP_ENABLED))
		strp_done(&psock->strp);
}
#else
static void sk_psock_done_strp(struct sk_psock *psock)
{
}
#endif /* CONFIG_BPF_STREAM_PARSER */

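/* read_skb() callback used when no stream parser is attached: run the
 * stream (or skb) verdict program on each skb and apply the result
 * immediately. Returns the number of bytes consumed or a negative error.
 */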
static int sk_psock_verdict_recv(struct sock *sk, struct sk_buff *skb)
{
	struct sk_psock *psock;
	struct bpf_prog *prog;
	int ret = __SK_DROP;
	int len = skb->len;

	rcu_read_lock();
	psock = sk_psock(sk);
	if (unlikely(!psock)) {
		len = 0;
		tcp_eat_skb(sk, skb);
		sock_drop(sk, skb);
		goto out;
	}
	prog = READ_ONCE(psock->progs.stream_verdict);
	if (!prog)
		prog = READ_ONCE(psock->progs.skb_verdict);
	if (likely(prog)) {
		skb_dst_drop(skb);
		skb_bpf_redirect_clear(skb);
		ret = bpf_prog_run_pin_on_cpu(prog, skb);
		ret = sk_psock_map_verd(ret, skb_bpf_redirect_fetch(skb));
	}
	ret = sk_psock_verdict_apply(psock, skb, ret);
	if (ret < 0)
		len = ret;
out:
	rcu_read_unlock();
	return len;
}

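/* Replacement sk_data_ready callback in verdict-only mode: pull skbs out of
 * the protocol via ops->read_skb() and feed them to sk_psock_verdict_recv(),
 * then propagate the wakeup through sk_psock_data_ready().
 */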
static void sk_psock_verdict_data_ready(struct sock *sk)
{
	struct socket *sock = sk->sk_socket;
	const struct proto_ops *ops;
	int copied;

	trace_sk_data_ready(sk);

	if (unlikely(!sock))
		return;
	ops = READ_ONCE(sock->ops);
	if (!ops || !ops->read_skb)
		return;
	copied = ops->read_skb(sk, sk_psock_verdict_recv);
	if (copied >= 0) {
		struct sk_psock *psock;

		rcu_read_lock();
		psock = sk_psock(sk);
		if (psock) {
			read_lock_bh(&sk->sk_callback_lock);
			sk_psock_data_ready(sk, psock);
			read_unlock_bh(&sk->sk_callback_lock);
		}
		rcu_read_unlock();
	}
}

void sk_psock_start_verdict(struct sock *sk, struct sk_psock *psock)
{
	if (psock->saved_data_ready)
		return;

	psock->saved_data_ready = sk->sk_data_ready;
	sk->sk_data_ready = sk_psock_verdict_data_ready;
	sk->sk_write_space = sk_psock_write_space;
}

void sk_psock_stop_verdict(struct sock *sk, struct sk_psock *psock)
{
	psock_set_prog(&psock->progs.stream_verdict, NULL);
	psock_set_prog(&psock->progs.skb_verdict, NULL);

	if (!psock->saved_data_ready)
		return;

	sk->sk_data_ready = psock->saved_data_ready;
	psock->saved_data_ready = NULL;
}