// SPDX-License-Identifier: GPL-2.0-or-later
/*
 *	IPV6 GSO/GRO offload support
 *	Linux INET6 implementation
 */

#include <linux/kernel.h>
#include <linux/socket.h>
#include <linux/netdevice.h>
#include <linux/skbuff.h>
#include <linux/printk.h>

#include <net/protocol.h>
#include <net/ipv6.h>
#include <net/inet_common.h>
#include <net/tcp.h>
#include <net/udp.h>
#include <net/gro.h>
#include <net/gso.h>

#include "ip6_offload.h"

/* All GRO functions are always builtin, except UDP over ipv6, which lives in
 * the ipv6 module, as it depends on the UDPv6 lookup function, so we need
 * special care when ipv6 is built as a module.
 */
#if IS_BUILTIN(CONFIG_IPV6)
#define INDIRECT_CALL_L4(f, f2, f1, ...) INDIRECT_CALL_2(f, f2, f1, __VA_ARGS__)
#else
#define INDIRECT_CALL_L4(f, f2, f1, ...) INDIRECT_CALL_1(f, f2, __VA_ARGS__)
#endif

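/* Call the L4 gro_receive handler: if the GRO recursion limit has been hit,
 * mark the skb for flushing and return NULL; otherwise dispatch through
 * INDIRECT_CALL_L4 so that, with retpolines enabled, the common TCP/UDP
 * handlers are reached by a direct call rather than an indirect one.
 */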
#define indirect_call_gro_receive_l4(f2, f1, cb, head, skb)	\
({								\
	unlikely(gro_recursion_inc_test(skb)) ?			\
		NAPI_GRO_CB(skb)->flush |= 1, NULL :		\
		INDIRECT_CALL_L4(cb, f2, f1, head, skb);	\
})

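/* Walk the chain of IPv6 extension headers at the current skb->data, pulling
 * every header whose protocol is marked INET6_PROTO_GSO_EXTHDR (plus any
 * hop-by-hop header), and return the next-header value of the first header
 * that is not an extension header.
 */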
static int ipv6_gso_pull_exthdrs(struct sk_buff *skb, int proto)
{
	const struct net_offload *ops = NULL;

	for (;;) {
		struct ipv6_opt_hdr *opth;
		int len;

		if (proto != NEXTHDR_HOP) {
			ops = rcu_dereference(inet6_offloads[proto]);

			if (unlikely(!ops))
				break;

			if (!(ops->flags & INET6_PROTO_GSO_EXTHDR))
				break;
		}

		if (unlikely(!pskb_may_pull(skb, 8)))
			break;

		opth = (void *)skb->data;
		len = ipv6_optlen(opth);

		if (unlikely(!pskb_may_pull(skb, len)))
			break;

		opth = (void *)skb->data;
		proto = opth->nexthdr;
		__skb_pull(skb, len);
	}

	return proto;
}

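/* GSO segmentation for ETH_P_IPV6: strip any hop-by-hop jumbogram option,
 * pull the IPv6 and extension headers, hand the packet to the L4 or tunnel
 * gso_segment callback, then fix up payload_len (and the fragment header
 * offsets in the UDP fragmentation case) in every resulting segment.
 */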
static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb,
	netdev_features_t features)
{
	struct sk_buff *segs = ERR_PTR(-EINVAL);
	struct ipv6hdr *ipv6h;
	const struct net_offload *ops;
	int proto, err;
	struct frag_hdr *fptr;
	unsigned int payload_len;
	u8 *prevhdr;
	int offset = 0;
	bool encap, udpfrag;
	int nhoff;
	bool gso_partial;

	skb_reset_network_header(skb);
	err = ipv6_hopopt_jumbo_remove(skb);
	if (err)
		return ERR_PTR(err);
	nhoff = skb_network_header(skb) - skb_mac_header(skb);
	if (unlikely(!pskb_may_pull(skb, sizeof(*ipv6h))))
		goto out;

	encap = SKB_GSO_CB(skb)->encap_level > 0;
	if (encap)
		features &= skb->dev->hw_enc_features;
	SKB_GSO_CB(skb)->encap_level += sizeof(*ipv6h);

	ipv6h = ipv6_hdr(skb);
	__skb_pull(skb, sizeof(*ipv6h));
	segs = ERR_PTR(-EPROTONOSUPPORT);

	proto = ipv6_gso_pull_exthdrs(skb, ipv6h->nexthdr);

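	/* Decide whether UDP fragmentation (SKB_GSO_UDP) applies to the
	 * headers being segmented here: for IPxIP tunnels it only does so
	 * for the inner headers of the encapsulated packet, otherwise only
	 * when there is no encapsulation at all.
	 */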
	if (skb->encapsulation &&
	    skb_shinfo(skb)->gso_type & (SKB_GSO_IPXIP4 | SKB_GSO_IPXIP6))
		udpfrag = proto == IPPROTO_UDP && encap &&
			  (skb_shinfo(skb)->gso_type & SKB_GSO_UDP);
	else
		udpfrag = proto == IPPROTO_UDP && !skb->encapsulation &&
			  (skb_shinfo(skb)->gso_type & SKB_GSO_UDP);

	ops = rcu_dereference(inet6_offloads[proto]);
	if (likely(ops && ops->callbacks.gso_segment)) {
		skb_reset_transport_header(skb);
		segs = ops->callbacks.gso_segment(skb, features);
		if (!segs)
			skb->network_header = skb_mac_header(skb) + nhoff - skb->head;
	}

	if (IS_ERR_OR_NULL(segs))
		goto out;

	gso_partial = !!(skb_shinfo(segs)->gso_type & SKB_GSO_PARTIAL);

	for (skb = segs; skb; skb = skb->next) {
		ipv6h = (struct ipv6hdr *)(skb_mac_header(skb) + nhoff);
		if (gso_partial && skb_is_gso(skb))
			payload_len = skb_shinfo(skb)->gso_size +
				      SKB_GSO_CB(skb)->data_offset +
				      skb->head - (unsigned char *)(ipv6h + 1);
		else
			payload_len = skb->len - nhoff - sizeof(*ipv6h);
		ipv6h->payload_len = htons(payload_len);
		skb->network_header = (u8 *)ipv6h - skb->head;
		skb_reset_mac_len(skb);

		if (udpfrag) {
			int err = ip6_find_1stfragopt(skb, &prevhdr);

			if (err < 0) {
				kfree_skb_list(segs);
				return ERR_PTR(err);
			}
			fptr = (struct frag_hdr *)((u8 *)ipv6h + err);
			fptr->frag_off = htons(offset);
			if (skb->next)
				fptr->frag_off |= htons(IP6_MF);
			offset += (ntohs(ipv6h->payload_len) -
				   sizeof(struct frag_hdr));
		}
		if (encap)
			skb_reset_inner_headers(skb);
	}

out:
	return segs;
}

/* Return the total length of all the extension hdrs, following the same
 * logic in ipv6_gso_pull_exthdrs() when parsing ext-hdrs.
 */
static int ipv6_exthdrs_len(struct ipv6hdr *iph,
			    const struct net_offload **opps)
{
	struct ipv6_opt_hdr *opth = (void *)iph;
	int len = 0, proto, optlen = sizeof(*iph);

	proto = iph->nexthdr;
	for (;;) {
		if (proto != NEXTHDR_HOP) {
			*opps = rcu_dereference(inet6_offloads[proto]);
			if (unlikely(!(*opps)))
				break;
			if (!((*opps)->flags & INET6_PROTO_GSO_EXTHDR))
				break;
		}
		opth = (void *)opth + optlen;
		optlen = ipv6_optlen(opth);
		len += optlen;
		proto = opth->nexthdr;
	}
	return len;
}

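/* GRO receive for ETH_P_IPV6: parse the IPv6 header (pulling extension
 * headers in the slow path when the next header has no gro_receive callback),
 * compare it against the packets already held on the GRO list to set
 * same_flow/flush, then hand off to the L4 gro_receive callback (TCP or UDP
 * in the common case).
 */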
INDIRECT_CALLABLE_SCOPE struct sk_buff *ipv6_gro_receive(struct list_head *head,
							  struct sk_buff *skb)
{
	const struct net_offload *ops;
	struct sk_buff *pp = NULL;
	struct sk_buff *p;
	struct ipv6hdr *iph;
	unsigned int nlen;
	unsigned int hlen;
	unsigned int off;
	u16 flush = 1;
	int proto;

	off = skb_gro_offset(skb);
	hlen = off + sizeof(*iph);
	iph = skb_gro_header(skb, hlen, off);
	if (unlikely(!iph))
		goto out;

	skb_set_network_header(skb, off);
	skb_gro_pull(skb, sizeof(*iph));
	skb_set_transport_header(skb, skb_gro_offset(skb));

	flush += ntohs(iph->payload_len) != skb_gro_len(skb);

	proto = iph->nexthdr;
	ops = rcu_dereference(inet6_offloads[proto]);
	if (!ops || !ops->callbacks.gro_receive) {
		pskb_pull(skb, skb_gro_offset(skb));
		skb_gro_frag0_invalidate(skb);
		proto = ipv6_gso_pull_exthdrs(skb, proto);
		skb_gro_pull(skb, -skb_transport_offset(skb));
		skb_reset_transport_header(skb);
		__skb_push(skb, skb_gro_offset(skb));

		ops = rcu_dereference(inet6_offloads[proto]);
		if (!ops || !ops->callbacks.gro_receive)
			goto out;

		iph = ipv6_hdr(skb);
	}

	NAPI_GRO_CB(skb)->proto = proto;

	flush--;
	nlen = skb_network_header_len(skb);

	list_for_each_entry(p, head, list) {
		const struct ipv6hdr *iph2;
		__be32 first_word; /* <Version:4><Traffic_Class:8><Flow_Label:20> */

		if (!NAPI_GRO_CB(p)->same_flow)
			continue;

		iph2 = (struct ipv6hdr *)(p->data + off);
		first_word = *(__be32 *)iph ^ *(__be32 *)iph2;

		/* All fields must match except length and Traffic Class.
		 * XXX skbs on the gro_list have all been parsed and pulled
		 * already so we don't need to compare nlen
		 * (nlen != (sizeof(*iph2) + ipv6_exthdrs_len(iph2, &ops)))
		 * memcmp() alone below is sufficient, right?
		 */
		if ((first_word & htonl(0xF00FFFFF)) ||
		    !ipv6_addr_equal(&iph->saddr, &iph2->saddr) ||
		    !ipv6_addr_equal(&iph->daddr, &iph2->daddr) ||
		    iph->nexthdr != iph2->nexthdr) {
not_same_flow:
			NAPI_GRO_CB(p)->same_flow = 0;
			continue;
		}
		if (unlikely(nlen > sizeof(struct ipv6hdr))) {
			if (memcmp(iph + 1, iph2 + 1,
				   nlen - sizeof(struct ipv6hdr)))
				goto not_same_flow;
		}
		/* flush if Traffic Class or Hop Limit fields are different */
		NAPI_GRO_CB(p)->flush |= !!((first_word & htonl(0x0FF00000)) |
			(__force __be32)(iph->hop_limit ^ iph2->hop_limit));
		NAPI_GRO_CB(p)->flush |= flush;

		/* If the previous IP ID value was based on an atomic
		 * datagram we can overwrite the value and ignore it.
		 */
		if (NAPI_GRO_CB(skb)->is_atomic)
			NAPI_GRO_CB(p)->flush_id = 0;
	}

	NAPI_GRO_CB(skb)->is_atomic = true;
	NAPI_GRO_CB(skb)->flush |= flush;

	skb_gro_postpull_rcsum(skb, iph, nlen);

	pp = indirect_call_gro_receive_l4(tcp6_gro_receive, udp6_gro_receive,
					  ops->callbacks.gro_receive, head, skb);

out:
	skb_gro_flush_final(skb, pp, flush);

	return pp;
}

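/* Tunnel GRO receive helpers: encap_mark ensures that only one level of
 * IP-in-IP encapsulation is aggregated; a second tunnel header forces a
 * flush instead.
 */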
static struct sk_buff *sit_ip6ip6_gro_receive(struct list_head *head,
					      struct sk_buff *skb)
{
	/* Common GRO receive for SIT and IP6IP6 */

	if (NAPI_GRO_CB(skb)->encap_mark) {
		NAPI_GRO_CB(skb)->flush = 1;
		return NULL;
	}

	NAPI_GRO_CB(skb)->encap_mark = 1;

	return ipv6_gro_receive(head, skb);
}

static struct sk_buff *ip4ip6_gro_receive(struct list_head *head,
					  struct sk_buff *skb)
{
	/* GRO receive for IPv4 encapsulated in IPv6 (IP4IP6) */

	if (NAPI_GRO_CB(skb)->encap_mark) {
		NAPI_GRO_CB(skb)->flush = 1;
		return NULL;
	}

	NAPI_GRO_CB(skb)->encap_mark = 1;

	return inet_gro_receive(head, skb);
}

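/* GRO complete for ETH_P_IPV6: restore payload_len on the merged packet (or
 * rebuild a hop-by-hop jumbogram option when the merged payload exceeds
 * IPV6_MAXPLEN), then let the L4 gro_complete callback finish the job at the
 * transport header offset.
 */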
INDIRECT_CALLABLE_SCOPE int ipv6_gro_complete(struct sk_buff *skb, int nhoff)
{
	const struct net_offload *ops;
	struct ipv6hdr *iph;
	int err = -ENOSYS;
	u32 payload_len;

	if (skb->encapsulation) {
		skb_set_inner_protocol(skb, cpu_to_be16(ETH_P_IPV6));
		skb_set_inner_network_header(skb, nhoff);
	}

	payload_len = skb->len - nhoff - sizeof(*iph);
	if (unlikely(payload_len > IPV6_MAXPLEN)) {
		struct hop_jumbo_hdr *hop_jumbo;
		int hoplen = sizeof(*hop_jumbo);

		/* Move network header left */
		memmove(skb_mac_header(skb) - hoplen, skb_mac_header(skb),
			skb->transport_header - skb->mac_header);
		skb->data -= hoplen;
		skb->len += hoplen;
		skb->mac_header -= hoplen;
		skb->network_header -= hoplen;
		iph = (struct ipv6hdr *)(skb->data + nhoff);
		hop_jumbo = (struct hop_jumbo_hdr *)(iph + 1);

		/* Build hop-by-hop options */
		hop_jumbo->nexthdr = iph->nexthdr;
		hop_jumbo->hdrlen = 0;
		hop_jumbo->tlv_type = IPV6_TLV_JUMBO;
		hop_jumbo->tlv_len = 4;
		hop_jumbo->jumbo_payload_len = htonl(payload_len + hoplen);

		iph->nexthdr = NEXTHDR_HOP;
		iph->payload_len = 0;
	} else {
		iph = (struct ipv6hdr *)(skb->data + nhoff);
		iph->payload_len = htons(payload_len);
	}

	nhoff += sizeof(*iph) + ipv6_exthdrs_len(iph, &ops);
	if (WARN_ON(!ops || !ops->callbacks.gro_complete))
		goto out;

	err = INDIRECT_CALL_L4(ops->callbacks.gro_complete, tcp6_gro_complete,
			       udp6_gro_complete, skb, nhoff);

out:
	return err;
}

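/* Tunnel GRO complete helpers: mark the skb as encapsulated and record the
 * tunnel type in gso_type before completing the inner IPv4 or IPv6 packet.
 */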
static int sit_gro_complete(struct sk_buff *skb, int nhoff)
{
	skb->encapsulation = 1;
	skb_shinfo(skb)->gso_type |= SKB_GSO_IPXIP4;
	return ipv6_gro_complete(skb, nhoff);
}

static int ip6ip6_gro_complete(struct sk_buff *skb, int nhoff)
{
	skb->encapsulation = 1;
	skb_shinfo(skb)->gso_type |= SKB_GSO_IPXIP6;
	return ipv6_gro_complete(skb, nhoff);
}

static int ip4ip6_gro_complete(struct sk_buff *skb, int nhoff)
{
	skb->encapsulation = 1;
	skb_shinfo(skb)->gso_type |= SKB_GSO_IPXIP6;
	return inet_gro_complete(skb, nhoff);
}

static struct packet_offload ipv6_packet_offload __read_mostly = {
	.type = cpu_to_be16(ETH_P_IPV6),
	.callbacks = {
		.gso_segment = ipv6_gso_segment,
		.gro_receive = ipv6_gro_receive,
		.gro_complete = ipv6_gro_complete,
	},
};

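/* Tunnel GSO segment wrappers: reject packets whose gso_type does not match
 * the tunnel type this offload was registered for, then segment the inner
 * IPv4 or IPv6 packet.
 */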
static struct sk_buff *sit_gso_segment(struct sk_buff *skb,
				       netdev_features_t features)
{
	if (!(skb_shinfo(skb)->gso_type & SKB_GSO_IPXIP4))
		return ERR_PTR(-EINVAL);

	return ipv6_gso_segment(skb, features);
}

static struct sk_buff *ip4ip6_gso_segment(struct sk_buff *skb,
					  netdev_features_t features)
{
	if (!(skb_shinfo(skb)->gso_type & SKB_GSO_IPXIP6))
		return ERR_PTR(-EINVAL);

	return inet_gso_segment(skb, features);
}

static struct sk_buff *ip6ip6_gso_segment(struct sk_buff *skb,
					  netdev_features_t features)
{
	if (!(skb_shinfo(skb)->gso_type & SKB_GSO_IPXIP6))
		return ERR_PTR(-EINVAL);

	return ipv6_gso_segment(skb, features);
}

static const struct net_offload sit_offload = {
	.callbacks = {
		.gso_segment = sit_gso_segment,
		.gro_receive = sit_ip6ip6_gro_receive,
		.gro_complete = sit_gro_complete,
	},
};

static const struct net_offload ip4ip6_offload = {
	.callbacks = {
		.gso_segment = ip4ip6_gso_segment,
		.gro_receive = ip4ip6_gro_receive,
		.gro_complete = ip4ip6_gro_complete,
	},
};

static const struct net_offload ip6ip6_offload = {
	.callbacks = {
		.gso_segment = ip6ip6_gso_segment,
		.gro_receive = sit_ip6ip6_gro_receive,
		.gro_complete = ip6ip6_gro_complete,
	},
};
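
/* Register the IPv6 packet offload and the IPv6-related tunnel offloads:
 * sit_offload handles IPv6 over IPv4 (IPPROTO_IPV6 seen from the IPv4 side),
 * while ip6ip6_offload and ip4ip6_offload handle IPv6 and IPv4 carried
 * inside IPv6.
 */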
static int __init ipv6_offload_init(void)
{
	if (tcpv6_offload_init() < 0)
		pr_crit("%s: Cannot add TCP protocol offload\n", __func__);
	if (ipv6_exthdrs_offload_init() < 0)
		pr_crit("%s: Cannot add EXTHDRS protocol offload\n", __func__);

	dev_add_offload(&ipv6_packet_offload);

	inet_add_offload(&sit_offload, IPPROTO_IPV6);
	inet6_add_offload(&ip6ip6_offload, IPPROTO_IPV6);
	inet6_add_offload(&ip4ip6_offload, IPPROTO_IPIP);

	return 0;
}

fs_initcall(ipv6_offload_init);