1 | // SPDX-License-Identifier: GPL-2.0-or-later |
2 | /* |
3 | * xfrm_device.c - IPsec device offloading code. |
4 | * |
5 | * Copyright (c) 2015 secunet Security Networks AG |
6 | * |
7 | * Author: |
8 | * Steffen Klassert <steffen.klassert@secunet.com> |
9 | */ |
10 | |
11 | #include <linux/errno.h> |
12 | #include <linux/module.h> |
13 | #include <linux/netdevice.h> |
14 | #include <linux/skbuff.h> |
15 | #include <linux/slab.h> |
16 | #include <linux/spinlock.h> |
17 | #include <net/dst.h> |
18 | #include <net/gso.h> |
19 | #include <net/xfrm.h> |
20 | #include <linux/notifier.h> |
21 | |
22 | #ifdef CONFIG_XFRM_OFFLOAD |
23 | static void __xfrm_transport_prep(struct xfrm_state *x, struct sk_buff *skb, |
24 | unsigned int hsize) |
25 | { |
26 | struct xfrm_offload *xo = xfrm_offload(skb); |
27 | |
28 | skb_reset_mac_len(skb); |
29 | if (xo->flags & XFRM_GSO_SEGMENT) |
30 | skb->transport_header -= x->props.header_len; |
31 | |
32 | pskb_pull(skb, len: skb_transport_offset(skb) + x->props.header_len); |
33 | } |
34 | |
35 | static void __xfrm_mode_tunnel_prep(struct xfrm_state *x, struct sk_buff *skb, |
36 | unsigned int hsize) |
37 | |
38 | { |
39 | struct xfrm_offload *xo = xfrm_offload(skb); |
40 | |
41 | if (xo->flags & XFRM_GSO_SEGMENT) |
42 | skb->transport_header = skb->network_header + hsize; |
43 | |
44 | skb_reset_mac_len(skb); |
45 | pskb_pull(skb, len: skb->mac_len + x->props.header_len); |
46 | } |
47 | |
48 | static void __xfrm_mode_beet_prep(struct xfrm_state *x, struct sk_buff *skb, |
49 | unsigned int hsize) |
50 | { |
51 | struct xfrm_offload *xo = xfrm_offload(skb); |
52 | int phlen = 0; |
53 | |
54 | if (xo->flags & XFRM_GSO_SEGMENT) |
55 | skb->transport_header = skb->network_header + hsize; |
56 | |
57 | skb_reset_mac_len(skb); |
58 | if (x->sel.family != AF_INET6) { |
59 | phlen = IPV4_BEET_PHMAXLEN; |
60 | if (x->outer_mode.family == AF_INET6) |
61 | phlen += sizeof(struct ipv6hdr) - sizeof(struct iphdr); |
62 | } |
63 | |
64 | pskb_pull(skb, len: skb->mac_len + hsize + (x->props.header_len - phlen)); |
65 | } |
66 | |
67 | /* Adjust pointers into the packet when IPsec is done at layer2 */ |
68 | static void xfrm_outer_mode_prep(struct xfrm_state *x, struct sk_buff *skb) |
69 | { |
70 | switch (x->outer_mode.encap) { |
71 | case XFRM_MODE_TUNNEL: |
72 | if (x->outer_mode.family == AF_INET) |
73 | return __xfrm_mode_tunnel_prep(x, skb, |
74 | hsize: sizeof(struct iphdr)); |
75 | if (x->outer_mode.family == AF_INET6) |
76 | return __xfrm_mode_tunnel_prep(x, skb, |
77 | hsize: sizeof(struct ipv6hdr)); |
78 | break; |
79 | case XFRM_MODE_TRANSPORT: |
80 | if (x->outer_mode.family == AF_INET) |
81 | return __xfrm_transport_prep(x, skb, |
82 | hsize: sizeof(struct iphdr)); |
83 | if (x->outer_mode.family == AF_INET6) |
84 | return __xfrm_transport_prep(x, skb, |
85 | hsize: sizeof(struct ipv6hdr)); |
86 | break; |
87 | case XFRM_MODE_BEET: |
88 | if (x->outer_mode.family == AF_INET) |
89 | return __xfrm_mode_beet_prep(x, skb, |
90 | hsize: sizeof(struct iphdr)); |
91 | if (x->outer_mode.family == AF_INET6) |
92 | return __xfrm_mode_beet_prep(x, skb, |
93 | hsize: sizeof(struct ipv6hdr)); |
94 | break; |
95 | case XFRM_MODE_ROUTEOPTIMIZATION: |
96 | case XFRM_MODE_IN_TRIGGER: |
97 | break; |
98 | } |
99 | } |
100 | |
101 | static inline bool xmit_xfrm_check_overflow(struct sk_buff *skb) |
102 | { |
103 | struct xfrm_offload *xo = xfrm_offload(skb); |
104 | __u32 seq = xo->seq.low; |
105 | |
106 | seq += skb_shinfo(skb)->gso_segs; |
107 | if (unlikely(seq < xo->seq.low)) |
108 | return true; |
109 | |
110 | return false; |
111 | } |
112 | |
113 | struct sk_buff *validate_xmit_xfrm(struct sk_buff *skb, netdev_features_t features, bool *again) |
114 | { |
115 | int err; |
116 | unsigned long flags; |
117 | struct xfrm_state *x; |
118 | struct softnet_data *sd; |
119 | struct sk_buff *skb2, *nskb, *pskb = NULL; |
120 | netdev_features_t esp_features = features; |
121 | struct xfrm_offload *xo = xfrm_offload(skb); |
122 | struct net_device *dev = skb->dev; |
123 | struct sec_path *sp; |
124 | |
125 | if (!xo || (xo->flags & XFRM_XMIT)) |
126 | return skb; |
127 | |
128 | if (!(features & NETIF_F_HW_ESP)) |
129 | esp_features = features & ~(NETIF_F_SG | NETIF_F_CSUM_MASK); |
130 | |
131 | sp = skb_sec_path(skb); |
132 | x = sp->xvec[sp->len - 1]; |
133 | if (xo->flags & XFRM_GRO || x->xso.dir == XFRM_DEV_OFFLOAD_IN) |
134 | return skb; |
135 | |
136 | /* The packet was sent to HW IPsec packet offload engine, |
137 | * but to wrong device. Drop the packet, so it won't skip |
138 | * XFRM stack. |
139 | */ |
140 | if (x->xso.type == XFRM_DEV_OFFLOAD_PACKET && x->xso.dev != dev) { |
141 | kfree_skb(skb); |
142 | dev_core_stats_tx_dropped_inc(dev); |
143 | return NULL; |
144 | } |
145 | |
146 | /* This skb was already validated on the upper/virtual dev */ |
147 | if ((x->xso.dev != dev) && (x->xso.real_dev == dev)) |
148 | return skb; |
149 | |
150 | local_irq_save(flags); |
151 | sd = this_cpu_ptr(&softnet_data); |
152 | err = !skb_queue_empty(list: &sd->xfrm_backlog); |
153 | local_irq_restore(flags); |
154 | |
155 | if (err) { |
156 | *again = true; |
157 | return skb; |
158 | } |
159 | |
160 | if (skb_is_gso(skb) && (unlikely(x->xso.dev != dev) || |
161 | unlikely(xmit_xfrm_check_overflow(skb)))) { |
162 | struct sk_buff *segs; |
163 | |
164 | /* Packet got rerouted, fixup features and segment it. */ |
165 | esp_features = esp_features & ~(NETIF_F_HW_ESP | NETIF_F_GSO_ESP); |
166 | |
167 | segs = skb_gso_segment(skb, features: esp_features); |
168 | if (IS_ERR(ptr: segs)) { |
169 | kfree_skb(skb); |
170 | dev_core_stats_tx_dropped_inc(dev); |
171 | return NULL; |
172 | } else { |
173 | consume_skb(skb); |
174 | skb = segs; |
175 | } |
176 | } |
177 | |
178 | if (!skb->next) { |
179 | esp_features |= skb->dev->gso_partial_features; |
180 | xfrm_outer_mode_prep(x, skb); |
181 | |
182 | xo->flags |= XFRM_DEV_RESUME; |
183 | |
184 | err = x->type_offload->xmit(x, skb, esp_features); |
185 | if (err) { |
186 | if (err == -EINPROGRESS) |
187 | return NULL; |
188 | |
189 | XFRM_INC_STATS(xs_net(x), LINUX_MIB_XFRMOUTSTATEPROTOERROR); |
190 | kfree_skb(skb); |
191 | return NULL; |
192 | } |
193 | |
194 | skb_push(skb, len: skb->data - skb_mac_header(skb)); |
195 | |
196 | return skb; |
197 | } |
198 | |
199 | skb_list_walk_safe(skb, skb2, nskb) { |
200 | esp_features |= skb->dev->gso_partial_features; |
201 | skb_mark_not_on_list(skb: skb2); |
202 | |
203 | xo = xfrm_offload(skb: skb2); |
204 | xo->flags |= XFRM_DEV_RESUME; |
205 | |
206 | xfrm_outer_mode_prep(x, skb: skb2); |
207 | |
208 | err = x->type_offload->xmit(x, skb2, esp_features); |
209 | if (!err) { |
210 | skb2->next = nskb; |
211 | } else if (err != -EINPROGRESS) { |
212 | XFRM_INC_STATS(xs_net(x), LINUX_MIB_XFRMOUTSTATEPROTOERROR); |
213 | skb2->next = nskb; |
214 | kfree_skb_list(segs: skb2); |
215 | return NULL; |
216 | } else { |
217 | if (skb == skb2) |
218 | skb = nskb; |
219 | else |
220 | pskb->next = nskb; |
221 | |
222 | continue; |
223 | } |
224 | |
225 | skb_push(skb: skb2, len: skb2->data - skb_mac_header(skb: skb2)); |
226 | pskb = skb2; |
227 | } |
228 | |
229 | return skb; |
230 | } |
231 | EXPORT_SYMBOL_GPL(validate_xmit_xfrm); |
232 | |
233 | int xfrm_dev_state_add(struct net *net, struct xfrm_state *x, |
234 | struct xfrm_user_offload *xuo, |
235 | struct netlink_ext_ack *extack) |
236 | { |
237 | int err; |
238 | struct dst_entry *dst; |
239 | struct net_device *dev; |
240 | struct xfrm_dev_offload *xso = &x->xso; |
241 | xfrm_address_t *saddr; |
242 | xfrm_address_t *daddr; |
243 | bool is_packet_offload; |
244 | |
245 | if (!x->type_offload) { |
246 | NL_SET_ERR_MSG(extack, "Type doesn't support offload" ); |
247 | return -EINVAL; |
248 | } |
249 | |
250 | if (xuo->flags & |
251 | ~(XFRM_OFFLOAD_IPV6 | XFRM_OFFLOAD_INBOUND | XFRM_OFFLOAD_PACKET)) { |
252 | NL_SET_ERR_MSG(extack, "Unrecognized flags in offload request" ); |
253 | return -EINVAL; |
254 | } |
255 | |
256 | is_packet_offload = xuo->flags & XFRM_OFFLOAD_PACKET; |
257 | |
258 | /* We don't yet support UDP encapsulation and TFC padding. */ |
259 | if ((!is_packet_offload && x->encap) || x->tfcpad) { |
260 | NL_SET_ERR_MSG(extack, "Encapsulation and TFC padding can't be offloaded" ); |
261 | return -EINVAL; |
262 | } |
263 | |
264 | dev = dev_get_by_index(net, ifindex: xuo->ifindex); |
265 | if (!dev) { |
266 | if (!(xuo->flags & XFRM_OFFLOAD_INBOUND)) { |
267 | saddr = &x->props.saddr; |
268 | daddr = &x->id.daddr; |
269 | } else { |
270 | saddr = &x->id.daddr; |
271 | daddr = &x->props.saddr; |
272 | } |
273 | |
274 | dst = __xfrm_dst_lookup(net, tos: 0, oif: 0, saddr, daddr, |
275 | family: x->props.family, |
276 | mark: xfrm_smark_get(mark: 0, x)); |
277 | if (IS_ERR(ptr: dst)) |
278 | return (is_packet_offload) ? -EINVAL : 0; |
279 | |
280 | dev = dst->dev; |
281 | |
282 | dev_hold(dev); |
283 | dst_release(dst); |
284 | } |
285 | |
286 | if (!dev->xfrmdev_ops || !dev->xfrmdev_ops->xdo_dev_state_add) { |
287 | xso->dev = NULL; |
288 | dev_put(dev); |
289 | return (is_packet_offload) ? -EINVAL : 0; |
290 | } |
291 | |
292 | if (!is_packet_offload && x->props.flags & XFRM_STATE_ESN && |
293 | !dev->xfrmdev_ops->xdo_dev_state_advance_esn) { |
294 | NL_SET_ERR_MSG(extack, "Device doesn't support offload with ESN" ); |
295 | xso->dev = NULL; |
296 | dev_put(dev); |
297 | return -EINVAL; |
298 | } |
299 | |
300 | xso->dev = dev; |
301 | netdev_tracker_alloc(dev, tracker: &xso->dev_tracker, GFP_ATOMIC); |
302 | xso->real_dev = dev; |
303 | |
304 | if (xuo->flags & XFRM_OFFLOAD_INBOUND) |
305 | xso->dir = XFRM_DEV_OFFLOAD_IN; |
306 | else |
307 | xso->dir = XFRM_DEV_OFFLOAD_OUT; |
308 | |
309 | if (is_packet_offload) |
310 | xso->type = XFRM_DEV_OFFLOAD_PACKET; |
311 | else |
312 | xso->type = XFRM_DEV_OFFLOAD_CRYPTO; |
313 | |
314 | err = dev->xfrmdev_ops->xdo_dev_state_add(x, extack); |
315 | if (err) { |
316 | xso->dev = NULL; |
317 | xso->dir = 0; |
318 | xso->real_dev = NULL; |
319 | netdev_put(dev, tracker: &xso->dev_tracker); |
320 | xso->type = XFRM_DEV_OFFLOAD_UNSPECIFIED; |
321 | |
322 | /* User explicitly requested packet offload mode and configured |
323 | * policy in addition to the XFRM state. So be civil to users, |
324 | * and return an error instead of taking fallback path. |
325 | * |
326 | * This WARN_ON() can be seen as a documentation for driver |
327 | * authors to do not return -EOPNOTSUPP in packet offload mode. |
328 | */ |
329 | WARN_ON(err == -EOPNOTSUPP && is_packet_offload); |
330 | if (err != -EOPNOTSUPP || is_packet_offload) { |
331 | NL_SET_ERR_MSG_WEAK(extack, "Device failed to offload this state" ); |
332 | return err; |
333 | } |
334 | } |
335 | |
336 | return 0; |
337 | } |
338 | EXPORT_SYMBOL_GPL(xfrm_dev_state_add); |
339 | |
340 | int xfrm_dev_policy_add(struct net *net, struct xfrm_policy *xp, |
341 | struct xfrm_user_offload *xuo, u8 dir, |
342 | struct netlink_ext_ack *extack) |
343 | { |
344 | struct xfrm_dev_offload *xdo = &xp->xdo; |
345 | struct net_device *dev; |
346 | int err; |
347 | |
348 | if (!xuo->flags || xuo->flags & ~XFRM_OFFLOAD_PACKET) { |
349 | /* We support only packet offload mode and it means |
350 | * that user must set XFRM_OFFLOAD_PACKET bit. |
351 | */ |
352 | NL_SET_ERR_MSG(extack, "Unrecognized flags in offload request" ); |
353 | return -EINVAL; |
354 | } |
355 | |
356 | dev = dev_get_by_index(net, ifindex: xuo->ifindex); |
357 | if (!dev) |
358 | return -EINVAL; |
359 | |
360 | if (!dev->xfrmdev_ops || !dev->xfrmdev_ops->xdo_dev_policy_add) { |
361 | xdo->dev = NULL; |
362 | dev_put(dev); |
363 | NL_SET_ERR_MSG(extack, "Policy offload is not supported" ); |
364 | return -EINVAL; |
365 | } |
366 | |
367 | xdo->dev = dev; |
368 | netdev_tracker_alloc(dev, tracker: &xdo->dev_tracker, GFP_ATOMIC); |
369 | xdo->real_dev = dev; |
370 | xdo->type = XFRM_DEV_OFFLOAD_PACKET; |
371 | switch (dir) { |
372 | case XFRM_POLICY_IN: |
373 | xdo->dir = XFRM_DEV_OFFLOAD_IN; |
374 | break; |
375 | case XFRM_POLICY_OUT: |
376 | xdo->dir = XFRM_DEV_OFFLOAD_OUT; |
377 | break; |
378 | case XFRM_POLICY_FWD: |
379 | xdo->dir = XFRM_DEV_OFFLOAD_FWD; |
380 | break; |
381 | default: |
382 | xdo->dev = NULL; |
383 | netdev_put(dev, tracker: &xdo->dev_tracker); |
384 | NL_SET_ERR_MSG(extack, "Unrecognized offload direction" ); |
385 | return -EINVAL; |
386 | } |
387 | |
388 | err = dev->xfrmdev_ops->xdo_dev_policy_add(xp, extack); |
389 | if (err) { |
390 | xdo->dev = NULL; |
391 | xdo->real_dev = NULL; |
392 | xdo->type = XFRM_DEV_OFFLOAD_UNSPECIFIED; |
393 | xdo->dir = 0; |
394 | netdev_put(dev, tracker: &xdo->dev_tracker); |
395 | NL_SET_ERR_MSG_WEAK(extack, "Device failed to offload this policy" ); |
396 | return err; |
397 | } |
398 | |
399 | return 0; |
400 | } |
401 | EXPORT_SYMBOL_GPL(xfrm_dev_policy_add); |
402 | |
403 | bool xfrm_dev_offload_ok(struct sk_buff *skb, struct xfrm_state *x) |
404 | { |
405 | int mtu; |
406 | struct dst_entry *dst = skb_dst(skb); |
407 | struct xfrm_dst *xdst = (struct xfrm_dst *)dst; |
408 | struct net_device *dev = x->xso.dev; |
409 | |
410 | if (!x->type_offload || x->encap) |
411 | return false; |
412 | |
413 | if (x->xso.type == XFRM_DEV_OFFLOAD_PACKET || |
414 | ((!dev || (dev == xfrm_dst_path(dst)->dev)) && |
415 | !xdst->child->xfrm)) { |
416 | mtu = xfrm_state_mtu(x, mtu: xdst->child_mtu_cached); |
417 | if (skb->len <= mtu) |
418 | goto ok; |
419 | |
420 | if (skb_is_gso(skb) && skb_gso_validate_network_len(skb, mtu)) |
421 | goto ok; |
422 | } |
423 | |
424 | return false; |
425 | |
426 | ok: |
427 | if (dev && dev->xfrmdev_ops && dev->xfrmdev_ops->xdo_dev_offload_ok) |
428 | return x->xso.dev->xfrmdev_ops->xdo_dev_offload_ok(skb, x); |
429 | |
430 | return true; |
431 | } |
432 | EXPORT_SYMBOL_GPL(xfrm_dev_offload_ok); |
433 | |
434 | void xfrm_dev_resume(struct sk_buff *skb) |
435 | { |
436 | struct net_device *dev = skb->dev; |
437 | int ret = NETDEV_TX_BUSY; |
438 | struct netdev_queue *txq; |
439 | struct softnet_data *sd; |
440 | unsigned long flags; |
441 | |
442 | rcu_read_lock(); |
443 | txq = netdev_core_pick_tx(dev, skb, NULL); |
444 | |
445 | HARD_TX_LOCK(dev, txq, smp_processor_id()); |
446 | if (!netif_xmit_frozen_or_stopped(dev_queue: txq)) |
447 | skb = dev_hard_start_xmit(skb, dev, txq, ret: &ret); |
448 | HARD_TX_UNLOCK(dev, txq); |
449 | |
450 | if (!dev_xmit_complete(rc: ret)) { |
451 | local_irq_save(flags); |
452 | sd = this_cpu_ptr(&softnet_data); |
453 | skb_queue_tail(list: &sd->xfrm_backlog, newsk: skb); |
454 | raise_softirq_irqoff(nr: NET_TX_SOFTIRQ); |
455 | local_irq_restore(flags); |
456 | } |
457 | rcu_read_unlock(); |
458 | } |
459 | EXPORT_SYMBOL_GPL(xfrm_dev_resume); |
460 | |
461 | void xfrm_dev_backlog(struct softnet_data *sd) |
462 | { |
463 | struct sk_buff_head *xfrm_backlog = &sd->xfrm_backlog; |
464 | struct sk_buff_head list; |
465 | struct sk_buff *skb; |
466 | |
467 | if (skb_queue_empty(list: xfrm_backlog)) |
468 | return; |
469 | |
470 | __skb_queue_head_init(list: &list); |
471 | |
472 | spin_lock(lock: &xfrm_backlog->lock); |
473 | skb_queue_splice_init(list: xfrm_backlog, head: &list); |
474 | spin_unlock(lock: &xfrm_backlog->lock); |
475 | |
476 | while (!skb_queue_empty(list: &list)) { |
477 | skb = __skb_dequeue(list: &list); |
478 | xfrm_dev_resume(skb); |
479 | } |
480 | |
481 | } |
482 | #endif |
483 | |
484 | static int xfrm_api_check(struct net_device *dev) |
485 | { |
486 | #ifdef CONFIG_XFRM_OFFLOAD |
487 | if ((dev->features & NETIF_F_HW_ESP_TX_CSUM) && |
488 | !(dev->features & NETIF_F_HW_ESP)) |
489 | return NOTIFY_BAD; |
490 | |
491 | if ((dev->features & NETIF_F_HW_ESP) && |
492 | (!(dev->xfrmdev_ops && |
493 | dev->xfrmdev_ops->xdo_dev_state_add && |
494 | dev->xfrmdev_ops->xdo_dev_state_delete))) |
495 | return NOTIFY_BAD; |
496 | #else |
497 | if (dev->features & (NETIF_F_HW_ESP | NETIF_F_HW_ESP_TX_CSUM)) |
498 | return NOTIFY_BAD; |
499 | #endif |
500 | |
501 | return NOTIFY_DONE; |
502 | } |
503 | |
504 | static int xfrm_dev_down(struct net_device *dev) |
505 | { |
506 | if (dev->features & NETIF_F_HW_ESP) { |
507 | xfrm_dev_state_flush(net: dev_net(dev), dev, task_valid: true); |
508 | xfrm_dev_policy_flush(net: dev_net(dev), dev, task_valid: true); |
509 | } |
510 | |
511 | return NOTIFY_DONE; |
512 | } |
513 | |
514 | static int xfrm_dev_event(struct notifier_block *this, unsigned long event, void *ptr) |
515 | { |
516 | struct net_device *dev = netdev_notifier_info_to_dev(info: ptr); |
517 | |
518 | switch (event) { |
519 | case NETDEV_REGISTER: |
520 | return xfrm_api_check(dev); |
521 | |
522 | case NETDEV_FEAT_CHANGE: |
523 | return xfrm_api_check(dev); |
524 | |
525 | case NETDEV_DOWN: |
526 | case NETDEV_UNREGISTER: |
527 | return xfrm_dev_down(dev); |
528 | } |
529 | return NOTIFY_DONE; |
530 | } |
531 | |
532 | static struct notifier_block xfrm_dev_notifier = { |
533 | .notifier_call = xfrm_dev_event, |
534 | }; |
535 | |
536 | void __init xfrm_dev_init(void) |
537 | { |
538 | register_netdevice_notifier(nb: &xfrm_dev_notifier); |
539 | } |
540 | |