// SPDX-License-Identifier: GPL-2.0
/* Copyright (C) B.A.T.M.A.N. contributors:
 *
 * Martin Hundebøll <martin@hundeboll.net>
 */

#include "fragmentation.h"
#include "main.h"

#include <linux/atomic.h>
#include <linux/byteorder/generic.h>
#include <linux/errno.h>
#include <linux/etherdevice.h>
#include <linux/gfp.h>
#include <linux/if_ether.h>
#include <linux/jiffies.h>
#include <linux/lockdep.h>
#include <linux/minmax.h>
#include <linux/netdevice.h>
#include <linux/skbuff.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/string.h>
#include <uapi/linux/batadv_packet.h>

#include "hard-interface.h"
#include "originator.h"
#include "routing.h"
#include "send.h"

/**
 * batadv_frag_clear_chain() - delete entries in the fragment buffer chain
 * @head: head of chain with entries.
 * @dropped: whether the chain is cleared because all fragments are dropped
 *
 * Free fragments in the passed hlist. Should be called with appropriate lock.
 */
static void batadv_frag_clear_chain(struct hlist_head *head, bool dropped)
{
	struct batadv_frag_list_entry *entry;
	struct hlist_node *node;

	hlist_for_each_entry_safe(entry, node, head, list) {
		hlist_del(&entry->list);

		if (dropped)
			kfree_skb(entry->skb);
		else
			consume_skb(entry->skb);

		kfree(entry);
	}
}

/**
 * batadv_frag_purge_orig() - free fragments associated with an orig node
 * @orig_node: originator to free fragments from
 * @check_cb: optional function to tell if an entry should be purged
 */
void batadv_frag_purge_orig(struct batadv_orig_node *orig_node,
			    bool (*check_cb)(struct batadv_frag_table_entry *))
{
	struct batadv_frag_table_entry *chain;
	u8 i;

	for (i = 0; i < BATADV_FRAG_BUFFER_COUNT; i++) {
		chain = &orig_node->fragments[i];
		spin_lock_bh(&chain->lock);

		if (!check_cb || check_cb(chain)) {
			batadv_frag_clear_chain(&chain->fragment_list, true);
			chain->size = 0;
		}

		spin_unlock_bh(&chain->lock);
	}
}

/**
 * batadv_frag_size_limit() - maximum possible size of packet to be fragmented
 *
 * Return: the maximum size of payload that can be fragmented.
 */
static int batadv_frag_size_limit(void)
{
	int limit = BATADV_FRAG_MAX_FRAG_SIZE;

	limit -= sizeof(struct batadv_frag_packet);
	limit *= BATADV_FRAG_MAX_FRAGMENTS;
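	/* Illustration, assuming the usual values of BATADV_FRAG_MAX_FRAG_SIZE
	 * (1400 bytes) and a 20 byte fragment header: each of the up to 16
	 * fragments then carries at most 1380 bytes of payload, so the largest
	 * packet that can still be fragmented is (1400 - 20) * 16 = 22080
	 * bytes.
	 */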

	return limit;
}

/**
 * batadv_frag_init_chain() - check and prepare fragment chain for new fragment
 * @chain: chain in fragments table to init
 * @seqno: sequence number of the received fragment
 *
 * Make chain ready for a fragment with sequence number "seqno". Delete existing
 * entries if they have an "old" sequence number.
 *
 * Caller must hold chain->lock.
 *
 * Return: true if chain is empty and the caller can just insert the new
 * fragment without searching for the right position.
 */
static bool batadv_frag_init_chain(struct batadv_frag_table_entry *chain,
				   u16 seqno)
{
	lockdep_assert_held(&chain->lock);

	if (chain->seqno == seqno)
		return false;

	if (!hlist_empty(&chain->fragment_list))
		batadv_frag_clear_chain(&chain->fragment_list, true);

	chain->size = 0;
	chain->seqno = seqno;

	return true;
}

/**
 * batadv_frag_insert_packet() - insert a fragment into a fragment chain
 * @orig_node: originator that the fragment was received from
 * @skb: skb to insert
 * @chain_out: list head to attach complete chains of fragments to
 *
 * Insert a new fragment into the reverse ordered chain in the right table
 * entry. The hash table entry is cleared if "old" fragments exist in it.
 *
 * Return: true if skb is buffered, false on error. If the chain has all the
 * fragments needed to merge the packet, the chain is moved to the passed head
 * to avoid locking the chain in the table.
 */
static bool batadv_frag_insert_packet(struct batadv_orig_node *orig_node,
				      struct sk_buff *skb,
				      struct hlist_head *chain_out)
{
	struct batadv_frag_table_entry *chain;
	struct batadv_frag_list_entry *frag_entry_new = NULL, *frag_entry_curr;
	struct batadv_frag_list_entry *frag_entry_last = NULL;
	struct batadv_frag_packet *frag_packet;
	u8 bucket;
	u16 seqno, hdr_size = sizeof(struct batadv_frag_packet);
	bool ret = false;

	/* Linearize packet to avoid linearizing 16 packets in a row when doing
	 * the later merge. Non-linear merge should be added to remove this
	 * linearization.
	 */
	if (skb_linearize(skb) < 0)
		goto err;

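	/* All fragments of one original packet carry the same seqno and thus
	 * map to the same chain bucket; a colliding fragment stream with a
	 * different seqno is flushed by batadv_frag_init_chain() below.
	 */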
	frag_packet = (struct batadv_frag_packet *)skb->data;
	seqno = ntohs(frag_packet->seqno);
	bucket = seqno % BATADV_FRAG_BUFFER_COUNT;

	frag_entry_new = kmalloc(sizeof(*frag_entry_new), GFP_ATOMIC);
	if (!frag_entry_new)
		goto err;

	frag_entry_new->skb = skb;
	frag_entry_new->no = frag_packet->no;

	/* Select entry in the "chain table" and delete any prior fragments
	 * with another sequence number. batadv_frag_init_chain() returns true
	 * if the list is empty at return.
	 */
	chain = &orig_node->fragments[bucket];
	spin_lock_bh(&chain->lock);
	if (batadv_frag_init_chain(chain, seqno)) {
		hlist_add_head(&frag_entry_new->list, &chain->fragment_list);
		chain->size = skb->len - hdr_size;
		chain->timestamp = jiffies;
		chain->total_size = ntohs(frag_packet->total_size);
		ret = true;
		goto out;
	}

	/* Find the position for the new fragment. */
	hlist_for_each_entry(frag_entry_curr, &chain->fragment_list, list) {
		/* Drop packet if fragment already exists. */
		if (frag_entry_curr->no == frag_entry_new->no)
			goto err_unlock;

		/* Order fragments from highest to lowest. */
		if (frag_entry_curr->no < frag_entry_new->no) {
			hlist_add_before(&frag_entry_new->list,
					 &frag_entry_curr->list);
			chain->size += skb->len - hdr_size;
			chain->timestamp = jiffies;
			ret = true;
			goto out;
		}

		/* store current entry because it could be the last in list */
		frag_entry_last = frag_entry_curr;
	}

	/* Reached the end of the list, so insert after 'frag_entry_last'. */
	if (likely(frag_entry_last)) {
		hlist_add_behind(&frag_entry_new->list, &frag_entry_last->list);
		chain->size += skb->len - hdr_size;
		chain->timestamp = jiffies;
		ret = true;
	}

out:
	if (chain->size > batadv_frag_size_limit() ||
	    chain->total_size != ntohs(frag_packet->total_size) ||
	    chain->total_size > batadv_frag_size_limit()) {
		/* Clear chain if total size of either the list or the packet
		 * exceeds the maximum size of one merged packet. Don't allow
		 * packets to have different total_size.
		 */
		batadv_frag_clear_chain(&chain->fragment_list, true);
		chain->size = 0;
	} else if (ntohs(frag_packet->total_size) == chain->size) {
		/* All fragments received. Hand over chain to caller. */
		hlist_move_list(&chain->fragment_list, chain_out);
		chain->size = 0;
	}

err_unlock:
	spin_unlock_bh(&chain->lock);

err:
	if (!ret) {
		kfree(frag_entry_new);
		kfree_skb(skb);
	}

	return ret;
}

/**
 * batadv_frag_merge_packets() - merge a chain of fragments
 * @chain: head of chain with fragments
 *
 * Expand the first skb in the chain and copy the content of the remaining
 * skbs into the expanded one. After doing so, clear the chain.
 *
 * Return: the merged skb or NULL on error.
 */
static struct sk_buff *
batadv_frag_merge_packets(struct hlist_head *chain)
{
	struct batadv_frag_packet *packet;
	struct batadv_frag_list_entry *entry;
	struct sk_buff *skb_out;
	int size, hdr_size = sizeof(struct batadv_frag_packet);
	bool dropped = false;

	/* Remove first entry, as this is the destination for the rest of the
	 * fragments.
	 */
	entry = hlist_entry(chain->first, struct batadv_frag_list_entry, list);
	hlist_del(&entry->list);
	skb_out = entry->skb;
	kfree(entry);

	packet = (struct batadv_frag_packet *)skb_out->data;
	size = ntohs(packet->total_size) + hdr_size;
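	/* "size" still includes the fragment header sitting at the front of
	 * skb_out; that header is only needed to read total_size here and is
	 * stripped by the skb_pull() below.
	 */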

	/* Make room for the rest of the fragments. */
	if (pskb_expand_head(skb_out, 0, size - skb_out->len, GFP_ATOMIC) < 0) {
		kfree_skb(skb_out);
		skb_out = NULL;
		dropped = true;
		goto free;
	}

	/* Move the existing MAC header to just before the payload. (Overwrite
	 * the fragment header.)
	 */
	skb_pull(skb_out, hdr_size);
	skb_out->ip_summed = CHECKSUM_NONE;
	memmove(skb_out->data - ETH_HLEN, skb_mac_header(skb_out), ETH_HLEN);
	skb_set_mac_header(skb_out, -ETH_HLEN);
	skb_reset_network_header(skb_out);
	skb_reset_transport_header(skb_out);

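	/* The chain is kept in order of decreasing fragment number, and the
	 * sender cuts fragments from the tail of the original packet, so the
	 * first entry (now skb_out) holds the start of the payload and the
	 * remaining entries follow in payload order.
	 */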
	/* Copy the payload of each remaining fragment into the merged skb */
	hlist_for_each_entry(entry, chain, list) {
		size = entry->skb->len - hdr_size;
		skb_put_data(skb_out, entry->skb->data + hdr_size, size);
	}

free:
	/* Locking is not needed, because 'chain' is not part of any orig. */
	batadv_frag_clear_chain(chain, dropped);
	return skb_out;
}

/**
 * batadv_frag_skb_buffer() - buffer fragment for later merge
 * @skb: skb to buffer
 * @orig_node_src: originator that the skb is received from
 *
 * Add fragment to buffer and merge fragments if possible.
 *
 * There are three possible outcomes: 1) Packet is merged: Return true and
 * set *skb to merged packet; 2) Packet is buffered: Return true and set *skb
 * to NULL; 3) Error: Return false and free skb.
 *
 * Return: true when the packet is merged or buffered, false when skb is not
 * used.
 */
bool batadv_frag_skb_buffer(struct sk_buff **skb,
			    struct batadv_orig_node *orig_node_src)
{
	struct sk_buff *skb_out = NULL;
	struct hlist_head head = HLIST_HEAD_INIT;
	bool ret = false;

	/* Add packet to buffer and table entry if merge is possible. */
	if (!batadv_frag_insert_packet(orig_node_src, *skb, &head))
		goto out_err;

	/* Leave if more fragments are needed to merge. */
	if (hlist_empty(&head))
		goto out;

	skb_out = batadv_frag_merge_packets(&head);
	if (!skb_out)
		goto out_err;

out:
	ret = true;
out_err:
	*skb = skb_out;
	return ret;
}

/**
 * batadv_frag_skb_fwd() - forward fragments that would exceed MTU when merged
 * @skb: skb to forward
 * @recv_if: interface that the skb is received on
 * @orig_node_src: originator that the skb is received from
 *
 * Look up the next-hop of the fragment's payload and check if the merged
 * packet will exceed the MTU towards the next-hop. If so, the fragment is
 * forwarded without merging it.
 *
 * Return: true if the fragment is consumed/forwarded, false otherwise.
 */
bool batadv_frag_skb_fwd(struct sk_buff *skb,
			 struct batadv_hard_iface *recv_if,
			 struct batadv_orig_node *orig_node_src)
{
	struct batadv_priv *bat_priv = netdev_priv(recv_if->soft_iface);
	struct batadv_orig_node *orig_node_dst;
	struct batadv_neigh_node *neigh_node = NULL;
	struct batadv_frag_packet *packet;
	u16 total_size;
	bool ret = false;

	packet = (struct batadv_frag_packet *)skb->data;
	orig_node_dst = batadv_orig_hash_find(bat_priv, packet->dest);
	if (!orig_node_dst)
		goto out;

	neigh_node = batadv_find_router(bat_priv, orig_node_dst, recv_if);
	if (!neigh_node)
		goto out;

	/* Forward the fragment if the merged packet would be too big to
	 * be assembled.
	 */
	total_size = ntohs(packet->total_size);
	if (total_size > neigh_node->if_incoming->net_dev->mtu) {
		batadv_inc_counter(bat_priv, BATADV_CNT_FRAG_FWD);
		batadv_add_counter(bat_priv, BATADV_CNT_FRAG_FWD_BYTES,
				   skb->len + ETH_HLEN);

		packet->ttl--;
		batadv_send_unicast_skb(skb, neigh_node);
		ret = true;
	}

out:
	batadv_orig_node_put(orig_node_dst);
	batadv_neigh_node_put(neigh_node);
	return ret;
}

/**
 * batadv_frag_create() - create a fragment from skb
 * @net_dev: outgoing device for fragment
 * @skb: skb to create fragment from
 * @frag_head: header to use in new fragment
 * @fragment_size: size of new fragment
 *
 * Split the passed skb into two fragments: a new one holding fragment_size
 * bytes of payload taken from the tail of the old skb, and the old one keeping
 * the rest.
 *
 * Return: the new fragment, NULL on error.
 */
static struct sk_buff *batadv_frag_create(struct net_device *net_dev,
					  struct sk_buff *skb,
					  struct batadv_frag_packet *frag_head,
					  unsigned int fragment_size)
{
	unsigned int ll_reserved = LL_RESERVED_SPACE(net_dev);
	unsigned int tailroom = net_dev->needed_tailroom;
	struct sk_buff *skb_fragment;
	unsigned int header_size = sizeof(*frag_head);
	unsigned int mtu = fragment_size + header_size;
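	/* "mtu" here is the on-wire size of the new fragment: its payload plus
	 * the fragment header. The allocation below additionally reserves the
	 * headroom and tailroom the outgoing device needs.
	 */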

	skb_fragment = dev_alloc_skb(ll_reserved + mtu + tailroom);
	if (!skb_fragment)
		goto err;

	skb_fragment->priority = skb->priority;

	/* Eat the last fragment_size bytes of the skb */
	skb_reserve(skb_fragment, ll_reserved + header_size);
	skb_split(skb, skb_fragment, skb->len - fragment_size);

	/* Add the header */
	skb_push(skb_fragment, header_size);
	memcpy(skb_fragment->data, frag_head, header_size);

err:
	return skb_fragment;
}
430 | |
431 | /** |
432 | * batadv_frag_send_packet() - create up to 16 fragments from the passed skb |
433 | * @skb: skb to create fragments from |
434 | * @orig_node: final destination of the created fragments |
435 | * @neigh_node: next-hop of the created fragments |
436 | * |
437 | * Return: the netdev tx status or a negative errno code on a failure |
438 | */ |
439 | int batadv_frag_send_packet(struct sk_buff *skb, |
440 | struct batadv_orig_node *orig_node, |
441 | struct batadv_neigh_node *neigh_node) |
442 | { |
443 | struct net_device *net_dev = neigh_node->if_incoming->net_dev; |
444 | struct batadv_priv *bat_priv; |
445 | struct batadv_hard_iface *primary_if = NULL; |
446 | struct batadv_frag_packet ; |
447 | struct sk_buff *skb_fragment; |
448 | unsigned int mtu = net_dev->mtu; |
449 | unsigned int = sizeof(frag_header); |
450 | unsigned int max_fragment_size, num_fragments; |
451 | int ret; |
452 | |
	/* To avoid merge and refragmentation at next-hops we never send
	 * fragments larger than BATADV_FRAG_MAX_FRAG_SIZE.
	 */
	mtu = min_t(unsigned int, mtu, BATADV_FRAG_MAX_FRAG_SIZE);
	max_fragment_size = mtu - header_size;

	if (skb->len == 0 || max_fragment_size == 0)
		return -EINVAL;

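	/* First round the number of fragments up, then spread the payload
	 * evenly across them. E.g. (illustrative numbers) a 2000 byte skb with
	 * a 1380 byte per-fragment limit needs (2000 - 1) / 1380 + 1 = 2
	 * fragments; the limit is then lowered to (2000 - 1) / 2 + 1 = 1000
	 * bytes, giving two roughly equal fragments instead of 1380 + 620
	 * bytes.
	 */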
	num_fragments = (skb->len - 1) / max_fragment_size + 1;
	max_fragment_size = (skb->len - 1) / num_fragments + 1;

	/* Don't even try to fragment if we need more than 16 fragments */
	if (num_fragments > BATADV_FRAG_MAX_FRAGMENTS) {
		ret = -EAGAIN;
		goto free_skb;
	}

	bat_priv = orig_node->bat_priv;
	primary_if = batadv_primary_if_get_selected(bat_priv);
	if (!primary_if) {
		ret = -EINVAL;
		goto free_skb;
	}

	/* GRO might have added fragments to the fragment list instead of
	 * frags[]. skb_split() cannot handle such a frag list, so the skb must
	 * be linearized here to avoid incorrect length information after the
	 * batman-adv fragments have been created and submitted to the
	 * hard-interface.
	 */
	if (skb_has_frag_list(skb) && __skb_linearize(skb)) {
		ret = -ENOMEM;
		goto free_skb;
	}

	/* Create one header to be copied to all fragments */
	frag_header.packet_type = BATADV_UNICAST_FRAG;
	frag_header.version = BATADV_COMPAT_VERSION;
	frag_header.ttl = BATADV_TTL;
	frag_header.seqno = htons(atomic_inc_return(&bat_priv->frag_seqno));
	frag_header.reserved = 0;
	frag_header.no = 0;
	frag_header.total_size = htons(skb->len);

	/* skb->priority values from 256->263 are magic values to
	 * directly indicate a specific 802.1d priority. This is used
	 * to allow 802.1d priority to be passed directly in from VLAN
	 * tags, etc.
	 */
	if (skb->priority >= 256 && skb->priority <= 263)
		frag_header.priority = skb->priority - 256;
	else
		frag_header.priority = 0;

	ether_addr_copy(frag_header.orig, primary_if->net_dev->dev_addr);
	ether_addr_copy(frag_header.dest, orig_node->orig);

	/* Eat and send fragments from the tail of skb */
	while (skb->len > max_fragment_size) {
		/* The initial check in this function should cover this case */
		if (unlikely(frag_header.no == BATADV_FRAG_MAX_FRAGMENTS - 1)) {
			ret = -EINVAL;
			goto put_primary_if;
		}

		skb_fragment = batadv_frag_create(net_dev, skb, &frag_header,
						  max_fragment_size);
		if (!skb_fragment) {
			ret = -ENOMEM;
			goto put_primary_if;
		}

		batadv_inc_counter(bat_priv, BATADV_CNT_FRAG_TX);
		batadv_add_counter(bat_priv, BATADV_CNT_FRAG_TX_BYTES,
				   skb_fragment->len + ETH_HLEN);
		ret = batadv_send_unicast_skb(skb_fragment, neigh_node);
		if (ret != NET_XMIT_SUCCESS) {
			ret = NET_XMIT_DROP;
			goto put_primary_if;
		}

		frag_header.no++;
	}
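	/* What is left in skb is the start of the original payload; it becomes
	 * the last and highest-numbered fragment and is sent below by reusing
	 * the original skb.
	 */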

	/* make sure that there is at least enough head for the fragmentation
	 * and ethernet headers
	 */
	ret = skb_cow_head(skb, ETH_HLEN + header_size);
	if (ret < 0)
		goto put_primary_if;

	skb_push(skb, header_size);
	memcpy(skb->data, &frag_header, header_size);

	/* Send the last fragment */
	batadv_inc_counter(bat_priv, BATADV_CNT_FRAG_TX);
	batadv_add_counter(bat_priv, BATADV_CNT_FRAG_TX_BYTES,
			   skb->len + ETH_HLEN);
	ret = batadv_send_unicast_skb(skb, neigh_node);
	/* skb was consumed */
	skb = NULL;

put_primary_if:
	batadv_hardif_put(primary_if);
free_skb:
	kfree_skb(skb);

	return ret;
}