1 | // SPDX-License-Identifier: GPL-2.0-only |
2 | // Copyright (c) 2020 Facebook Inc. |
3 | |
4 | #include <linux/ethtool_netlink.h> |
5 | #include <linux/netdevice.h> |
6 | #include <linux/slab.h> |
7 | #include <linux/types.h> |
8 | #include <linux/workqueue.h> |
9 | #include <net/udp_tunnel.h> |
10 | #include <net/vxlan.h> |
11 | |
/* State bits stored in the flags field of struct udp_tunnel_nic_table_entry. */
enum udp_tunnel_nic_table_entry_flags {
	/* Entry is queued to be added to the device (set_port path) */
	UDP_TUNNEL_NIC_ENTRY_ADD	= BIT(0),
	/* Entry is queued to be removed from the device (unset_port path) */
	UDP_TUNNEL_NIC_ENTRY_DEL	= BIT(1),
	/* The most recent device operation on this entry returned an error */
	UDP_TUNNEL_NIC_ENTRY_OP_FAIL	= BIT(2),
	/* Use count adjustments are ignored while set (used during replay) */
	UDP_TUNNEL_NIC_ENTRY_FROZEN	= BIT(3),
};
18 | |
/**
 * struct udp_tunnel_nic_table_entry - state of a single slot in a port table
 * @port: UDP port number, in network byte order
 * @type: tunnel type of the port (one of the UDP_TUNNEL_TYPE_* values)
 * @flags: combination of enum udp_tunnel_nic_table_entry_flags bits
 * @use_cnt: number of users of this port; zero means the port is not in use
 * @hw_priv: per-entry value stored through the set_port_priv op and copied
 *	into struct udp_tunnel_info when the entry is handed to the driver
 */
struct udp_tunnel_nic_table_entry {
	__be16 port;
	u8 type;
	u8 flags;
	u16 use_cnt;
	/* Largest value @use_cnt can hold */
#define UDP_TUNNEL_NIC_USE_CNT_MAX	U16_MAX
	u8 hw_priv;
};
27 | |
/**
 * struct udp_tunnel_nic - UDP tunnel port offload state
 * @work:	async work for talking to hardware from process context
 * @dev:	netdev pointer
 * @need_sync:	at least one port state changed
 * @need_replay: space was freed, we need a replay of all ports
 * @work_pending: @work is currently scheduled
 * @n_tables:	number of tables under @entries
 * @missed:	bitmap of tables which overflowed or saw a port collision
 * @entries:	table of tables of ports currently offloaded
 */
struct udp_tunnel_nic {
	struct work_struct work;

	struct net_device *dev;

	u8 need_sync:1;
	u8 need_replay:1;
	u8 work_pending:1;

	unsigned int n_tables;
	unsigned long missed;
	struct udp_tunnel_nic_table_entry *entries[] __counted_by(n_tables);
};
52 | |
/* We ensure all work structs are done using the driver state before that
 * state is released, but that does not cover the work function's code.
 * We therefore need a dedicated workqueue which we can flush before the
 * module gets removed.
 */
static struct workqueue_struct *udp_tunnel_nic_workqueue;
57 | |
58 | static const char *udp_tunnel_nic_tunnel_type_name(unsigned int type) |
59 | { |
60 | switch (type) { |
61 | case UDP_TUNNEL_TYPE_VXLAN: |
62 | return "vxlan" ; |
63 | case UDP_TUNNEL_TYPE_GENEVE: |
64 | return "geneve" ; |
65 | case UDP_TUNNEL_TYPE_VXLAN_GPE: |
66 | return "vxlan-gpe" ; |
67 | default: |
68 | return "unknown" ; |
69 | } |
70 | } |
71 | |
72 | static bool |
73 | udp_tunnel_nic_entry_is_free(struct udp_tunnel_nic_table_entry *entry) |
74 | { |
75 | return entry->use_cnt == 0 && !entry->flags; |
76 | } |
77 | |
78 | static bool |
79 | udp_tunnel_nic_entry_is_present(struct udp_tunnel_nic_table_entry *entry) |
80 | { |
81 | return entry->use_cnt && !(entry->flags & ~UDP_TUNNEL_NIC_ENTRY_FROZEN); |
82 | } |
83 | |
84 | static bool |
85 | udp_tunnel_nic_entry_is_frozen(struct udp_tunnel_nic_table_entry *entry) |
86 | { |
87 | return entry->flags & UDP_TUNNEL_NIC_ENTRY_FROZEN; |
88 | } |
89 | |
90 | static void |
91 | udp_tunnel_nic_entry_freeze_used(struct udp_tunnel_nic_table_entry *entry) |
92 | { |
93 | if (!udp_tunnel_nic_entry_is_free(entry)) |
94 | entry->flags |= UDP_TUNNEL_NIC_ENTRY_FROZEN; |
95 | } |
96 | |
97 | static void |
98 | udp_tunnel_nic_entry_unfreeze(struct udp_tunnel_nic_table_entry *entry) |
99 | { |
100 | entry->flags &= ~UDP_TUNNEL_NIC_ENTRY_FROZEN; |
101 | } |
102 | |
103 | static bool |
104 | udp_tunnel_nic_entry_is_queued(struct udp_tunnel_nic_table_entry *entry) |
105 | { |
106 | return entry->flags & (UDP_TUNNEL_NIC_ENTRY_ADD | |
107 | UDP_TUNNEL_NIC_ENTRY_DEL); |
108 | } |
109 | |
110 | static void |
111 | udp_tunnel_nic_entry_queue(struct udp_tunnel_nic *utn, |
112 | struct udp_tunnel_nic_table_entry *entry, |
113 | unsigned int flag) |
114 | { |
115 | entry->flags |= flag; |
116 | utn->need_sync = 1; |
117 | } |
118 | |
119 | static void |
120 | udp_tunnel_nic_ti_from_entry(struct udp_tunnel_nic_table_entry *entry, |
121 | struct udp_tunnel_info *ti) |
122 | { |
123 | memset(ti, 0, sizeof(*ti)); |
124 | ti->port = entry->port; |
125 | ti->type = entry->type; |
126 | ti->hw_priv = entry->hw_priv; |
127 | } |
128 | |
129 | static bool |
130 | udp_tunnel_nic_is_empty(struct net_device *dev, struct udp_tunnel_nic *utn) |
131 | { |
132 | const struct udp_tunnel_nic_info *info = dev->udp_tunnel_nic_info; |
133 | unsigned int i, j; |
134 | |
135 | for (i = 0; i < utn->n_tables; i++) |
136 | for (j = 0; j < info->tables[i].n_entries; j++) |
137 | if (!udp_tunnel_nic_entry_is_free(entry: &utn->entries[i][j])) |
138 | return false; |
139 | return true; |
140 | } |
141 | |
142 | static bool |
143 | udp_tunnel_nic_should_replay(struct net_device *dev, struct udp_tunnel_nic *utn) |
144 | { |
145 | const struct udp_tunnel_nic_table_info *table; |
146 | unsigned int i, j; |
147 | |
148 | if (!utn->missed) |
149 | return false; |
150 | |
151 | for (i = 0; i < utn->n_tables; i++) { |
152 | table = &dev->udp_tunnel_nic_info->tables[i]; |
153 | if (!test_bit(i, &utn->missed)) |
154 | continue; |
155 | |
156 | for (j = 0; j < table->n_entries; j++) |
157 | if (udp_tunnel_nic_entry_is_free(entry: &utn->entries[i][j])) |
158 | return true; |
159 | } |
160 | |
161 | return false; |
162 | } |
163 | |
164 | static void |
165 | __udp_tunnel_nic_get_port(struct net_device *dev, unsigned int table, |
166 | unsigned int idx, struct udp_tunnel_info *ti) |
167 | { |
168 | struct udp_tunnel_nic_table_entry *entry; |
169 | struct udp_tunnel_nic *utn; |
170 | |
171 | utn = dev->udp_tunnel_nic; |
172 | entry = &utn->entries[table][idx]; |
173 | |
174 | if (entry->use_cnt) |
175 | udp_tunnel_nic_ti_from_entry(entry, ti); |
176 | } |
177 | |
178 | static void |
179 | __udp_tunnel_nic_set_port_priv(struct net_device *dev, unsigned int table, |
180 | unsigned int idx, u8 priv) |
181 | { |
182 | dev->udp_tunnel_nic->entries[table][idx].hw_priv = priv; |
183 | } |
184 | |
185 | static void |
186 | udp_tunnel_nic_entry_update_done(struct udp_tunnel_nic_table_entry *entry, |
187 | int err) |
188 | { |
189 | bool dodgy = entry->flags & UDP_TUNNEL_NIC_ENTRY_OP_FAIL; |
190 | |
191 | WARN_ON_ONCE(entry->flags & UDP_TUNNEL_NIC_ENTRY_ADD && |
192 | entry->flags & UDP_TUNNEL_NIC_ENTRY_DEL); |
193 | |
194 | if (entry->flags & UDP_TUNNEL_NIC_ENTRY_ADD && |
195 | (!err || (err == -EEXIST && dodgy))) |
196 | entry->flags &= ~UDP_TUNNEL_NIC_ENTRY_ADD; |
197 | |
198 | if (entry->flags & UDP_TUNNEL_NIC_ENTRY_DEL && |
199 | (!err || (err == -ENOENT && dodgy))) |
200 | entry->flags &= ~UDP_TUNNEL_NIC_ENTRY_DEL; |
201 | |
202 | if (!err) |
203 | entry->flags &= ~UDP_TUNNEL_NIC_ENTRY_OP_FAIL; |
204 | else |
205 | entry->flags |= UDP_TUNNEL_NIC_ENTRY_OP_FAIL; |
206 | } |
207 | |
208 | static void |
209 | udp_tunnel_nic_device_sync_one(struct net_device *dev, |
210 | struct udp_tunnel_nic *utn, |
211 | unsigned int table, unsigned int idx) |
212 | { |
213 | struct udp_tunnel_nic_table_entry *entry; |
214 | struct udp_tunnel_info ti; |
215 | int err; |
216 | |
217 | entry = &utn->entries[table][idx]; |
218 | if (!udp_tunnel_nic_entry_is_queued(entry)) |
219 | return; |
220 | |
221 | udp_tunnel_nic_ti_from_entry(entry, ti: &ti); |
222 | if (entry->flags & UDP_TUNNEL_NIC_ENTRY_ADD) |
223 | err = dev->udp_tunnel_nic_info->set_port(dev, table, idx, &ti); |
224 | else |
225 | err = dev->udp_tunnel_nic_info->unset_port(dev, table, idx, |
226 | &ti); |
227 | udp_tunnel_nic_entry_update_done(entry, err); |
228 | |
229 | if (err) |
230 | netdev_warn(dev, |
231 | format: "UDP tunnel port sync failed port %d type %s: %d\n" , |
232 | be16_to_cpu(entry->port), |
233 | udp_tunnel_nic_tunnel_type_name(type: entry->type), |
234 | err); |
235 | } |
236 | |
237 | static void |
238 | udp_tunnel_nic_device_sync_by_port(struct net_device *dev, |
239 | struct udp_tunnel_nic *utn) |
240 | { |
241 | const struct udp_tunnel_nic_info *info = dev->udp_tunnel_nic_info; |
242 | unsigned int i, j; |
243 | |
244 | for (i = 0; i < utn->n_tables; i++) |
245 | for (j = 0; j < info->tables[i].n_entries; j++) |
246 | udp_tunnel_nic_device_sync_one(dev, utn, table: i, idx: j); |
247 | } |
248 | |
249 | static void |
250 | udp_tunnel_nic_device_sync_by_table(struct net_device *dev, |
251 | struct udp_tunnel_nic *utn) |
252 | { |
253 | const struct udp_tunnel_nic_info *info = dev->udp_tunnel_nic_info; |
254 | unsigned int i, j; |
255 | int err; |
256 | |
257 | for (i = 0; i < utn->n_tables; i++) { |
258 | /* Find something that needs sync in this table */ |
259 | for (j = 0; j < info->tables[i].n_entries; j++) |
260 | if (udp_tunnel_nic_entry_is_queued(entry: &utn->entries[i][j])) |
261 | break; |
262 | if (j == info->tables[i].n_entries) |
263 | continue; |
264 | |
265 | err = info->sync_table(dev, i); |
266 | if (err) |
267 | netdev_warn(dev, format: "UDP tunnel port sync failed for table %d: %d\n" , |
268 | i, err); |
269 | |
270 | for (j = 0; j < info->tables[i].n_entries; j++) { |
271 | struct udp_tunnel_nic_table_entry *entry; |
272 | |
273 | entry = &utn->entries[i][j]; |
274 | if (udp_tunnel_nic_entry_is_queued(entry)) |
275 | udp_tunnel_nic_entry_update_done(entry, err); |
276 | } |
277 | } |
278 | } |
279 | |
280 | static void |
281 | __udp_tunnel_nic_device_sync(struct net_device *dev, struct udp_tunnel_nic *utn) |
282 | { |
283 | if (!utn->need_sync) |
284 | return; |
285 | |
286 | if (dev->udp_tunnel_nic_info->sync_table) |
287 | udp_tunnel_nic_device_sync_by_table(dev, utn); |
288 | else |
289 | udp_tunnel_nic_device_sync_by_port(dev, utn); |
290 | |
291 | utn->need_sync = 0; |
292 | /* Can't replay directly here, in case we come from the tunnel driver's |
293 | * notification - trying to replay may deadlock inside tunnel driver. |
294 | */ |
295 | utn->need_replay = udp_tunnel_nic_should_replay(dev, utn); |
296 | } |
297 | |
298 | static void |
299 | udp_tunnel_nic_device_sync(struct net_device *dev, struct udp_tunnel_nic *utn) |
300 | { |
301 | const struct udp_tunnel_nic_info *info = dev->udp_tunnel_nic_info; |
302 | bool may_sleep; |
303 | |
304 | if (!utn->need_sync) |
305 | return; |
306 | |
307 | /* Drivers which sleep in the callback need to update from |
308 | * the workqueue, if we come from the tunnel driver's notification. |
309 | */ |
310 | may_sleep = info->flags & UDP_TUNNEL_NIC_INFO_MAY_SLEEP; |
311 | if (!may_sleep) |
312 | __udp_tunnel_nic_device_sync(dev, utn); |
313 | if (may_sleep || utn->need_replay) { |
314 | queue_work(wq: udp_tunnel_nic_workqueue, work: &utn->work); |
315 | utn->work_pending = 1; |
316 | } |
317 | } |
318 | |
319 | static bool |
320 | udp_tunnel_nic_table_is_capable(const struct udp_tunnel_nic_table_info *table, |
321 | struct udp_tunnel_info *ti) |
322 | { |
323 | return table->tunnel_types & ti->type; |
324 | } |
325 | |
326 | static bool |
327 | udp_tunnel_nic_is_capable(struct net_device *dev, struct udp_tunnel_nic *utn, |
328 | struct udp_tunnel_info *ti) |
329 | { |
330 | const struct udp_tunnel_nic_info *info = dev->udp_tunnel_nic_info; |
331 | unsigned int i; |
332 | |
333 | /* Special case IPv4-only NICs */ |
334 | if (info->flags & UDP_TUNNEL_NIC_INFO_IPV4_ONLY && |
335 | ti->sa_family != AF_INET) |
336 | return false; |
337 | |
338 | for (i = 0; i < utn->n_tables; i++) |
339 | if (udp_tunnel_nic_table_is_capable(table: &info->tables[i], ti)) |
340 | return true; |
341 | return false; |
342 | } |
343 | |
344 | static int |
345 | udp_tunnel_nic_has_collision(struct net_device *dev, struct udp_tunnel_nic *utn, |
346 | struct udp_tunnel_info *ti) |
347 | { |
348 | const struct udp_tunnel_nic_info *info = dev->udp_tunnel_nic_info; |
349 | struct udp_tunnel_nic_table_entry *entry; |
350 | unsigned int i, j; |
351 | |
352 | for (i = 0; i < utn->n_tables; i++) |
353 | for (j = 0; j < info->tables[i].n_entries; j++) { |
354 | entry = &utn->entries[i][j]; |
355 | |
356 | if (!udp_tunnel_nic_entry_is_free(entry) && |
357 | entry->port == ti->port && |
358 | entry->type != ti->type) { |
359 | __set_bit(i, &utn->missed); |
360 | return true; |
361 | } |
362 | } |
363 | return false; |
364 | } |
365 | |
366 | static void |
367 | udp_tunnel_nic_entry_adj(struct udp_tunnel_nic *utn, |
368 | unsigned int table, unsigned int idx, int use_cnt_adj) |
369 | { |
370 | struct udp_tunnel_nic_table_entry *entry = &utn->entries[table][idx]; |
371 | bool dodgy = entry->flags & UDP_TUNNEL_NIC_ENTRY_OP_FAIL; |
372 | unsigned int from, to; |
373 | |
374 | WARN_ON(entry->use_cnt + (u32)use_cnt_adj > U16_MAX); |
375 | |
376 | /* If not going from used to unused or vice versa - all done. |
377 | * For dodgy entries make sure we try to sync again (queue the entry). |
378 | */ |
379 | entry->use_cnt += use_cnt_adj; |
380 | if (!dodgy && !entry->use_cnt == !(entry->use_cnt - use_cnt_adj)) |
381 | return; |
382 | |
383 | /* Cancel the op before it was sent to the device, if possible, |
384 | * otherwise we'd need to take special care to issue commands |
385 | * in the same order the ports arrived. |
386 | */ |
387 | if (use_cnt_adj < 0) { |
388 | from = UDP_TUNNEL_NIC_ENTRY_ADD; |
389 | to = UDP_TUNNEL_NIC_ENTRY_DEL; |
390 | } else { |
391 | from = UDP_TUNNEL_NIC_ENTRY_DEL; |
392 | to = UDP_TUNNEL_NIC_ENTRY_ADD; |
393 | } |
394 | |
395 | if (entry->flags & from) { |
396 | entry->flags &= ~from; |
397 | if (!dodgy) |
398 | return; |
399 | } |
400 | |
401 | udp_tunnel_nic_entry_queue(utn, entry, flag: to); |
402 | } |
403 | |
404 | static bool |
405 | udp_tunnel_nic_entry_try_adj(struct udp_tunnel_nic *utn, |
406 | unsigned int table, unsigned int idx, |
407 | struct udp_tunnel_info *ti, int use_cnt_adj) |
408 | { |
409 | struct udp_tunnel_nic_table_entry *entry = &utn->entries[table][idx]; |
410 | |
411 | if (udp_tunnel_nic_entry_is_free(entry) || |
412 | entry->port != ti->port || |
413 | entry->type != ti->type) |
414 | return false; |
415 | |
416 | if (udp_tunnel_nic_entry_is_frozen(entry)) |
417 | return true; |
418 | |
419 | udp_tunnel_nic_entry_adj(utn, table, idx, use_cnt_adj); |
420 | return true; |
421 | } |
422 | |
423 | /* Try to find existing matching entry and adjust its use count, instead of |
424 | * adding a new one. Returns true if entry was found. In case of delete the |
425 | * entry may have gotten removed in the process, in which case it will be |
426 | * queued for removal. |
427 | */ |
428 | static bool |
429 | udp_tunnel_nic_try_existing(struct net_device *dev, struct udp_tunnel_nic *utn, |
430 | struct udp_tunnel_info *ti, int use_cnt_adj) |
431 | { |
432 | const struct udp_tunnel_nic_table_info *table; |
433 | unsigned int i, j; |
434 | |
435 | for (i = 0; i < utn->n_tables; i++) { |
436 | table = &dev->udp_tunnel_nic_info->tables[i]; |
437 | if (!udp_tunnel_nic_table_is_capable(table, ti)) |
438 | continue; |
439 | |
440 | for (j = 0; j < table->n_entries; j++) |
441 | if (udp_tunnel_nic_entry_try_adj(utn, table: i, idx: j, ti, |
442 | use_cnt_adj)) |
443 | return true; |
444 | } |
445 | |
446 | return false; |
447 | } |
448 | |
/* Take one more reference on an already tracked port.
 * Returns true if a matching entry was found.
 */
static bool
udp_tunnel_nic_add_existing(struct net_device *dev, struct udp_tunnel_nic *utn,
			    struct udp_tunnel_info *ti)
{
	const int one_more_user = 1;

	return udp_tunnel_nic_try_existing(dev, utn, ti, one_more_user);
}
455 | |
/* Drop one reference from an already tracked port.
 * Returns true if a matching entry was found.
 */
static bool
udp_tunnel_nic_del_existing(struct net_device *dev, struct udp_tunnel_nic *utn,
			    struct udp_tunnel_info *ti)
{
	const int one_less_user = -1;

	return udp_tunnel_nic_try_existing(dev, utn, ti, one_less_user);
}
462 | |
463 | static bool |
464 | udp_tunnel_nic_add_new(struct net_device *dev, struct udp_tunnel_nic *utn, |
465 | struct udp_tunnel_info *ti) |
466 | { |
467 | const struct udp_tunnel_nic_table_info *table; |
468 | unsigned int i, j; |
469 | |
470 | for (i = 0; i < utn->n_tables; i++) { |
471 | table = &dev->udp_tunnel_nic_info->tables[i]; |
472 | if (!udp_tunnel_nic_table_is_capable(table, ti)) |
473 | continue; |
474 | |
475 | for (j = 0; j < table->n_entries; j++) { |
476 | struct udp_tunnel_nic_table_entry *entry; |
477 | |
478 | entry = &utn->entries[i][j]; |
479 | if (!udp_tunnel_nic_entry_is_free(entry)) |
480 | continue; |
481 | |
482 | entry->port = ti->port; |
483 | entry->type = ti->type; |
484 | entry->use_cnt = 1; |
485 | udp_tunnel_nic_entry_queue(utn, entry, |
486 | flag: UDP_TUNNEL_NIC_ENTRY_ADD); |
487 | return true; |
488 | } |
489 | |
490 | /* The different table may still fit this port in, but there |
491 | * are no devices currently which have multiple tables accepting |
492 | * the same tunnel type, and false positives are okay. |
493 | */ |
494 | __set_bit(i, &utn->missed); |
495 | } |
496 | |
497 | return false; |
498 | } |
499 | |
500 | static void |
501 | __udp_tunnel_nic_add_port(struct net_device *dev, struct udp_tunnel_info *ti) |
502 | { |
503 | const struct udp_tunnel_nic_info *info = dev->udp_tunnel_nic_info; |
504 | struct udp_tunnel_nic *utn; |
505 | |
506 | utn = dev->udp_tunnel_nic; |
507 | if (!utn) |
508 | return; |
509 | if (!netif_running(dev) && info->flags & UDP_TUNNEL_NIC_INFO_OPEN_ONLY) |
510 | return; |
511 | if (info->flags & UDP_TUNNEL_NIC_INFO_STATIC_IANA_VXLAN && |
512 | ti->port == htons(IANA_VXLAN_UDP_PORT)) { |
513 | if (ti->type != UDP_TUNNEL_TYPE_VXLAN) |
514 | netdev_warn(dev, format: "device assumes port 4789 will be used by vxlan tunnels\n" ); |
515 | return; |
516 | } |
517 | |
518 | if (!udp_tunnel_nic_is_capable(dev, utn, ti)) |
519 | return; |
520 | |
521 | /* It may happen that a tunnel of one type is removed and different |
522 | * tunnel type tries to reuse its port before the device was informed. |
523 | * Rely on utn->missed to re-add this port later. |
524 | */ |
525 | if (udp_tunnel_nic_has_collision(dev, utn, ti)) |
526 | return; |
527 | |
528 | if (!udp_tunnel_nic_add_existing(dev, utn, ti)) |
529 | udp_tunnel_nic_add_new(dev, utn, ti); |
530 | |
531 | udp_tunnel_nic_device_sync(dev, utn); |
532 | } |
533 | |
534 | static void |
535 | __udp_tunnel_nic_del_port(struct net_device *dev, struct udp_tunnel_info *ti) |
536 | { |
537 | struct udp_tunnel_nic *utn; |
538 | |
539 | utn = dev->udp_tunnel_nic; |
540 | if (!utn) |
541 | return; |
542 | |
543 | if (!udp_tunnel_nic_is_capable(dev, utn, ti)) |
544 | return; |
545 | |
546 | udp_tunnel_nic_del_existing(dev, utn, ti); |
547 | |
548 | udp_tunnel_nic_device_sync(dev, utn); |
549 | } |
550 | |
551 | static void __udp_tunnel_nic_reset_ntf(struct net_device *dev) |
552 | { |
553 | const struct udp_tunnel_nic_info *info = dev->udp_tunnel_nic_info; |
554 | struct udp_tunnel_nic *utn; |
555 | unsigned int i, j; |
556 | |
557 | ASSERT_RTNL(); |
558 | |
559 | utn = dev->udp_tunnel_nic; |
560 | if (!utn) |
561 | return; |
562 | |
563 | utn->need_sync = false; |
564 | for (i = 0; i < utn->n_tables; i++) |
565 | for (j = 0; j < info->tables[i].n_entries; j++) { |
566 | struct udp_tunnel_nic_table_entry *entry; |
567 | |
568 | entry = &utn->entries[i][j]; |
569 | |
570 | entry->flags &= ~(UDP_TUNNEL_NIC_ENTRY_DEL | |
571 | UDP_TUNNEL_NIC_ENTRY_OP_FAIL); |
572 | /* We don't release rtnl across ops */ |
573 | WARN_ON(entry->flags & UDP_TUNNEL_NIC_ENTRY_FROZEN); |
574 | if (!entry->use_cnt) |
575 | continue; |
576 | |
577 | udp_tunnel_nic_entry_queue(utn, entry, |
578 | flag: UDP_TUNNEL_NIC_ENTRY_ADD); |
579 | } |
580 | |
581 | __udp_tunnel_nic_device_sync(dev, utn); |
582 | } |
583 | |
584 | static size_t |
585 | __udp_tunnel_nic_dump_size(struct net_device *dev, unsigned int table) |
586 | { |
587 | const struct udp_tunnel_nic_info *info = dev->udp_tunnel_nic_info; |
588 | struct udp_tunnel_nic *utn; |
589 | unsigned int j; |
590 | size_t size; |
591 | |
592 | utn = dev->udp_tunnel_nic; |
593 | if (!utn) |
594 | return 0; |
595 | |
596 | size = 0; |
597 | for (j = 0; j < info->tables[table].n_entries; j++) { |
598 | if (!udp_tunnel_nic_entry_is_present(entry: &utn->entries[table][j])) |
599 | continue; |
600 | |
601 | size += nla_total_size(payload: 0) + /* _TABLE_ENTRY */ |
602 | nla_total_size(payload: sizeof(__be16)) + /* _ENTRY_PORT */ |
603 | nla_total_size(payload: sizeof(u32)); /* _ENTRY_TYPE */ |
604 | } |
605 | |
606 | return size; |
607 | } |
608 | |
609 | static int |
610 | __udp_tunnel_nic_dump_write(struct net_device *dev, unsigned int table, |
611 | struct sk_buff *skb) |
612 | { |
613 | const struct udp_tunnel_nic_info *info = dev->udp_tunnel_nic_info; |
614 | struct udp_tunnel_nic *utn; |
615 | struct nlattr *nest; |
616 | unsigned int j; |
617 | |
618 | utn = dev->udp_tunnel_nic; |
619 | if (!utn) |
620 | return 0; |
621 | |
622 | for (j = 0; j < info->tables[table].n_entries; j++) { |
623 | if (!udp_tunnel_nic_entry_is_present(entry: &utn->entries[table][j])) |
624 | continue; |
625 | |
626 | nest = nla_nest_start(skb, attrtype: ETHTOOL_A_TUNNEL_UDP_TABLE_ENTRY); |
627 | if (!nest) |
628 | return -EMSGSIZE; |
629 | |
630 | if (nla_put_be16(skb, attrtype: ETHTOOL_A_TUNNEL_UDP_ENTRY_PORT, |
631 | value: utn->entries[table][j].port) || |
632 | nla_put_u32(skb, attrtype: ETHTOOL_A_TUNNEL_UDP_ENTRY_TYPE, |
633 | ilog2(utn->entries[table][j].type))) |
634 | goto err_cancel; |
635 | |
636 | nla_nest_end(skb, start: nest); |
637 | } |
638 | |
639 | return 0; |
640 | |
641 | err_cancel: |
642 | nla_nest_cancel(skb, start: nest); |
643 | return -EMSGSIZE; |
644 | } |
645 | |
/* Operations table of this module. It is published through the global
 * udp_tunnel_nic_ops pointer at module init and withdrawn at module exit.
 */
static const struct udp_tunnel_nic_ops __udp_tunnel_nic_ops = {
	.get_port	= __udp_tunnel_nic_get_port,
	.set_port_priv	= __udp_tunnel_nic_set_port_priv,
	.add_port	= __udp_tunnel_nic_add_port,
	.del_port	= __udp_tunnel_nic_del_port,
	.reset_ntf	= __udp_tunnel_nic_reset_ntf,
	.dump_size	= __udp_tunnel_nic_dump_size,
	.dump_write	= __udp_tunnel_nic_dump_write,
};
655 | |
656 | static void |
657 | udp_tunnel_nic_flush(struct net_device *dev, struct udp_tunnel_nic *utn) |
658 | { |
659 | const struct udp_tunnel_nic_info *info = dev->udp_tunnel_nic_info; |
660 | unsigned int i, j; |
661 | |
662 | for (i = 0; i < utn->n_tables; i++) |
663 | for (j = 0; j < info->tables[i].n_entries; j++) { |
664 | int adj_cnt = -utn->entries[i][j].use_cnt; |
665 | |
666 | if (adj_cnt) |
667 | udp_tunnel_nic_entry_adj(utn, table: i, idx: j, use_cnt_adj: adj_cnt); |
668 | } |
669 | |
670 | __udp_tunnel_nic_device_sync(dev, utn); |
671 | |
672 | for (i = 0; i < utn->n_tables; i++) |
673 | memset(utn->entries[i], 0, array_size(info->tables[i].n_entries, |
674 | sizeof(**utn->entries))); |
675 | WARN_ON(utn->need_sync); |
676 | utn->need_replay = 0; |
677 | } |
678 | |
679 | static void |
680 | udp_tunnel_nic_replay(struct net_device *dev, struct udp_tunnel_nic *utn) |
681 | { |
682 | const struct udp_tunnel_nic_info *info = dev->udp_tunnel_nic_info; |
683 | struct udp_tunnel_nic_shared_node *node; |
684 | unsigned int i, j; |
685 | |
686 | /* Freeze all the ports we are already tracking so that the replay |
687 | * does not double up the refcount. |
688 | */ |
689 | for (i = 0; i < utn->n_tables; i++) |
690 | for (j = 0; j < info->tables[i].n_entries; j++) |
691 | udp_tunnel_nic_entry_freeze_used(entry: &utn->entries[i][j]); |
692 | utn->missed = 0; |
693 | utn->need_replay = 0; |
694 | |
695 | if (!info->shared) { |
696 | udp_tunnel_get_rx_info(dev); |
697 | } else { |
698 | list_for_each_entry(node, &info->shared->devices, list) |
699 | udp_tunnel_get_rx_info(dev: node->dev); |
700 | } |
701 | |
702 | for (i = 0; i < utn->n_tables; i++) |
703 | for (j = 0; j < info->tables[i].n_entries; j++) |
704 | udp_tunnel_nic_entry_unfreeze(entry: &utn->entries[i][j]); |
705 | } |
706 | |
707 | static void udp_tunnel_nic_device_sync_work(struct work_struct *work) |
708 | { |
709 | struct udp_tunnel_nic *utn = |
710 | container_of(work, struct udp_tunnel_nic, work); |
711 | |
712 | rtnl_lock(); |
713 | utn->work_pending = 0; |
714 | __udp_tunnel_nic_device_sync(dev: utn->dev, utn); |
715 | |
716 | if (utn->need_replay) |
717 | udp_tunnel_nic_replay(dev: utn->dev, utn); |
718 | rtnl_unlock(); |
719 | } |
720 | |
721 | static struct udp_tunnel_nic * |
722 | udp_tunnel_nic_alloc(const struct udp_tunnel_nic_info *info, |
723 | unsigned int n_tables) |
724 | { |
725 | struct udp_tunnel_nic *utn; |
726 | unsigned int i; |
727 | |
728 | utn = kzalloc(struct_size(utn, entries, n_tables), GFP_KERNEL); |
729 | if (!utn) |
730 | return NULL; |
731 | utn->n_tables = n_tables; |
732 | INIT_WORK(&utn->work, udp_tunnel_nic_device_sync_work); |
733 | |
734 | for (i = 0; i < n_tables; i++) { |
735 | utn->entries[i] = kcalloc(n: info->tables[i].n_entries, |
736 | size: sizeof(*utn->entries[i]), GFP_KERNEL); |
737 | if (!utn->entries[i]) |
738 | goto err_free_prev_entries; |
739 | } |
740 | |
741 | return utn; |
742 | |
743 | err_free_prev_entries: |
744 | while (i--) |
745 | kfree(objp: utn->entries[i]); |
746 | kfree(objp: utn); |
747 | return NULL; |
748 | } |
749 | |
750 | static void udp_tunnel_nic_free(struct udp_tunnel_nic *utn) |
751 | { |
752 | unsigned int i; |
753 | |
754 | for (i = 0; i < utn->n_tables; i++) |
755 | kfree(objp: utn->entries[i]); |
756 | kfree(objp: utn); |
757 | } |
758 | |
/* Set up UDP tunnel offload state for @dev.
 *
 * Validates the driver-provided info, then either attaches @dev to the
 * state already published through info->shared or allocates fresh state.
 * A reference on @dev is taken and, unless the device only wants ports
 * while open, the current ports are requested right away.
 *
 * Returns 0 on success, -EINVAL if the driver info is inconsistent or
 * -ENOMEM on allocation failure.
 */
static int udp_tunnel_nic_register(struct net_device *dev)
{
	const struct udp_tunnel_nic_info *info = dev->udp_tunnel_nic_info;
	struct udp_tunnel_nic_shared_node *node = NULL;
	struct udp_tunnel_nic *utn;
	unsigned int n_tables, i;

	/* The missed bitmap must have a bit for every possible table */
	BUILD_BUG_ON(sizeof(utn->missed) * BITS_PER_BYTE <
		     UDP_TUNNEL_NIC_MAX_TABLES);
	/* Expect use count of at most 2 (IPv4, IPv6) per device */
	BUILD_BUG_ON(UDP_TUNNEL_NIC_USE_CNT_MAX <
		     UDP_TUNNEL_NIC_MAX_SHARING_DEVICES * 2);

	/* Check that the driver info is sane: set_port and unset_port come
	 * as a pair, exactly one of set_port and sync_table is provided,
	 * and the first table is not empty.
	 */
	if (WARN_ON(!info->set_port != !info->unset_port) ||
	    WARN_ON(!info->set_port == !info->sync_table) ||
	    WARN_ON(!info->tables[0].n_entries))
		return -EINVAL;

	/* Shared tables cannot be combined with the open-only mode */
	if (WARN_ON(info->shared &&
		    info->flags & UDP_TUNNEL_NIC_INFO_OPEN_ONLY))
		return -EINVAL;

	/* Count the populated tables; a populated table following an empty
	 * one is rejected, i.e. tables must be filled in from index 0
	 * without gaps.
	 */
	n_tables = 1;
	for (i = 1; i < UDP_TUNNEL_NIC_MAX_TABLES; i++) {
		if (!info->tables[i].n_entries)
			continue;

		n_tables++;
		if (WARN_ON(!info->tables[i - 1].n_entries))
			return -EINVAL;
	}

	/* Create UDP tunnel state structures */
	if (info->shared) {
		/* List node tying this device to the shared state */
		node = kzalloc(sizeof(*node), GFP_KERNEL);
		if (!node)
			return -ENOMEM;

		node->dev = dev;
	}

	/* Reuse the state another sharing device already created, if any */
	if (info->shared && info->shared->udp_tunnel_nic_info) {
		utn = info->shared->udp_tunnel_nic_info;
	} else {
		utn = udp_tunnel_nic_alloc(info, n_tables);
		if (!utn) {
			/* node is NULL for non-shared tables; kfree copes */
			kfree(node);
			return -ENOMEM;
		}
	}

	if (info->shared) {
		/* First sharing device: publish the freshly allocated state */
		if (!info->shared->udp_tunnel_nic_info) {
			INIT_LIST_HEAD(&info->shared->devices);
			info->shared->udp_tunnel_nic_info = utn;
		}

		list_add_tail(&node->list, &info->shared->devices);
	}

	/* The reference taken here is dropped in udp_tunnel_nic_unregister() */
	utn->dev = dev;
	dev_hold(dev);
	dev->udp_tunnel_nic = utn;

	if (!(info->flags & UDP_TUNNEL_NIC_INFO_OPEN_ONLY))
		udp_tunnel_get_rx_info(dev);

	return 0;
}
829 | |
/* Tear down the UDP tunnel offload state of @dev.
 *
 * For a shared table @dev is merely detached as long as other devices keep
 * using the state. Otherwise all ports are flushed from the device and the
 * state is freed, unless the work item is still pending, in which case the
 * function returns early without releasing anything.
 */
static void
udp_tunnel_nic_unregister(struct net_device *dev, struct udp_tunnel_nic *utn)
{
	const struct udp_tunnel_nic_info *info = dev->udp_tunnel_nic_info;

	/* For a shared table remove this dev from the list of sharing devices
	 * and if there are other devices just detach.
	 */
	if (info->shared) {
		struct udp_tunnel_nic_shared_node *node, *first;

		list_for_each_entry(node, &info->shared->devices, list)
			if (node->dev == dev)
				break;
		/* The loop ran to completion: @dev is not on the list */
		if (list_entry_is_head(node, &info->shared->devices, list))
			return;

		list_del(&node->list);
		kfree(node);

		first = list_first_entry_or_null(&info->shared->devices,
						 typeof(*first), list);
		if (first) {
			/* Others still share the state: drop the ports of
			 * this device, re-point the state at a remaining
			 * device and skip the flush and free below.
			 */
			udp_tunnel_drop_rx_info(dev);
			utn->dev = first->dev;
			goto release_dev;
		}

		/* Last sharing device is gone, unpublish the shared state */
		info->shared->udp_tunnel_nic_info = NULL;
	}

	/* Flush before we check work, so we don't waste time adding entries
	 * from the work which we will boot immediately.
	 */
	udp_tunnel_nic_flush(dev, utn);

	/* Wait for the work to be done using the state, netdev core will
	 * retry unregister until we give up our reference on this device.
	 */
	if (utn->work_pending)
		return;

	udp_tunnel_nic_free(utn);
release_dev:
	/* Drop the reference taken at registration */
	dev->udp_tunnel_nic = NULL;
	dev_put(dev);
}
877 | |
878 | static int |
879 | udp_tunnel_nic_netdevice_event(struct notifier_block *unused, |
880 | unsigned long event, void *ptr) |
881 | { |
882 | struct net_device *dev = netdev_notifier_info_to_dev(info: ptr); |
883 | const struct udp_tunnel_nic_info *info; |
884 | struct udp_tunnel_nic *utn; |
885 | |
886 | info = dev->udp_tunnel_nic_info; |
887 | if (!info) |
888 | return NOTIFY_DONE; |
889 | |
890 | if (event == NETDEV_REGISTER) { |
891 | int err; |
892 | |
893 | err = udp_tunnel_nic_register(dev); |
894 | if (err) |
895 | netdev_WARN(dev, "failed to register for UDP tunnel offloads: %d" , err); |
896 | return notifier_from_errno(err); |
897 | } |
898 | /* All other events will need the udp_tunnel_nic state */ |
899 | utn = dev->udp_tunnel_nic; |
900 | if (!utn) |
901 | return NOTIFY_DONE; |
902 | |
903 | if (event == NETDEV_UNREGISTER) { |
904 | udp_tunnel_nic_unregister(dev, utn); |
905 | return NOTIFY_OK; |
906 | } |
907 | |
908 | /* All other events only matter if NIC has to be programmed open */ |
909 | if (!(info->flags & UDP_TUNNEL_NIC_INFO_OPEN_ONLY)) |
910 | return NOTIFY_DONE; |
911 | |
912 | if (event == NETDEV_UP) { |
913 | WARN_ON(!udp_tunnel_nic_is_empty(dev, utn)); |
914 | udp_tunnel_get_rx_info(dev); |
915 | return NOTIFY_OK; |
916 | } |
917 | if (event == NETDEV_GOING_DOWN) { |
918 | udp_tunnel_nic_flush(dev, utn); |
919 | return NOTIFY_OK; |
920 | } |
921 | |
922 | return NOTIFY_DONE; |
923 | } |
924 | |
/* Notifier through which netdevice events reach
 * udp_tunnel_nic_netdevice_event(); registered at module init and
 * unregistered at module exit.
 */
static struct notifier_block udp_tunnel_nic_notifier_block __read_mostly = {
	.notifier_call = udp_tunnel_nic_netdevice_event,
};
928 | |
929 | static int __init udp_tunnel_nic_init_module(void) |
930 | { |
931 | int err; |
932 | |
933 | udp_tunnel_nic_workqueue = alloc_ordered_workqueue("udp_tunnel_nic" , 0); |
934 | if (!udp_tunnel_nic_workqueue) |
935 | return -ENOMEM; |
936 | |
937 | rtnl_lock(); |
938 | udp_tunnel_nic_ops = &__udp_tunnel_nic_ops; |
939 | rtnl_unlock(); |
940 | |
941 | err = register_netdevice_notifier(nb: &udp_tunnel_nic_notifier_block); |
942 | if (err) |
943 | goto err_unset_ops; |
944 | |
945 | return 0; |
946 | |
947 | err_unset_ops: |
948 | rtnl_lock(); |
949 | udp_tunnel_nic_ops = NULL; |
950 | rtnl_unlock(); |
951 | destroy_workqueue(wq: udp_tunnel_nic_workqueue); |
952 | return err; |
953 | } |
954 | late_initcall(udp_tunnel_nic_init_module); |
955 | |
/* Module exit: undo udp_tunnel_nic_init_module() in reverse order. */
static void __exit udp_tunnel_nic_cleanup_module(void)
{
	/* Stop receiving netdevice events first */
	unregister_netdevice_notifier(&udp_tunnel_nic_notifier_block);

	/* Withdraw the ops under RTNL, mirroring how they were published */
	rtnl_lock();
	udp_tunnel_nic_ops = NULL;
	rtnl_unlock();

	/* NOTE(review): relies on destroy_workqueue() draining any work
	 * still queued before the module code goes away - confirm.
	 */
	destroy_workqueue(udp_tunnel_nic_workqueue);
}
module_exit(udp_tunnel_nic_cleanup_module);
967 | |
968 | MODULE_LICENSE("GPL" ); |
969 | |