1 | // SPDX-License-Identifier: GPL-2.0-or-later |
2 | /* |
3 | * lwtunnel Infrastructure for light weight tunnels like mpls |
4 | * |
5 | * Authors: Roopa Prabhu, <roopa@cumulusnetworks.com> |
6 | */ |
7 | |
8 | #include <linux/capability.h> |
9 | #include <linux/module.h> |
10 | #include <linux/types.h> |
11 | #include <linux/kernel.h> |
12 | #include <linux/slab.h> |
13 | #include <linux/uaccess.h> |
14 | #include <linux/skbuff.h> |
15 | #include <linux/netdevice.h> |
16 | #include <linux/lwtunnel.h> |
17 | #include <linux/in.h> |
18 | #include <linux/init.h> |
19 | #include <linux/err.h> |
20 | |
21 | #include <net/lwtunnel.h> |
22 | #include <net/rtnetlink.h> |
23 | #include <net/ip6_fib.h> |
24 | #include <net/rtnh.h> |
25 | |
/* Static key, defined in its "false" state and exported GPL-only.  It is
 * neither tested nor switched anywhere in the code visible here.
 */
DEFINE_STATIC_KEY_FALSE(nf_hooks_lwtunnel_enabled);
EXPORT_SYMBOL_GPL(nf_hooks_lwtunnel_enabled);
28 | |
29 | #ifdef CONFIG_MODULES |
30 | |
31 | static const char *lwtunnel_encap_str(enum lwtunnel_encap_types encap_type) |
32 | { |
33 | /* Only lwt encaps implemented without using an interface for |
34 | * the encap need to return a string here. |
35 | */ |
36 | switch (encap_type) { |
37 | case LWTUNNEL_ENCAP_MPLS: |
38 | return "MPLS" ; |
39 | case LWTUNNEL_ENCAP_ILA: |
40 | return "ILA" ; |
41 | case LWTUNNEL_ENCAP_SEG6: |
42 | return "SEG6" ; |
43 | case LWTUNNEL_ENCAP_BPF: |
44 | return "BPF" ; |
45 | case LWTUNNEL_ENCAP_SEG6_LOCAL: |
46 | return "SEG6LOCAL" ; |
47 | case LWTUNNEL_ENCAP_RPL: |
48 | return "RPL" ; |
49 | case LWTUNNEL_ENCAP_IOAM6: |
50 | return "IOAM6" ; |
51 | case LWTUNNEL_ENCAP_XFRM: |
52 | /* module autoload not supported for encap type */ |
53 | return NULL; |
54 | case LWTUNNEL_ENCAP_IP6: |
55 | case LWTUNNEL_ENCAP_IP: |
56 | case LWTUNNEL_ENCAP_NONE: |
57 | case __LWTUNNEL_ENCAP_MAX: |
58 | /* should not have got here */ |
59 | WARN_ON(1); |
60 | break; |
61 | } |
62 | return NULL; |
63 | } |
64 | |
65 | #endif /* CONFIG_MODULES */ |
66 | |
67 | struct lwtunnel_state *lwtunnel_state_alloc(int encap_len) |
68 | { |
69 | struct lwtunnel_state *lws; |
70 | |
71 | lws = kzalloc(size: sizeof(*lws) + encap_len, GFP_ATOMIC); |
72 | |
73 | return lws; |
74 | } |
75 | EXPORT_SYMBOL_GPL(lwtunnel_state_alloc); |
76 | |
77 | static const struct lwtunnel_encap_ops __rcu * |
78 | lwtun_encaps[LWTUNNEL_ENCAP_MAX + 1] __read_mostly; |
79 | |
80 | int lwtunnel_encap_add_ops(const struct lwtunnel_encap_ops *ops, |
81 | unsigned int num) |
82 | { |
83 | if (num > LWTUNNEL_ENCAP_MAX) |
84 | return -ERANGE; |
85 | |
86 | return !cmpxchg((const struct lwtunnel_encap_ops **) |
87 | &lwtun_encaps[num], |
88 | NULL, ops) ? 0 : -1; |
89 | } |
90 | EXPORT_SYMBOL_GPL(lwtunnel_encap_add_ops); |
91 | |
92 | int lwtunnel_encap_del_ops(const struct lwtunnel_encap_ops *ops, |
93 | unsigned int encap_type) |
94 | { |
95 | int ret; |
96 | |
97 | if (encap_type == LWTUNNEL_ENCAP_NONE || |
98 | encap_type > LWTUNNEL_ENCAP_MAX) |
99 | return -ERANGE; |
100 | |
101 | ret = (cmpxchg((const struct lwtunnel_encap_ops **) |
102 | &lwtun_encaps[encap_type], |
103 | ops, NULL) == ops) ? 0 : -1; |
104 | |
105 | synchronize_net(); |
106 | |
107 | return ret; |
108 | } |
109 | EXPORT_SYMBOL_GPL(lwtunnel_encap_del_ops); |
110 | |
111 | int lwtunnel_build_state(struct net *net, u16 encap_type, |
112 | struct nlattr *encap, unsigned int family, |
113 | const void *cfg, struct lwtunnel_state **lws, |
114 | struct netlink_ext_ack *extack) |
115 | { |
116 | const struct lwtunnel_encap_ops *ops; |
117 | bool found = false; |
118 | int ret = -EINVAL; |
119 | |
120 | if (encap_type == LWTUNNEL_ENCAP_NONE || |
121 | encap_type > LWTUNNEL_ENCAP_MAX) { |
122 | NL_SET_ERR_MSG_ATTR(extack, encap, |
123 | "Unknown LWT encapsulation type" ); |
124 | return ret; |
125 | } |
126 | |
127 | ret = -EOPNOTSUPP; |
128 | rcu_read_lock(); |
129 | ops = rcu_dereference(lwtun_encaps[encap_type]); |
130 | if (likely(ops && ops->build_state && try_module_get(ops->owner))) |
131 | found = true; |
132 | rcu_read_unlock(); |
133 | |
134 | if (found) { |
135 | ret = ops->build_state(net, encap, family, cfg, lws, extack); |
136 | if (ret) |
137 | module_put(module: ops->owner); |
138 | } else { |
139 | /* don't rely on -EOPNOTSUPP to detect match as build_state |
140 | * handlers could return it |
141 | */ |
142 | NL_SET_ERR_MSG_ATTR(extack, encap, |
143 | "LWT encapsulation type not supported" ); |
144 | } |
145 | |
146 | return ret; |
147 | } |
148 | EXPORT_SYMBOL_GPL(lwtunnel_build_state); |
149 | |
150 | int lwtunnel_valid_encap_type(u16 encap_type, struct netlink_ext_ack *extack) |
151 | { |
152 | const struct lwtunnel_encap_ops *ops; |
153 | int ret = -EINVAL; |
154 | |
155 | if (encap_type == LWTUNNEL_ENCAP_NONE || |
156 | encap_type > LWTUNNEL_ENCAP_MAX) { |
157 | NL_SET_ERR_MSG(extack, "Unknown lwt encapsulation type" ); |
158 | return ret; |
159 | } |
160 | |
161 | rcu_read_lock(); |
162 | ops = rcu_dereference(lwtun_encaps[encap_type]); |
163 | rcu_read_unlock(); |
164 | #ifdef CONFIG_MODULES |
165 | if (!ops) { |
166 | const char *encap_type_str = lwtunnel_encap_str(encap_type); |
167 | |
168 | if (encap_type_str) { |
169 | __rtnl_unlock(); |
170 | request_module("rtnl-lwt-%s" , encap_type_str); |
171 | rtnl_lock(); |
172 | |
173 | rcu_read_lock(); |
174 | ops = rcu_dereference(lwtun_encaps[encap_type]); |
175 | rcu_read_unlock(); |
176 | } |
177 | } |
178 | #endif |
179 | ret = ops ? 0 : -EOPNOTSUPP; |
180 | if (ret < 0) |
181 | NL_SET_ERR_MSG(extack, "lwt encapsulation type not supported" ); |
182 | |
183 | return ret; |
184 | } |
185 | EXPORT_SYMBOL_GPL(lwtunnel_valid_encap_type); |
186 | |
187 | int lwtunnel_valid_encap_type_attr(struct nlattr *attr, int remaining, |
188 | struct netlink_ext_ack *extack) |
189 | { |
190 | struct rtnexthop *rtnh = (struct rtnexthop *)attr; |
191 | struct nlattr *nla_entype; |
192 | struct nlattr *attrs; |
193 | u16 encap_type; |
194 | int attrlen; |
195 | |
196 | while (rtnh_ok(rtnh, remaining)) { |
197 | attrlen = rtnh_attrlen(rtnh); |
198 | if (attrlen > 0) { |
199 | attrs = rtnh_attrs(rtnh); |
200 | nla_entype = nla_find(head: attrs, len: attrlen, attrtype: RTA_ENCAP_TYPE); |
201 | |
202 | if (nla_entype) { |
203 | if (nla_len(nla: nla_entype) < sizeof(u16)) { |
204 | NL_SET_ERR_MSG(extack, "Invalid RTA_ENCAP_TYPE" ); |
205 | return -EINVAL; |
206 | } |
207 | encap_type = nla_get_u16(nla: nla_entype); |
208 | |
209 | if (lwtunnel_valid_encap_type(encap_type, |
210 | extack) != 0) |
211 | return -EOPNOTSUPP; |
212 | } |
213 | } |
214 | rtnh = rtnh_next(rtnh, remaining: &remaining); |
215 | } |
216 | |
217 | return 0; |
218 | } |
219 | EXPORT_SYMBOL_GPL(lwtunnel_valid_encap_type_attr); |
220 | |
221 | void lwtstate_free(struct lwtunnel_state *lws) |
222 | { |
223 | const struct lwtunnel_encap_ops *ops = lwtun_encaps[lws->type]; |
224 | |
225 | if (ops->destroy_state) { |
226 | ops->destroy_state(lws); |
227 | kfree_rcu(lws, rcu); |
228 | } else { |
229 | kfree(objp: lws); |
230 | } |
231 | module_put(module: ops->owner); |
232 | } |
233 | EXPORT_SYMBOL_GPL(lwtstate_free); |
234 | |
235 | int lwtunnel_fill_encap(struct sk_buff *skb, struct lwtunnel_state *lwtstate, |
236 | int encap_attr, int encap_type_attr) |
237 | { |
238 | const struct lwtunnel_encap_ops *ops; |
239 | struct nlattr *nest; |
240 | int ret; |
241 | |
242 | if (!lwtstate) |
243 | return 0; |
244 | |
245 | if (lwtstate->type == LWTUNNEL_ENCAP_NONE || |
246 | lwtstate->type > LWTUNNEL_ENCAP_MAX) |
247 | return 0; |
248 | |
249 | nest = nla_nest_start_noflag(skb, attrtype: encap_attr); |
250 | if (!nest) |
251 | return -EMSGSIZE; |
252 | |
253 | ret = -EOPNOTSUPP; |
254 | rcu_read_lock(); |
255 | ops = rcu_dereference(lwtun_encaps[lwtstate->type]); |
256 | if (likely(ops && ops->fill_encap)) |
257 | ret = ops->fill_encap(skb, lwtstate); |
258 | rcu_read_unlock(); |
259 | |
260 | if (ret) |
261 | goto nla_put_failure; |
262 | nla_nest_end(skb, start: nest); |
263 | ret = nla_put_u16(skb, attrtype: encap_type_attr, value: lwtstate->type); |
264 | if (ret) |
265 | goto nla_put_failure; |
266 | |
267 | return 0; |
268 | |
269 | nla_put_failure: |
270 | nla_nest_cancel(skb, start: nest); |
271 | |
272 | return (ret == -EOPNOTSUPP ? 0 : ret); |
273 | } |
274 | EXPORT_SYMBOL_GPL(lwtunnel_fill_encap); |
275 | |
276 | int lwtunnel_get_encap_size(struct lwtunnel_state *lwtstate) |
277 | { |
278 | const struct lwtunnel_encap_ops *ops; |
279 | int ret = 0; |
280 | |
281 | if (!lwtstate) |
282 | return 0; |
283 | |
284 | if (lwtstate->type == LWTUNNEL_ENCAP_NONE || |
285 | lwtstate->type > LWTUNNEL_ENCAP_MAX) |
286 | return 0; |
287 | |
288 | rcu_read_lock(); |
289 | ops = rcu_dereference(lwtun_encaps[lwtstate->type]); |
290 | if (likely(ops && ops->get_encap_size)) |
291 | ret = nla_total_size(payload: ops->get_encap_size(lwtstate)); |
292 | rcu_read_unlock(); |
293 | |
294 | return ret; |
295 | } |
296 | EXPORT_SYMBOL_GPL(lwtunnel_get_encap_size); |
297 | |
298 | int lwtunnel_cmp_encap(struct lwtunnel_state *a, struct lwtunnel_state *b) |
299 | { |
300 | const struct lwtunnel_encap_ops *ops; |
301 | int ret = 0; |
302 | |
303 | if (!a && !b) |
304 | return 0; |
305 | |
306 | if (!a || !b) |
307 | return 1; |
308 | |
309 | if (a->type != b->type) |
310 | return 1; |
311 | |
312 | if (a->type == LWTUNNEL_ENCAP_NONE || |
313 | a->type > LWTUNNEL_ENCAP_MAX) |
314 | return 0; |
315 | |
316 | rcu_read_lock(); |
317 | ops = rcu_dereference(lwtun_encaps[a->type]); |
318 | if (likely(ops && ops->cmp_encap)) |
319 | ret = ops->cmp_encap(a, b); |
320 | rcu_read_unlock(); |
321 | |
322 | return ret; |
323 | } |
324 | EXPORT_SYMBOL_GPL(lwtunnel_cmp_encap); |
325 | |
326 | int lwtunnel_output(struct net *net, struct sock *sk, struct sk_buff *skb) |
327 | { |
328 | struct dst_entry *dst = skb_dst(skb); |
329 | const struct lwtunnel_encap_ops *ops; |
330 | struct lwtunnel_state *lwtstate; |
331 | int ret = -EINVAL; |
332 | |
333 | if (!dst) |
334 | goto drop; |
335 | lwtstate = dst->lwtstate; |
336 | |
337 | if (lwtstate->type == LWTUNNEL_ENCAP_NONE || |
338 | lwtstate->type > LWTUNNEL_ENCAP_MAX) |
339 | return 0; |
340 | |
341 | ret = -EOPNOTSUPP; |
342 | rcu_read_lock(); |
343 | ops = rcu_dereference(lwtun_encaps[lwtstate->type]); |
344 | if (likely(ops && ops->output)) |
345 | ret = ops->output(net, sk, skb); |
346 | rcu_read_unlock(); |
347 | |
348 | if (ret == -EOPNOTSUPP) |
349 | goto drop; |
350 | |
351 | return ret; |
352 | |
353 | drop: |
354 | kfree_skb(skb); |
355 | |
356 | return ret; |
357 | } |
358 | EXPORT_SYMBOL_GPL(lwtunnel_output); |
359 | |
360 | int lwtunnel_xmit(struct sk_buff *skb) |
361 | { |
362 | struct dst_entry *dst = skb_dst(skb); |
363 | const struct lwtunnel_encap_ops *ops; |
364 | struct lwtunnel_state *lwtstate; |
365 | int ret = -EINVAL; |
366 | |
367 | if (!dst) |
368 | goto drop; |
369 | |
370 | lwtstate = dst->lwtstate; |
371 | |
372 | if (lwtstate->type == LWTUNNEL_ENCAP_NONE || |
373 | lwtstate->type > LWTUNNEL_ENCAP_MAX) |
374 | return 0; |
375 | |
376 | ret = -EOPNOTSUPP; |
377 | rcu_read_lock(); |
378 | ops = rcu_dereference(lwtun_encaps[lwtstate->type]); |
379 | if (likely(ops && ops->xmit)) |
380 | ret = ops->xmit(skb); |
381 | rcu_read_unlock(); |
382 | |
383 | if (ret == -EOPNOTSUPP) |
384 | goto drop; |
385 | |
386 | return ret; |
387 | |
388 | drop: |
389 | kfree_skb(skb); |
390 | |
391 | return ret; |
392 | } |
393 | EXPORT_SYMBOL_GPL(lwtunnel_xmit); |
394 | |
395 | int lwtunnel_input(struct sk_buff *skb) |
396 | { |
397 | struct dst_entry *dst = skb_dst(skb); |
398 | const struct lwtunnel_encap_ops *ops; |
399 | struct lwtunnel_state *lwtstate; |
400 | int ret = -EINVAL; |
401 | |
402 | if (!dst) |
403 | goto drop; |
404 | lwtstate = dst->lwtstate; |
405 | |
406 | if (lwtstate->type == LWTUNNEL_ENCAP_NONE || |
407 | lwtstate->type > LWTUNNEL_ENCAP_MAX) |
408 | return 0; |
409 | |
410 | ret = -EOPNOTSUPP; |
411 | rcu_read_lock(); |
412 | ops = rcu_dereference(lwtun_encaps[lwtstate->type]); |
413 | if (likely(ops && ops->input)) |
414 | ret = ops->input(skb); |
415 | rcu_read_unlock(); |
416 | |
417 | if (ret == -EOPNOTSUPP) |
418 | goto drop; |
419 | |
420 | return ret; |
421 | |
422 | drop: |
423 | kfree_skb(skb); |
424 | |
425 | return ret; |
426 | } |
427 | EXPORT_SYMBOL_GPL(lwtunnel_input); |
428 | |