1 | // SPDX-License-Identifier: GPL-2.0-or-later |
2 | /* |
 * mpls tunnels	An implementation of MPLS tunnels using the lightweight tunnel
 *		infrastructure
5 | * |
6 | * Authors: Roopa Prabhu, <roopa@cumulusnetworks.com> |
7 | */ |
8 | #include <linux/types.h> |
9 | #include <linux/skbuff.h> |
10 | #include <linux/net.h> |
11 | #include <linux/module.h> |
12 | #include <linux/mpls.h> |
13 | #include <linux/vmalloc.h> |
14 | #include <net/ip.h> |
15 | #include <net/dst.h> |
16 | #include <net/lwtunnel.h> |
17 | #include <net/netevent.h> |
18 | #include <net/netns/generic.h> |
19 | #include <net/ip6_fib.h> |
20 | #include <net/route.h> |
21 | #include <net/mpls_iptunnel.h> |
22 | #include <linux/mpls_iptunnel.h> |
23 | #include "internal.h" |
24 | |
25 | static const struct nla_policy mpls_iptunnel_policy[MPLS_IPTUNNEL_MAX + 1] = { |
26 | [MPLS_IPTUNNEL_DST] = { .len = sizeof(u32) }, |
27 | [MPLS_IPTUNNEL_TTL] = { .type = NLA_U8 }, |
28 | }; |
29 | |
30 | static unsigned int mpls_encap_size(struct mpls_iptunnel_encap *en) |
31 | { |
32 | /* The size of the layer 2.5 labels to be added for this route */ |
33 | return en->labels * sizeof(struct mpls_shim_hdr); |
34 | } |
35 | |
36 | static int mpls_xmit(struct sk_buff *skb) |
37 | { |
38 | struct mpls_iptunnel_encap *tun_encap_info; |
39 | struct mpls_shim_hdr *hdr; |
40 | struct net_device *out_dev; |
41 | unsigned int hh_len; |
42 | unsigned int ; |
43 | unsigned int mtu; |
44 | struct dst_entry *dst = skb_dst(skb); |
45 | struct rtable *rt = NULL; |
46 | struct rt6_info *rt6 = NULL; |
47 | struct mpls_dev *out_mdev; |
48 | struct net *net; |
49 | int err = 0; |
50 | bool bos; |
51 | int i; |
52 | unsigned int ttl; |
53 | |
54 | /* Find the output device */ |
55 | out_dev = dst->dev; |
56 | net = dev_net(dev: out_dev); |
57 | |
58 | skb_orphan(skb); |
59 | |
60 | if (!mpls_output_possible(dev: out_dev) || |
61 | !dst->lwtstate || skb_warn_if_lro(skb)) |
62 | goto drop; |
63 | |
64 | skb_forward_csum(skb); |
65 | |
66 | tun_encap_info = mpls_lwtunnel_encap(lwtstate: dst->lwtstate); |
67 | |
68 | /* Obtain the ttl using the following set of rules. |
69 | * |
70 | * LWT ttl propagation setting: |
71 | * - disabled => use default TTL value from LWT |
72 | * - enabled => use TTL value from IPv4/IPv6 header |
73 | * - default => |
74 | * Global ttl propagation setting: |
75 | * - disabled => use default TTL value from global setting |
76 | * - enabled => use TTL value from IPv4/IPv6 header |
77 | */ |
78 | if (dst->ops->family == AF_INET) { |
79 | if (tun_encap_info->ttl_propagate == MPLS_TTL_PROP_DISABLED) |
80 | ttl = tun_encap_info->default_ttl; |
81 | else if (tun_encap_info->ttl_propagate == MPLS_TTL_PROP_DEFAULT && |
82 | !net->mpls.ip_ttl_propagate) |
83 | ttl = net->mpls.default_ttl; |
84 | else |
85 | ttl = ip_hdr(skb)->ttl; |
86 | rt = (struct rtable *)dst; |
87 | } else if (dst->ops->family == AF_INET6) { |
88 | if (tun_encap_info->ttl_propagate == MPLS_TTL_PROP_DISABLED) |
89 | ttl = tun_encap_info->default_ttl; |
90 | else if (tun_encap_info->ttl_propagate == MPLS_TTL_PROP_DEFAULT && |
91 | !net->mpls.ip_ttl_propagate) |
92 | ttl = net->mpls.default_ttl; |
93 | else |
94 | ttl = ipv6_hdr(skb)->hop_limit; |
95 | rt6 = (struct rt6_info *)dst; |
96 | } else { |
97 | goto drop; |
98 | } |
99 | |
100 | /* Verify the destination can hold the packet */ |
101 | new_header_size = mpls_encap_size(en: tun_encap_info); |
102 | mtu = mpls_dev_mtu(dev: out_dev); |
103 | if (mpls_pkt_too_big(skb, mtu: mtu - new_header_size)) |
104 | goto drop; |
105 | |
106 | hh_len = LL_RESERVED_SPACE(out_dev); |
107 | if (!out_dev->header_ops) |
108 | hh_len = 0; |
109 | |
110 | /* Ensure there is enough space for the headers in the skb */ |
111 | if (skb_cow(skb, headroom: hh_len + new_header_size)) |
112 | goto drop; |
113 | |
114 | skb_set_inner_protocol(skb, protocol: skb->protocol); |
115 | skb_reset_inner_network_header(skb); |
116 | |
117 | skb_push(skb, len: new_header_size); |
118 | |
119 | skb_reset_network_header(skb); |
120 | |
121 | skb->dev = out_dev; |
122 | skb->protocol = htons(ETH_P_MPLS_UC); |
123 | |
124 | /* Push the new labels */ |
125 | hdr = mpls_hdr(skb); |
126 | bos = true; |
127 | for (i = tun_encap_info->labels - 1; i >= 0; i--) { |
128 | hdr[i] = mpls_entry_encode(label: tun_encap_info->label[i], |
129 | ttl, tc: 0, bos); |
130 | bos = false; |
131 | } |
132 | |
133 | mpls_stats_inc_outucastpkts(dev: out_dev, skb); |
134 | |
135 | if (rt) { |
136 | if (rt->rt_gw_family == AF_INET6) |
137 | err = neigh_xmit(fam: NEIGH_ND_TABLE, out_dev, &rt->rt_gw6, |
138 | skb); |
139 | else |
140 | err = neigh_xmit(fam: NEIGH_ARP_TABLE, out_dev, &rt->rt_gw4, |
141 | skb); |
142 | } else if (rt6) { |
143 | if (ipv6_addr_v4mapped(a: &rt6->rt6i_gateway)) { |
144 | /* 6PE (RFC 4798) */ |
145 | err = neigh_xmit(fam: NEIGH_ARP_TABLE, out_dev, &rt6->rt6i_gateway.s6_addr32[3], |
146 | skb); |
147 | } else |
148 | err = neigh_xmit(fam: NEIGH_ND_TABLE, out_dev, &rt6->rt6i_gateway, |
149 | skb); |
150 | } |
151 | if (err) |
152 | net_dbg_ratelimited("%s: packet transmission failed: %d\n" , |
153 | __func__, err); |
154 | |
155 | return LWTUNNEL_XMIT_DONE; |
156 | |
157 | drop: |
158 | out_mdev = out_dev ? mpls_dev_get(dev: out_dev) : NULL; |
159 | if (out_mdev) |
160 | MPLS_INC_STATS(out_mdev, tx_errors); |
161 | kfree_skb(skb); |
162 | return -EINVAL; |
163 | } |
164 | |
165 | static int mpls_build_state(struct net *net, struct nlattr *nla, |
166 | unsigned int family, const void *cfg, |
167 | struct lwtunnel_state **ts, |
168 | struct netlink_ext_ack *extack) |
169 | { |
170 | struct mpls_iptunnel_encap *tun_encap_info; |
171 | struct nlattr *tb[MPLS_IPTUNNEL_MAX + 1]; |
172 | struct lwtunnel_state *newts; |
173 | u8 n_labels; |
174 | int ret; |
175 | |
176 | ret = nla_parse_nested_deprecated(tb, MPLS_IPTUNNEL_MAX, nla, |
177 | policy: mpls_iptunnel_policy, extack); |
178 | if (ret < 0) |
179 | return ret; |
180 | |
181 | if (!tb[MPLS_IPTUNNEL_DST]) { |
182 | NL_SET_ERR_MSG(extack, "MPLS_IPTUNNEL_DST attribute is missing" ); |
183 | return -EINVAL; |
184 | } |
185 | |
186 | /* determine number of labels */ |
187 | if (nla_get_labels(nla: tb[MPLS_IPTUNNEL_DST], MAX_NEW_LABELS, |
188 | labels: &n_labels, NULL, extack)) |
189 | return -EINVAL; |
190 | |
191 | newts = lwtunnel_state_alloc(struct_size(tun_encap_info, label, |
192 | n_labels)); |
193 | if (!newts) |
194 | return -ENOMEM; |
195 | |
196 | tun_encap_info = mpls_lwtunnel_encap(lwtstate: newts); |
197 | ret = nla_get_labels(nla: tb[MPLS_IPTUNNEL_DST], max_labels: n_labels, |
198 | labels: &tun_encap_info->labels, label: tun_encap_info->label, |
199 | extack); |
200 | if (ret) |
201 | goto errout; |
202 | |
203 | tun_encap_info->ttl_propagate = MPLS_TTL_PROP_DEFAULT; |
204 | |
205 | if (tb[MPLS_IPTUNNEL_TTL]) { |
206 | tun_encap_info->default_ttl = nla_get_u8(nla: tb[MPLS_IPTUNNEL_TTL]); |
207 | /* TTL 0 implies propagate from IP header */ |
208 | tun_encap_info->ttl_propagate = tun_encap_info->default_ttl ? |
209 | MPLS_TTL_PROP_DISABLED : |
210 | MPLS_TTL_PROP_ENABLED; |
211 | } |
212 | |
213 | newts->type = LWTUNNEL_ENCAP_MPLS; |
214 | newts->flags |= LWTUNNEL_STATE_XMIT_REDIRECT; |
215 | newts->headroom = mpls_encap_size(en: tun_encap_info); |
216 | |
217 | *ts = newts; |
218 | |
219 | return 0; |
220 | |
221 | errout: |
222 | kfree(objp: newts); |
223 | *ts = NULL; |
224 | |
225 | return ret; |
226 | } |
227 | |
228 | static int mpls_fill_encap_info(struct sk_buff *skb, |
229 | struct lwtunnel_state *lwtstate) |
230 | { |
231 | struct mpls_iptunnel_encap *tun_encap_info; |
232 | |
233 | tun_encap_info = mpls_lwtunnel_encap(lwtstate); |
234 | |
235 | if (nla_put_labels(skb, attrtype: MPLS_IPTUNNEL_DST, labels: tun_encap_info->labels, |
236 | label: tun_encap_info->label)) |
237 | goto nla_put_failure; |
238 | |
239 | if (tun_encap_info->ttl_propagate != MPLS_TTL_PROP_DEFAULT && |
240 | nla_put_u8(skb, attrtype: MPLS_IPTUNNEL_TTL, value: tun_encap_info->default_ttl)) |
241 | goto nla_put_failure; |
242 | |
243 | return 0; |
244 | |
245 | nla_put_failure: |
246 | return -EMSGSIZE; |
247 | } |
248 | |
249 | static int mpls_encap_nlsize(struct lwtunnel_state *lwtstate) |
250 | { |
251 | struct mpls_iptunnel_encap *tun_encap_info; |
252 | int nlsize; |
253 | |
254 | tun_encap_info = mpls_lwtunnel_encap(lwtstate); |
255 | |
256 | nlsize = nla_total_size(payload: tun_encap_info->labels * 4); |
257 | |
258 | if (tun_encap_info->ttl_propagate != MPLS_TTL_PROP_DEFAULT) |
259 | nlsize += nla_total_size(payload: 1); |
260 | |
261 | return nlsize; |
262 | } |
263 | |
264 | static int mpls_encap_cmp(struct lwtunnel_state *a, struct lwtunnel_state *b) |
265 | { |
266 | struct mpls_iptunnel_encap *a_hdr = mpls_lwtunnel_encap(lwtstate: a); |
267 | struct mpls_iptunnel_encap *b_hdr = mpls_lwtunnel_encap(lwtstate: b); |
268 | int l; |
269 | |
270 | if (a_hdr->labels != b_hdr->labels || |
271 | a_hdr->ttl_propagate != b_hdr->ttl_propagate || |
272 | a_hdr->default_ttl != b_hdr->default_ttl) |
273 | return 1; |
274 | |
275 | for (l = 0; l < a_hdr->labels; l++) |
276 | if (a_hdr->label[l] != b_hdr->label[l]) |
277 | return 1; |
278 | return 0; |
279 | } |
280 | |
281 | static const struct lwtunnel_encap_ops mpls_iptun_ops = { |
282 | .build_state = mpls_build_state, |
283 | .xmit = mpls_xmit, |
284 | .fill_encap = mpls_fill_encap_info, |
285 | .get_encap_size = mpls_encap_nlsize, |
286 | .cmp_encap = mpls_encap_cmp, |
287 | .owner = THIS_MODULE, |
288 | }; |
289 | |
290 | static int __init mpls_iptunnel_init(void) |
291 | { |
292 | return lwtunnel_encap_add_ops(op: &mpls_iptun_ops, num: LWTUNNEL_ENCAP_MPLS); |
293 | } |
294 | module_init(mpls_iptunnel_init); |
295 | |
296 | static void __exit mpls_iptunnel_exit(void) |
297 | { |
298 | lwtunnel_encap_del_ops(op: &mpls_iptun_ops, num: LWTUNNEL_ENCAP_MPLS); |
299 | } |
300 | module_exit(mpls_iptunnel_exit); |
301 | |
302 | MODULE_ALIAS_RTNL_LWT(MPLS); |
303 | MODULE_SOFTDEP("post: mpls_gso" ); |
304 | MODULE_DESCRIPTION("MultiProtocol Label Switching IP Tunnels" ); |
305 | MODULE_LICENSE("GPL v2" ); |
306 | |