1 | #include <stddef.h> |
2 | #include <inttypes.h> |
3 | #include <errno.h> |
4 | #include <linux/seg6_local.h> |
5 | #include <linux/bpf.h> |
6 | #include <bpf/bpf_helpers.h> |
7 | #include <bpf/bpf_endian.h> |
8 | |
/*
 * Packet parsing helper: step _cursor forward by _len and evaluate to the
 * position _cursor held before the step.
 *
 * Built on a GNU statement expression.  When _cursor is a void pointer (as
 * in get_srh) the step is counted in bytes via GNU void-pointer arithmetic.
 * _cursor is modified in place, so it must be an lvalue.
 */
#define cursor_advance(_cursor, _len)		\
({						\
	void *_start = _cursor;			\
	_cursor += _len;			\
	_start;					\
})
12 | |
/* Bit 4 of the SRH flags byte (the "A" flag set by add_egr_x). */
#define SR6_FLAG_ALERT (1 << 4)

/*
 * Attribute applied to every on-wire header struct below so that the
 * compiler inserts no padding between members.
 */
#define BPF_PACKET_HEADER __attribute__((packed))
16 | |
/*
 * Fixed 40-byte IPv6 header as it appears at the start of the packet.
 *
 * NOTE(review): the placement of the three leading bit-fields depends on
 * the compiler and target endianness.  Nothing in this file reads them;
 * get_srh() inspects the raw first byte for the version instead.
 */
struct ip6_t {
	unsigned int ver:4;
	unsigned int priority:8;
	unsigned int flow_label:20;
	unsigned short payload_len;
	unsigned char next_header;	/* type of the header that follows */
	unsigned char hop_limit;
	unsigned long long src_hi;	/* source address, upper 64 bits */
	unsigned long long src_lo;	/* source address, lower 64 bits */
	unsigned long long dst_hi;	/* destination address, upper 64 bits */
	unsigned long long dst_lo;	/* destination address, lower 64 bits */
} BPF_PACKET_HEADER;
29 | |
/*
 * 128-bit IPv6 address split into two 64-bit halves, upper half first.
 * The code in this file stores and compares each half in big-endian form
 * (bpf_cpu_to_be64 / bpf_be64_to_cpu).
 */
struct ip6_addr_t {
	unsigned long long hi, lo;
} BPF_PACKET_HEADER;
34 | |
35 | struct ip6_srh_t { |
36 | unsigned char nexthdr; |
37 | unsigned char hdrlen; |
38 | unsigned char type; |
39 | unsigned char segments_left; |
40 | unsigned char first_segment; |
41 | unsigned char flags; |
42 | unsigned short tag; |
43 | |
44 | struct ip6_addr_t segments[0]; |
45 | } BPF_PACKET_HEADER; |
46 | |
/*
 * Generic SRH TLV: one type byte, one length byte, then len bytes of value.
 * sizeof(struct sr6_tlv_t) is therefore just the 2-byte type/len prefix.
 */
struct sr6_tlv_t {
	unsigned char type;
	unsigned char len;
	/* C99 flexible array member; does not contribute to sizeof. */
	unsigned char value[];
} BPF_PACKET_HEADER;
52 | |
53 | static __always_inline struct ip6_srh_t *get_srh(struct __sk_buff *skb) |
54 | { |
55 | void *cursor, *data_end; |
56 | struct ip6_srh_t *srh; |
57 | struct ip6_t *ip; |
58 | uint8_t *ipver; |
59 | |
60 | data_end = (void *)(long)skb->data_end; |
61 | cursor = (void *)(long)skb->data; |
62 | ipver = (uint8_t *)cursor; |
63 | |
64 | if ((void *)ipver + sizeof(*ipver) > data_end) |
65 | return NULL; |
66 | |
67 | if ((*ipver >> 4) != 6) |
68 | return NULL; |
69 | |
70 | ip = cursor_advance(cursor, sizeof(*ip)); |
71 | if ((void *)ip + sizeof(*ip) > data_end) |
72 | return NULL; |
73 | |
74 | if (ip->next_header != 43) |
75 | return NULL; |
76 | |
77 | srh = cursor_advance(cursor, sizeof(*srh)); |
78 | if ((void *)srh + sizeof(*srh) > data_end) |
79 | return NULL; |
80 | |
81 | if (srh->type != 4) |
82 | return NULL; |
83 | |
84 | return srh; |
85 | } |
86 | |
87 | static __always_inline |
88 | int update_tlv_pad(struct __sk_buff *skb, uint32_t new_pad, |
89 | uint32_t old_pad, uint32_t pad_off) |
90 | { |
91 | int err; |
92 | |
93 | if (new_pad != old_pad) { |
94 | err = bpf_lwt_seg6_adjust_srh(skb, pad_off, |
95 | (int) new_pad - (int) old_pad); |
96 | if (err) |
97 | return err; |
98 | } |
99 | |
100 | if (new_pad > 0) { |
101 | char pad_tlv_buf[16] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
102 | 0, 0, 0}; |
103 | struct sr6_tlv_t *pad_tlv = (struct sr6_tlv_t *) pad_tlv_buf; |
104 | |
105 | pad_tlv->type = SR6_TLV_PADDING; |
106 | pad_tlv->len = new_pad - 2; |
107 | |
108 | err = bpf_lwt_seg6_store_bytes(skb, pad_off, |
109 | (void *)pad_tlv_buf, new_pad); |
110 | if (err) |
111 | return err; |
112 | } |
113 | |
114 | return 0; |
115 | } |
116 | |
117 | static __always_inline |
118 | int is_valid_tlv_boundary(struct __sk_buff *skb, struct ip6_srh_t *srh, |
119 | uint32_t *tlv_off, uint32_t *pad_size, |
120 | uint32_t *pad_off) |
121 | { |
122 | uint32_t srh_off, cur_off; |
123 | int offset_valid = 0; |
124 | int err; |
125 | |
126 | srh_off = (char *)srh - (char *)(long)skb->data; |
127 | // cur_off = end of segments, start of possible TLVs |
128 | cur_off = srh_off + sizeof(*srh) + |
129 | sizeof(struct ip6_addr_t) * (srh->first_segment + 1); |
130 | |
131 | *pad_off = 0; |
132 | |
133 | // we can only go as far as ~10 TLVs due to the BPF max stack size |
134 | #pragma clang loop unroll(full) |
135 | for (int i = 0; i < 10; i++) { |
136 | struct sr6_tlv_t tlv; |
137 | |
138 | if (cur_off == *tlv_off) |
139 | offset_valid = 1; |
140 | |
141 | if (cur_off >= srh_off + ((srh->hdrlen + 1) << 3)) |
142 | break; |
143 | |
144 | err = bpf_skb_load_bytes(skb, cur_off, &tlv, sizeof(tlv)); |
145 | if (err) |
146 | return err; |
147 | |
148 | if (tlv.type == SR6_TLV_PADDING) { |
149 | *pad_size = tlv.len + sizeof(tlv); |
150 | *pad_off = cur_off; |
151 | |
152 | if (*tlv_off == srh_off) { |
153 | *tlv_off = cur_off; |
154 | offset_valid = 1; |
155 | } |
156 | break; |
157 | |
158 | } else if (tlv.type == SR6_TLV_HMAC) { |
159 | break; |
160 | } |
161 | |
162 | cur_off += sizeof(tlv) + tlv.len; |
163 | } // we reached the padding or HMAC TLVs, or the end of the SRH |
164 | |
165 | if (*pad_off == 0) |
166 | *pad_off = cur_off; |
167 | |
168 | if (*tlv_off == -1) |
169 | *tlv_off = cur_off; |
170 | else if (!offset_valid) |
171 | return -EINVAL; |
172 | |
173 | return 0; |
174 | } |
175 | |
176 | static __always_inline |
177 | int add_tlv(struct __sk_buff *skb, struct ip6_srh_t *srh, uint32_t tlv_off, |
178 | struct sr6_tlv_t *itlv, uint8_t tlv_size) |
179 | { |
180 | uint32_t srh_off = (char *)srh - (char *)(long)skb->data; |
181 | uint8_t len_remaining, new_pad; |
182 | uint32_t pad_off = 0; |
183 | uint32_t pad_size = 0; |
184 | uint32_t partial_srh_len; |
185 | int err; |
186 | |
187 | if (tlv_off != -1) |
188 | tlv_off += srh_off; |
189 | |
190 | if (itlv->type == SR6_TLV_PADDING || itlv->type == SR6_TLV_HMAC) |
191 | return -EINVAL; |
192 | |
193 | err = is_valid_tlv_boundary(skb, srh, tlv_off: &tlv_off, pad_size: &pad_size, pad_off: &pad_off); |
194 | if (err) |
195 | return err; |
196 | |
197 | err = bpf_lwt_seg6_adjust_srh(skb, tlv_off, sizeof(*itlv) + itlv->len); |
198 | if (err) |
199 | return err; |
200 | |
201 | err = bpf_lwt_seg6_store_bytes(skb, tlv_off, (void *)itlv, tlv_size); |
202 | if (err) |
203 | return err; |
204 | |
205 | // the following can't be moved inside update_tlv_pad because the |
206 | // bpf verifier has some issues with it |
207 | pad_off += sizeof(*itlv) + itlv->len; |
208 | partial_srh_len = pad_off - srh_off; |
209 | len_remaining = partial_srh_len % 8; |
210 | new_pad = 8 - len_remaining; |
211 | |
212 | if (new_pad == 1) // cannot pad for 1 byte only |
213 | new_pad = 9; |
214 | else if (new_pad == 8) |
215 | new_pad = 0; |
216 | |
217 | return update_tlv_pad(skb, new_pad, old_pad: pad_size, pad_off); |
218 | } |
219 | |
220 | static __always_inline |
221 | int delete_tlv(struct __sk_buff *skb, struct ip6_srh_t *srh, |
222 | uint32_t tlv_off) |
223 | { |
224 | uint32_t srh_off = (char *)srh - (char *)(long)skb->data; |
225 | uint8_t len_remaining, new_pad; |
226 | uint32_t partial_srh_len; |
227 | uint32_t pad_off = 0; |
228 | uint32_t pad_size = 0; |
229 | struct sr6_tlv_t tlv; |
230 | int err; |
231 | |
232 | tlv_off += srh_off; |
233 | |
234 | err = is_valid_tlv_boundary(skb, srh, tlv_off: &tlv_off, pad_size: &pad_size, pad_off: &pad_off); |
235 | if (err) |
236 | return err; |
237 | |
238 | err = bpf_skb_load_bytes(skb, tlv_off, &tlv, sizeof(tlv)); |
239 | if (err) |
240 | return err; |
241 | |
242 | err = bpf_lwt_seg6_adjust_srh(skb, tlv_off, -(sizeof(tlv) + tlv.len)); |
243 | if (err) |
244 | return err; |
245 | |
246 | pad_off -= sizeof(tlv) + tlv.len; |
247 | partial_srh_len = pad_off - srh_off; |
248 | len_remaining = partial_srh_len % 8; |
249 | new_pad = 8 - len_remaining; |
250 | if (new_pad == 1) // cannot pad for 1 byte only |
251 | new_pad = 9; |
252 | else if (new_pad == 8) |
253 | new_pad = 0; |
254 | |
255 | return update_tlv_pad(skb, new_pad, old_pad: pad_size, pad_off); |
256 | } |
257 | |
258 | static __always_inline |
259 | int has_egr_tlv(struct __sk_buff *skb, struct ip6_srh_t *srh) |
260 | { |
261 | int tlv_offset = sizeof(struct ip6_t) + sizeof(struct ip6_srh_t) + |
262 | ((srh->first_segment + 1) << 4); |
263 | struct sr6_tlv_t tlv; |
264 | |
265 | if (bpf_skb_load_bytes(skb, tlv_offset, &tlv, sizeof(struct sr6_tlv_t))) |
266 | return 0; |
267 | |
268 | if (tlv.type == SR6_TLV_EGRESS && tlv.len == 18) { |
269 | struct ip6_addr_t egr_addr; |
270 | |
271 | if (bpf_skb_load_bytes(skb, tlv_offset + 4, &egr_addr, 16)) |
272 | return 0; |
273 | |
274 | // check if egress TLV value is correct |
275 | if (bpf_be64_to_cpu(egr_addr.hi) == 0xfd00000000000000 && |
276 | bpf_be64_to_cpu(egr_addr.lo) == 0x4) |
277 | return 1; |
278 | } |
279 | |
280 | return 0; |
281 | } |
282 | |
283 | // This function will push a SRH with segments fd00::1, fd00::2, fd00::3, |
284 | // fd00::4 |
285 | SEC("encap_srh" ) |
286 | int __encap_srh(struct __sk_buff *skb) |
287 | { |
288 | unsigned long long hi = 0xfd00000000000000; |
289 | struct ip6_addr_t *seg; |
290 | struct ip6_srh_t *srh; |
291 | char srh_buf[72]; // room for 4 segments |
292 | int err; |
293 | |
294 | srh = (struct ip6_srh_t *)srh_buf; |
295 | srh->nexthdr = 0; |
296 | srh->hdrlen = 8; |
297 | srh->type = 4; |
298 | srh->segments_left = 3; |
299 | srh->first_segment = 3; |
300 | srh->flags = 0; |
301 | srh->tag = 0; |
302 | |
303 | seg = (struct ip6_addr_t *)((char *)srh + sizeof(*srh)); |
304 | |
305 | #pragma clang loop unroll(full) |
306 | for (unsigned long long lo = 0; lo < 4; lo++) { |
307 | seg->lo = bpf_cpu_to_be64(4 - lo); |
308 | seg->hi = bpf_cpu_to_be64(hi); |
309 | seg = (struct ip6_addr_t *)((char *)seg + sizeof(*seg)); |
310 | } |
311 | |
312 | err = bpf_lwt_push_encap(skb, 0, (void *)srh, sizeof(srh_buf)); |
313 | if (err) |
314 | return BPF_DROP; |
315 | |
316 | return BPF_REDIRECT; |
317 | } |
318 | |
319 | // Add an Egress TLV fc00::4, add the flag A, |
320 | // and apply End.X action to fc42::1 |
321 | SEC("add_egr_x" ) |
322 | int __add_egr_x(struct __sk_buff *skb) |
323 | { |
324 | unsigned long long hi = 0xfc42000000000000; |
325 | unsigned long long lo = 0x1; |
326 | struct ip6_srh_t *srh = get_srh(skb); |
327 | uint8_t new_flags = SR6_FLAG_ALERT; |
328 | struct ip6_addr_t addr; |
329 | int err, offset; |
330 | |
331 | if (srh == NULL) |
332 | return BPF_DROP; |
333 | |
334 | uint8_t tlv[20] = {2, 18, 0, 0, 0xfd, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, |
335 | 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x4}; |
336 | |
337 | err = add_tlv(skb, srh, tlv_off: (srh->hdrlen+1) << 3, |
338 | itlv: (struct sr6_tlv_t *)&tlv, tlv_size: 20); |
339 | if (err) |
340 | return BPF_DROP; |
341 | |
342 | offset = sizeof(struct ip6_t) + offsetof(struct ip6_srh_t, flags); |
343 | err = bpf_lwt_seg6_store_bytes(skb, offset, |
344 | (void *)&new_flags, sizeof(new_flags)); |
345 | if (err) |
346 | return BPF_DROP; |
347 | |
348 | addr.lo = bpf_cpu_to_be64(lo); |
349 | addr.hi = bpf_cpu_to_be64(hi); |
350 | err = bpf_lwt_seg6_action(skb, SEG6_LOCAL_ACTION_END_X, |
351 | (void *)&addr, sizeof(addr)); |
352 | if (err) |
353 | return BPF_DROP; |
354 | return BPF_REDIRECT; |
355 | } |
356 | |
357 | // Pop the Egress TLV, reset the flags, change the tag 2442 and finally do a |
358 | // simple End action |
359 | SEC("pop_egr" ) |
360 | int __pop_egr(struct __sk_buff *skb) |
361 | { |
362 | struct ip6_srh_t *srh = get_srh(skb); |
363 | uint16_t new_tag = bpf_htons(2442); |
364 | uint8_t new_flags = 0; |
365 | int err, offset; |
366 | |
367 | if (srh == NULL) |
368 | return BPF_DROP; |
369 | |
370 | if (srh->flags != SR6_FLAG_ALERT) |
371 | return BPF_DROP; |
372 | |
373 | if (srh->hdrlen != 11) // 4 segments + Egress TLV + Padding TLV |
374 | return BPF_DROP; |
375 | |
376 | if (!has_egr_tlv(skb, srh)) |
377 | return BPF_DROP; |
378 | |
379 | err = delete_tlv(skb, srh, tlv_off: 8 + (srh->first_segment + 1) * 16); |
380 | if (err) |
381 | return BPF_DROP; |
382 | |
383 | offset = sizeof(struct ip6_t) + offsetof(struct ip6_srh_t, flags); |
384 | if (bpf_lwt_seg6_store_bytes(skb, offset, (void *)&new_flags, |
385 | sizeof(new_flags))) |
386 | return BPF_DROP; |
387 | |
388 | offset = sizeof(struct ip6_t) + offsetof(struct ip6_srh_t, tag); |
389 | if (bpf_lwt_seg6_store_bytes(skb, offset, (void *)&new_tag, |
390 | sizeof(new_tag))) |
391 | return BPF_DROP; |
392 | |
393 | return BPF_OK; |
394 | } |
395 | |
396 | // Inspect if the Egress TLV and flag have been removed, if the tag is correct, |
397 | // then apply a End.T action to reach the last segment |
398 | SEC("inspect_t" ) |
399 | int __inspect_t(struct __sk_buff *skb) |
400 | { |
401 | struct ip6_srh_t *srh = get_srh(skb); |
402 | int table = 117; |
403 | int err; |
404 | |
405 | if (srh == NULL) |
406 | return BPF_DROP; |
407 | |
408 | if (srh->flags != 0) |
409 | return BPF_DROP; |
410 | |
411 | if (srh->tag != bpf_htons(2442)) |
412 | return BPF_DROP; |
413 | |
414 | if (srh->hdrlen != 8) // 4 segments |
415 | return BPF_DROP; |
416 | |
417 | err = bpf_lwt_seg6_action(skb, SEG6_LOCAL_ACTION_END_T, |
418 | (void *)&table, sizeof(table)); |
419 | |
420 | if (err) |
421 | return BPF_DROP; |
422 | |
423 | return BPF_REDIRECT; |
424 | } |
425 | |
426 | char __license[] SEC("license" ) = "GPL" ; |
427 | |