1 | /* SPDX-License-Identifier: GPL-2.0 |
2 | * Copyright(c) 2018 Jesper Dangaard Brouer. |
3 | * |
4 | * XDP/TC VLAN manipulation example |
5 | * |
6 | * GOTCHA: Remember to disable NIC hardware offloading of VLANs, |
7 | * else the VLAN tags are NOT inlined in the packet payload: |
8 | * |
9 | * # ethtool -K ixgbe2 rxvlan off |
10 | * |
11 | * Verify setting: |
12 | * # ethtool -k ixgbe2 | grep rx-vlan-offload |
13 | * rx-vlan-offload: off |
14 | * |
15 | */ |
16 | #include <stddef.h> |
17 | #include <stdbool.h> |
18 | #include <string.h> |
19 | #include <linux/bpf.h> |
20 | #include <linux/if_ether.h> |
21 | #include <linux/if_vlan.h> |
22 | #include <linux/in.h> |
23 | #include <linux/pkt_cls.h> |
24 | |
25 | #include <bpf/bpf_helpers.h> |
26 | #include <bpf/bpf_endian.h> |
27 | |
/* The kernel's struct vlan_hdr (linux/if_vlan.h) is not exposed as UAPI,
 * so a private mirror of it is declared here.
 *
 * struct _vlan_hdr - VLAN header
 * @h_vlan_TCI: priority and VLAN ID
 * @h_vlan_encapsulated_proto: packet type ID or len
 */
struct _vlan_hdr {
	__be16 h_vlan_TCI;			/* big-endian on the wire */
	__be16 h_vlan_encapsulated_proto;	/* big-endian on the wire */
};
/* Bit fields of the 16-bit VLAN TCI value, applied after converting the
 * field to host byte order.
 */
#define VLAN_PRIO_SHIFT		13
#define VLAN_PRIO_MASK		0xe000	/* Priority Code Point */
#define VLAN_CFI_MASK		0x1000	/* Canonical Format Indicator */
#define VLAN_TAG_PRESENT	VLAN_CFI_MASK
#define VLAN_VID_MASK		0x0fff	/* VLAN Identifier */
#define VLAN_N_VID		4096
44 | |
/* Result of parse_eth_frame().
 *
 * The VLAN fields are only written when the corresponding tag is found,
 * so callers zero-initialise the struct and treat a zero offset as
 * "tag not present".
 */
struct parse_pkt {
	__u16 l3_proto;		/* EtherType after any VLAN tags, host byte order */
	__u16 l3_offset;	/* byte offset just past Ethernet + VLAN headers */
	__u16 vlan_outer;	/* VLAN ID of the outer tag, host byte order */
	__u16 vlan_inner;	/* VLAN ID of the inner tag, host byte order */
	__u8 vlan_outer_offset;	/* byte offset of the outer VLAN header */
	__u8 vlan_inner_offset;	/* byte offset of the inner VLAN header */
};
53 | |
54 | char _license[] SEC("license" ) = "GPL" ; |
55 | |
56 | static __always_inline |
57 | bool parse_eth_frame(struct ethhdr *eth, void *data_end, struct parse_pkt *pkt) |
58 | { |
59 | __u16 eth_type; |
60 | __u8 offset; |
61 | |
62 | offset = sizeof(*eth); |
63 | /* Make sure packet is large enough for parsing eth + 2 VLAN headers */ |
64 | if ((void *)eth + offset + (2*sizeof(struct _vlan_hdr)) > data_end) |
65 | return false; |
66 | |
67 | eth_type = eth->h_proto; |
68 | |
69 | /* Handle outer VLAN tag */ |
70 | if (eth_type == bpf_htons(ETH_P_8021Q) |
71 | || eth_type == bpf_htons(ETH_P_8021AD)) { |
72 | struct _vlan_hdr *vlan_hdr; |
73 | |
74 | vlan_hdr = (void *)eth + offset; |
75 | pkt->vlan_outer_offset = offset; |
76 | pkt->vlan_outer = bpf_ntohs(vlan_hdr->h_vlan_TCI) |
77 | & VLAN_VID_MASK; |
78 | eth_type = vlan_hdr->h_vlan_encapsulated_proto; |
79 | offset += sizeof(*vlan_hdr); |
80 | } |
81 | |
82 | /* Handle inner (double) VLAN tag */ |
83 | if (eth_type == bpf_htons(ETH_P_8021Q) |
84 | || eth_type == bpf_htons(ETH_P_8021AD)) { |
85 | struct _vlan_hdr *vlan_hdr; |
86 | |
87 | vlan_hdr = (void *)eth + offset; |
88 | pkt->vlan_inner_offset = offset; |
89 | pkt->vlan_inner = bpf_ntohs(vlan_hdr->h_vlan_TCI) |
90 | & VLAN_VID_MASK; |
91 | eth_type = vlan_hdr->h_vlan_encapsulated_proto; |
92 | offset += sizeof(*vlan_hdr); |
93 | } |
94 | |
95 | pkt->l3_proto = bpf_ntohs(eth_type); /* Convert to host-byte-order */ |
96 | pkt->l3_offset = offset; |
97 | |
98 | return true; |
99 | } |
100 | |
/* Hint: the VLAN IDs are chosen so that network-byte-order issues show up */
#define TESTVLAN	4011	/* 0xFAB */
// #define TO_VLAN 4000 /* 0xFA0 (hint 0x0A0 = 160) */
104 | |
105 | SEC("xdp_drop_vlan_4011" ) |
106 | int xdp_prognum0(struct xdp_md *ctx) |
107 | { |
108 | void *data_end = (void *)(long)ctx->data_end; |
109 | void *data = (void *)(long)ctx->data; |
110 | struct parse_pkt pkt = { 0 }; |
111 | |
112 | if (!parse_eth_frame(eth: data, data_end, pkt: &pkt)) |
113 | return XDP_ABORTED; |
114 | |
115 | /* Drop specific VLAN ID example */ |
116 | if (pkt.vlan_outer == TESTVLAN) |
117 | return XDP_ABORTED; |
118 | /* |
119 | * Using XDP_ABORTED makes it possible to record this event, |
120 | * via tracepoint xdp:xdp_exception like: |
121 | * # perf record -a -e xdp:xdp_exception |
122 | * # perf script |
123 | */ |
124 | return XDP_PASS; |
125 | } |
126 | /* |
Commands to set up a VLAN on Linux to test that packets get dropped:
128 | |
129 | export ROOTDEV=ixgbe2 |
130 | export VLANID=4011 |
131 | ip link add link $ROOTDEV name $ROOTDEV.$VLANID type vlan id $VLANID |
132 | ip link set dev $ROOTDEV.$VLANID up |
133 | |
134 | ip link set dev $ROOTDEV mtu 1508 |
135 | ip addr add 100.64.40.11/24 dev $ROOTDEV.$VLANID |
136 | |
137 | Load prog with ip tool: |
138 | |
139 | ip link set $ROOTDEV xdp off |
140 | ip link set $ROOTDEV xdp object xdp_vlan01_kern.o section xdp_drop_vlan_4011 |
141 | |
142 | */ |
143 | |
/* Changing the VLAN ID to zero has the same practical effect as removing
 * the VLAN.
 */
#define TO_VLAN	0
146 | |
147 | SEC("xdp_vlan_change" ) |
148 | int xdp_prognum1(struct xdp_md *ctx) |
149 | { |
150 | void *data_end = (void *)(long)ctx->data_end; |
151 | void *data = (void *)(long)ctx->data; |
152 | struct parse_pkt pkt = { 0 }; |
153 | |
154 | if (!parse_eth_frame(eth: data, data_end, pkt: &pkt)) |
155 | return XDP_ABORTED; |
156 | |
157 | /* Change specific VLAN ID */ |
158 | if (pkt.vlan_outer == TESTVLAN) { |
159 | struct _vlan_hdr *vlan_hdr = data + pkt.vlan_outer_offset; |
160 | |
161 | /* Modifying VLAN, preserve top 4 bits */ |
162 | vlan_hdr->h_vlan_TCI = |
163 | bpf_htons((bpf_ntohs(vlan_hdr->h_vlan_TCI) & 0xf000) |
164 | | TO_VLAN); |
165 | } |
166 | |
167 | return XDP_PASS; |
168 | } |
169 | |
/*
 * Show how XDP and TC can cooperate to build a VLAN rewriter.
 *  1. Create an XDP prog that can "pop"/remove a VLAN header.
 *  2. Create a TC-bpf prog that can add a VLAN header on egress.
 */
175 | |
#ifndef ETH_ALEN
#define ETH_ALEN	6	/* Ethernet MAC address length, in bytes */
#endif
#define VLAN_HDR_SZ	4	/* number of bytes stripped per VLAN header */
180 | |
181 | SEC("xdp_vlan_remove_outer" ) |
182 | int xdp_prognum2(struct xdp_md *ctx) |
183 | { |
184 | void *data_end = (void *)(long)ctx->data_end; |
185 | void *data = (void *)(long)ctx->data; |
186 | struct parse_pkt pkt = { 0 }; |
187 | char *dest; |
188 | |
189 | if (!parse_eth_frame(eth: data, data_end, pkt: &pkt)) |
190 | return XDP_ABORTED; |
191 | |
192 | /* Skip packet if no outer VLAN was detected */ |
193 | if (pkt.vlan_outer_offset == 0) |
194 | return XDP_PASS; |
195 | |
196 | /* Moving Ethernet header, dest overlap with src, memmove handle this */ |
197 | dest = data; |
198 | dest += VLAN_HDR_SZ; |
199 | /* |
200 | * Notice: Taking over vlan_hdr->h_vlan_encapsulated_proto, by |
201 | * only moving two MAC addrs (12 bytes), not overwriting last 2 bytes |
202 | */ |
203 | __builtin_memmove(dest, data, ETH_ALEN * 2); |
204 | /* Note: LLVM built-in memmove inlining require size to be constant */ |
205 | |
206 | /* Move start of packet header seen by Linux kernel stack */ |
207 | bpf_xdp_adjust_head(ctx, VLAN_HDR_SZ); |
208 | |
209 | return XDP_PASS; |
210 | } |
211 | |
/* Shift the first 12 bytes at @data (the two MAC addresses) forward by
 * 4 bytes, using three 32-bit loads and three 32-bit stores.
 *
 * Assumes a VLAN header is present. Word 3, which gets overwritten, holds
 * ethhdr->h_proto and vlan_hdr->h_vlan_TCI; afterwards
 * vlan_hdr->h_vlan_encapsulated_proto takes over the role of
 * ethhdr->h_proto. Word 0 is left as it was.
 */
static __always_inline
void shift_mac_4bytes_32bit(void *data)
{
	__u32 *word = data;
	__u32 w0 = word[0];
	__u32 w1 = word[1];
	__u32 w2 = word[2];

	word[1] = w0;
	word[2] = w1;
	word[3] = w2;
}
226 | |
227 | SEC("xdp_vlan_remove_outer2" ) |
228 | int xdp_prognum3(struct xdp_md *ctx) |
229 | { |
230 | void *data_end = (void *)(long)ctx->data_end; |
231 | void *data = (void *)(long)ctx->data; |
232 | struct ethhdr *orig_eth = data; |
233 | struct parse_pkt pkt = { 0 }; |
234 | |
235 | if (!parse_eth_frame(eth: orig_eth, data_end, pkt: &pkt)) |
236 | return XDP_ABORTED; |
237 | |
238 | /* Skip packet if no outer VLAN was detected */ |
239 | if (pkt.vlan_outer_offset == 0) |
240 | return XDP_PASS; |
241 | |
242 | /* Simply shift down MAC addrs 4 bytes, overwrite h_proto + TCI */ |
243 | shift_mac_4bytes_32bit(data); |
244 | |
245 | /* Move start of packet header seen by Linux kernel stack */ |
246 | bpf_xdp_adjust_head(ctx, VLAN_HDR_SZ); |
247 | |
248 | return XDP_PASS; |
249 | } |
250 | |
251 | /*===================================== |
252 | * BELOW: TC-hook based ebpf programs |
253 | * ==================================== |
 * The TC-clsact eBPF programs (currently) need to be attached via TC commands
255 | */ |
256 | |
257 | SEC("tc_vlan_push" ) |
258 | int _tc_progA(struct __sk_buff *ctx) |
259 | { |
260 | bpf_skb_vlan_push(ctx, bpf_htons(ETH_P_8021Q), TESTVLAN); |
261 | |
262 | return TC_ACT_OK; |
263 | } |
264 | /* |
Commands to set up TC to use the above BPF prog:
266 | |
267 | export ROOTDEV=ixgbe2 |
268 | export FILE=xdp_vlan01_kern.o |
269 | |
# Re-attach clsact to clear/flush existing rules
271 | tc qdisc del dev $ROOTDEV clsact 2> /dev/null ;\ |
272 | tc qdisc add dev $ROOTDEV clsact |
273 | |
274 | # Attach BPF prog EGRESS |
275 | tc filter add dev $ROOTDEV egress \ |
276 | prio 1 handle 1 bpf da obj $FILE sec tc_vlan_push |
277 | |
278 | tc filter show dev $ROOTDEV egress |
279 | */ |
280 | |