1/* Copyright (c) 2015 PLUMgrid, http://plumgrid.com
2 *
3 * This program is free software; you can redistribute it and/or
4 * modify it under the terms of version 2 of the GNU General Public
5 * License as published by the Free Software Foundation.
6 */
7#include <uapi/linux/bpf.h>
8#include <uapi/linux/in.h>
9#include <uapi/linux/if.h>
10#include <uapi/linux/if_ether.h>
11#include <uapi/linux/ip.h>
12#include <uapi/linux/ipv6.h>
13#include <uapi/linux/if_tunnel.h>
14#include <uapi/linux/mpls.h>
15#include <bpf/bpf_helpers.h>
16#include "bpf_legacy.h"
/* Bits of the IPv4 frag_off field tested by ip_is_fragment(): the
 * "more fragments" flag and the fragment-offset mask.
 */
#define IP_MF 0x2000
#define IP_OFFSET 0x1FFF

/* Slot numbers in prog_array_init; parse_eth_proto() passes them to
 * bpf_tail_call() to select the next parser program.
 */
#define PARSE_VLAN 1
#define PARSE_MPLS 2
#define PARSE_IP 3
#define PARSE_IPV6 4
24
/* Layout of one VLAN tag as read by bpf_func_vlan(): only the
 * encapsulated protocol field and the overall size are used here.
 */
struct vlan_hdr {
	__be16 h_vlan_TCI;
	__be16 h_vlan_encapsulated_proto;	/* protocol of the header that follows */
};
29
/* Flow identity assembled by the parser programs and used as the key of
 * hash_map.
 */
struct flow_key_record {
	/* IPv4 saddr/daddr as returned by load_word(), or the XOR-fold
	 * produced by ipv6_addr_hash() for IPv6 addresses.
	 */
	__be32 src;
	__be32 dst;
	union {
		/* first 32 bits at the transport header, set for TCP and UDP */
		__be32 ports;
		__be16 port16[2];
	};
	/* IP protocol number of the accounted packet (TCP, UDP or ICMP) */
	__u32 ip_proto;
};
39
/* Forward declaration: parse_ip_proto() calls this before its definition,
 * which has to follow prog_array_init further down.
 */
static inline void parse_eth_proto(struct __sk_buff *skb, u32 proto);
41
42static inline int ip_is_fragment(struct __sk_buff *ctx, __u64 nhoff)
43{
44 return load_half(ctx, nhoff + offsetof(struct iphdr, frag_off))
45 & (IP_MF | IP_OFFSET);
46}
47
48static inline __u32 ipv6_addr_hash(struct __sk_buff *ctx, __u64 off)
49{
50 __u64 w0 = load_word(ctx, off);
51 __u64 w1 = load_word(ctx, off + 4);
52 __u64 w2 = load_word(ctx, off + 8);
53 __u64 w3 = load_word(ctx, off + 12);
54
55 return (__u32)(w0 ^ w1 ^ w2 ^ w3);
56}
57
/* Parser state stored in percpu_map: the flow key that the individual
 * parser programs fill in step by step before update_stats() consumes it.
 */
struct globals {
	struct flow_key_record flow;
};
61
/* Array of 32 struct globals slots; this_cpu_globals() indexes it with the
 * current CPU id.
 * NOTE(review): a CPU id of 32 or above presumably fails the lookup, in
 * which case the IP parsers return without accounting - confirm this
 * limit is acceptable.
 */
struct {
	__uint(type, BPF_MAP_TYPE_ARRAY);
	__type(key, __u32);
	__type(value, struct globals);
	__uint(max_entries, 32);
} percpu_map SEC(".maps");
68
69/* user poor man's per_cpu until native support is ready */
70static struct globals *this_cpu_globals(void)
71{
72 u32 key = bpf_get_smp_processor_id();
73
74 return bpf_map_lookup_elem(&percpu_map, &key);
75}
76
/* Per-flow counters kept in hash_map for user space to read: number of
 * packets seen and the sum of their skb->len values.
 */
struct pair {
	__u64 packets;
	__u64 bytes;
};
82
/* Flow table: maps a struct flow_key_record to its struct pair counters,
 * with room for 1024 entries. Written by update_stats().
 */
struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__type(key, struct flow_key_record);
	__type(value, struct pair);
	__uint(max_entries, 1024);
} hash_map SEC(".maps");
89
90static void update_stats(struct __sk_buff *skb, struct globals *g)
91{
92 struct flow_key_record key = g->flow;
93 struct pair *value;
94
95 value = bpf_map_lookup_elem(&hash_map, &key);
96 if (value) {
97 __sync_fetch_and_add(&value->packets, 1);
98 __sync_fetch_and_add(&value->bytes, skb->len);
99 } else {
100 struct pair val = {1, skb->len};
101
102 bpf_map_update_elem(&hash_map, &key, &val, BPF_ANY);
103 }
104}
105
106static __always_inline void parse_ip_proto(struct __sk_buff *skb,
107 struct globals *g, __u32 ip_proto)
108{
109 __u32 nhoff = skb->cb[0];
110 int poff;
111
112 switch (ip_proto) {
113 case IPPROTO_GRE: {
114 struct gre_hdr {
115 __be16 flags;
116 __be16 proto;
117 };
118
119 __u32 gre_flags = load_half(skb,
120 nhoff + offsetof(struct gre_hdr, flags));
121 __u32 gre_proto = load_half(skb,
122 nhoff + offsetof(struct gre_hdr, proto));
123
124 if (gre_flags & (GRE_VERSION|GRE_ROUTING))
125 break;
126
127 nhoff += 4;
128 if (gre_flags & GRE_CSUM)
129 nhoff += 4;
130 if (gre_flags & GRE_KEY)
131 nhoff += 4;
132 if (gre_flags & GRE_SEQ)
133 nhoff += 4;
134
135 skb->cb[0] = nhoff;
136 parse_eth_proto(skb, gre_proto);
137 break;
138 }
139 case IPPROTO_IPIP:
140 parse_eth_proto(skb, ETH_P_IP);
141 break;
142 case IPPROTO_IPV6:
143 parse_eth_proto(skb, ETH_P_IPV6);
144 break;
145 case IPPROTO_TCP:
146 case IPPROTO_UDP:
147 g->flow.ports = load_word(skb, nhoff);
148 case IPPROTO_ICMP:
149 g->flow.ip_proto = ip_proto;
150 update_stats(skb, g);
151 break;
152 default:
153 break;
154 }
155}
156
157SEC("socket")
158int bpf_func_ip(struct __sk_buff *skb)
159{
160 struct globals *g = this_cpu_globals();
161 __u32 nhoff, verlen, ip_proto;
162
163 if (!g)
164 return 0;
165
166 nhoff = skb->cb[0];
167
168 if (unlikely(ip_is_fragment(skb, nhoff)))
169 return 0;
170
171 ip_proto = load_byte(skb, nhoff + offsetof(struct iphdr, protocol));
172
173 if (ip_proto != IPPROTO_GRE) {
174 g->flow.src = load_word(skb, nhoff + offsetof(struct iphdr, saddr));
175 g->flow.dst = load_word(skb, nhoff + offsetof(struct iphdr, daddr));
176 }
177
178 verlen = load_byte(skb, nhoff + 0/*offsetof(struct iphdr, ihl)*/);
179 nhoff += (verlen & 0xF) << 2;
180
181 skb->cb[0] = nhoff;
182 parse_ip_proto(skb, g, ip_proto);
183 return 0;
184}
185
186SEC("socket")
187int bpf_func_ipv6(struct __sk_buff *skb)
188{
189 struct globals *g = this_cpu_globals();
190 __u32 nhoff, ip_proto;
191
192 if (!g)
193 return 0;
194
195 nhoff = skb->cb[0];
196
197 ip_proto = load_byte(skb,
198 nhoff + offsetof(struct ipv6hdr, nexthdr));
199 g->flow.src = ipv6_addr_hash(skb,
200 nhoff + offsetof(struct ipv6hdr, saddr));
201 g->flow.dst = ipv6_addr_hash(skb,
202 nhoff + offsetof(struct ipv6hdr, daddr));
203 nhoff += sizeof(struct ipv6hdr);
204
205 skb->cb[0] = nhoff;
206 parse_ip_proto(skb, g, ip_proto);
207 return 0;
208}
209
210SEC("socket")
211int bpf_func_vlan(struct __sk_buff *skb)
212{
213 __u32 nhoff, proto;
214
215 nhoff = skb->cb[0];
216
217 proto = load_half(skb, nhoff + offsetof(struct vlan_hdr,
218 h_vlan_encapsulated_proto));
219 nhoff += sizeof(struct vlan_hdr);
220 skb->cb[0] = nhoff;
221
222 parse_eth_proto(skb, proto);
223
224 return 0;
225}
226
227SEC("socket")
228int bpf_func_mpls(struct __sk_buff *skb)
229{
230 __u32 nhoff, label;
231
232 nhoff = skb->cb[0];
233
234 label = load_word(skb, nhoff);
235 nhoff += sizeof(struct mpls_label);
236 skb->cb[0] = nhoff;
237
238 if (label & MPLS_LS_S_MASK) {
239 __u8 verlen = load_byte(skb, nhoff);
240 if ((verlen & 0xF0) == 4)
241 parse_eth_proto(skb, ETH_P_IP);
242 else
243 parse_eth_proto(skb, ETH_P_IPV6);
244 } else {
245 parse_eth_proto(skb, ETH_P_MPLS_UC);
246 }
247
248 return 0;
249}
250
/* Jump table for bpf_tail_call(): an 8-slot program array whose PARSE_*
 * slots are statically initialised with the four parser programs above.
 * The remaining slots are left unset.
 */
struct {
	__uint(type, BPF_MAP_TYPE_PROG_ARRAY);
	__uint(key_size, sizeof(u32));
	__uint(max_entries, 8);
	__array(values, u32 (void *));
} prog_array_init SEC(".maps") = {
	.values = {
		[PARSE_VLAN] = (void *)&bpf_func_vlan,
		[PARSE_IP] = (void *)&bpf_func_ip,
		[PARSE_IPV6] = (void *)&bpf_func_ipv6,
		[PARSE_MPLS] = (void *)&bpf_func_mpls,
	},
};
264
265/* Protocol dispatch routine. It tail-calls next BPF program depending
266 * on eth proto. Note, we could have used ...
267 *
268 * bpf_tail_call(skb, &prog_array_init, proto);
269 *
270 * ... but it would need large prog_array and cannot be optimised given
271 * the map key is not static.
272 */
273static inline void parse_eth_proto(struct __sk_buff *skb, u32 proto)
274{
275 switch (proto) {
276 case ETH_P_8021Q:
277 case ETH_P_8021AD:
278 bpf_tail_call(skb, &prog_array_init, PARSE_VLAN);
279 break;
280 case ETH_P_MPLS_UC:
281 case ETH_P_MPLS_MC:
282 bpf_tail_call(skb, &prog_array_init, PARSE_MPLS);
283 break;
284 case ETH_P_IP:
285 bpf_tail_call(skb, &prog_array_init, PARSE_IP);
286 break;
287 case ETH_P_IPV6:
288 bpf_tail_call(skb, &prog_array_init, PARSE_IPV6);
289 break;
290 }
291}
292
293SEC("socket")
294int main_prog(struct __sk_buff *skb)
295{
296 __u32 nhoff = ETH_HLEN;
297 __u32 proto = load_half(skb, 12);
298
299 skb->cb[0] = nhoff;
300 parse_eth_proto(skb, proto);
301 return 0;
302}
303
/* License string emitted into the "license" section of the object file. */
char _license[] SEC("license") = "GPL";
305

source code of linux/samples/bpf/sockex3_kern.c