1 | // SPDX-License-Identifier: GPL-2.0-only |
2 | /* |
3 | * (C) 2015 Red Hat GmbH |
4 | * Author: Florian Westphal <fw@strlen.de> |
5 | */ |
6 | |
7 | #include <linux/module.h> |
8 | #include <linux/static_key.h> |
9 | #include <linux/hash.h> |
10 | #include <linux/siphash.h> |
11 | #include <linux/if_vlan.h> |
12 | #include <linux/init.h> |
13 | #include <linux/skbuff.h> |
14 | #include <linux/netlink.h> |
15 | #include <linux/netfilter.h> |
16 | #include <linux/netfilter/nfnetlink.h> |
17 | #include <linux/netfilter/nf_tables.h> |
18 | #include <net/netfilter/nf_tables_core.h> |
19 | #include <net/netfilter/nf_tables.h> |
20 | |
/*
 * Upper bounds, in bytes, on how much of each packet header is copied
 * into a trace message. The same values are used to budget the netlink
 * message size in nft_trace_notify().
 */
#define NFT_TRACETYPE_LL_HSIZE 20
#define NFT_TRACETYPE_NETWORK_HSIZE 40
#define NFT_TRACETYPE_TRANSPORT_HSIZE 20

/*
 * Static branch key, initially false, exported to GPL modules.
 * NOTE(review): presumably flipped when tracing is switched on; the
 * enable/disable sites are not in this file -- confirm against callers.
 */
DEFINE_STATIC_KEY_FALSE(nft_trace_enabled);
EXPORT_SYMBOL_GPL(nft_trace_enabled);
27 | |
28 | static int (struct sk_buff *nlskb, u16 type, |
29 | const struct sk_buff *skb, |
30 | int off, unsigned int len) |
31 | { |
32 | struct nlattr *nla; |
33 | |
34 | if (len == 0) |
35 | return 0; |
36 | |
37 | nla = nla_reserve(skb: nlskb, attrtype: type, attrlen: len); |
38 | if (!nla || skb_copy_bits(skb, offset: off, to: nla_data(nla), len)) |
39 | return -1; |
40 | |
41 | return 0; |
42 | } |
43 | |
44 | static int (struct sk_buff *nlskb, |
45 | const struct sk_buff *skb) |
46 | { |
47 | struct vlan_ethhdr veth; |
48 | int off; |
49 | |
50 | BUILD_BUG_ON(sizeof(veth) > NFT_TRACETYPE_LL_HSIZE); |
51 | |
52 | off = skb_mac_header(skb) - skb->data; |
53 | if (off != -ETH_HLEN) |
54 | return -1; |
55 | |
56 | if (skb_copy_bits(skb, offset: off, to: &veth, ETH_HLEN)) |
57 | return -1; |
58 | |
59 | veth.h_vlan_proto = skb->vlan_proto; |
60 | veth.h_vlan_TCI = htons(skb_vlan_tag_get(skb)); |
61 | veth.h_vlan_encapsulated_proto = skb->protocol; |
62 | |
63 | return nla_put(skb: nlskb, attrtype: NFTA_TRACE_LL_HEADER, attrlen: sizeof(veth), data: &veth); |
64 | } |
65 | |
66 | static int nf_trace_fill_dev_info(struct sk_buff *nlskb, |
67 | const struct net_device *indev, |
68 | const struct net_device *outdev) |
69 | { |
70 | if (indev) { |
71 | if (nla_put_be32(skb: nlskb, attrtype: NFTA_TRACE_IIF, |
72 | htonl(indev->ifindex))) |
73 | return -1; |
74 | |
75 | if (nla_put_be16(skb: nlskb, attrtype: NFTA_TRACE_IIFTYPE, |
76 | htons(indev->type))) |
77 | return -1; |
78 | } |
79 | |
80 | if (outdev) { |
81 | if (nla_put_be32(skb: nlskb, attrtype: NFTA_TRACE_OIF, |
82 | htonl(outdev->ifindex))) |
83 | return -1; |
84 | |
85 | if (nla_put_be16(skb: nlskb, attrtype: NFTA_TRACE_OIFTYPE, |
86 | htons(outdev->type))) |
87 | return -1; |
88 | } |
89 | |
90 | return 0; |
91 | } |
92 | |
93 | static int nf_trace_fill_pkt_info(struct sk_buff *nlskb, |
94 | const struct nft_pktinfo *pkt) |
95 | { |
96 | const struct sk_buff *skb = pkt->skb; |
97 | int off = skb_network_offset(skb); |
98 | unsigned int len, nh_end; |
99 | |
100 | nh_end = pkt->flags & NFT_PKTINFO_L4PROTO ? nft_thoff(pkt) : skb->len; |
101 | len = min_t(unsigned int, nh_end - skb_network_offset(skb), |
102 | NFT_TRACETYPE_NETWORK_HSIZE); |
103 | if (trace_fill_header(nlskb, type: NFTA_TRACE_NETWORK_HEADER, skb, off, len)) |
104 | return -1; |
105 | |
106 | if (pkt->flags & NFT_PKTINFO_L4PROTO) { |
107 | len = min_t(unsigned int, skb->len - nft_thoff(pkt), |
108 | NFT_TRACETYPE_TRANSPORT_HSIZE); |
109 | if (trace_fill_header(nlskb, type: NFTA_TRACE_TRANSPORT_HEADER, skb, |
110 | off: nft_thoff(pkt), len)) |
111 | return -1; |
112 | } |
113 | |
114 | if (!skb_mac_header_was_set(skb)) |
115 | return 0; |
116 | |
117 | if (skb_vlan_tag_get(skb)) |
118 | return nf_trace_fill_ll_header(nlskb, skb); |
119 | |
120 | off = skb_mac_header(skb) - skb->data; |
121 | len = min_t(unsigned int, -off, NFT_TRACETYPE_LL_HSIZE); |
122 | return trace_fill_header(nlskb, type: NFTA_TRACE_LL_HEADER, |
123 | skb, off, len); |
124 | } |
125 | |
126 | static int nf_trace_fill_rule_info(struct sk_buff *nlskb, |
127 | const struct nft_verdict *verdict, |
128 | const struct nft_rule_dp *rule, |
129 | const struct nft_traceinfo *info) |
130 | { |
131 | if (!rule || rule->is_last) |
132 | return 0; |
133 | |
134 | /* a continue verdict with ->type == RETURN means that this is |
135 | * an implicit return (end of chain reached). |
136 | * |
137 | * Since no rule matched, the ->rule pointer is invalid. |
138 | */ |
139 | if (info->type == NFT_TRACETYPE_RETURN && |
140 | verdict->code == NFT_CONTINUE) |
141 | return 0; |
142 | |
143 | return nla_put_be64(skb: nlskb, attrtype: NFTA_TRACE_RULE_HANDLE, |
144 | cpu_to_be64(rule->handle), |
145 | padattr: NFTA_TRACE_PAD); |
146 | } |
147 | |
148 | static bool nft_trace_have_verdict_chain(const struct nft_verdict *verdict, |
149 | struct nft_traceinfo *info) |
150 | { |
151 | switch (info->type) { |
152 | case NFT_TRACETYPE_RETURN: |
153 | case NFT_TRACETYPE_RULE: |
154 | break; |
155 | default: |
156 | return false; |
157 | } |
158 | |
159 | switch (verdict->code) { |
160 | case NFT_JUMP: |
161 | case NFT_GOTO: |
162 | break; |
163 | default: |
164 | return false; |
165 | } |
166 | |
167 | return true; |
168 | } |
169 | |
170 | static const struct nft_chain *nft_trace_get_chain(const struct nft_rule_dp *rule, |
171 | const struct nft_traceinfo *info) |
172 | { |
173 | const struct nft_rule_dp_last *last; |
174 | |
175 | if (!rule) |
176 | return &info->basechain->chain; |
177 | |
178 | while (!rule->is_last) |
179 | rule = nft_rule_next(rule); |
180 | |
181 | last = (const struct nft_rule_dp_last *)rule; |
182 | |
183 | if (WARN_ON_ONCE(!last->chain)) |
184 | return &info->basechain->chain; |
185 | |
186 | return last->chain; |
187 | } |
188 | |
189 | void nft_trace_notify(const struct nft_pktinfo *pkt, |
190 | const struct nft_verdict *verdict, |
191 | const struct nft_rule_dp *rule, |
192 | struct nft_traceinfo *info) |
193 | { |
194 | const struct nft_chain *chain; |
195 | struct nlmsghdr *nlh; |
196 | struct sk_buff *skb; |
197 | unsigned int size; |
198 | u32 mark = 0; |
199 | u16 event; |
200 | |
201 | if (!nfnetlink_has_listeners(net: nft_net(pkt), NFNLGRP_NFTRACE)) |
202 | return; |
203 | |
204 | chain = nft_trace_get_chain(rule, info); |
205 | |
206 | size = nlmsg_total_size(payload: sizeof(struct nfgenmsg)) + |
207 | nla_total_size(strlen(chain->table->name)) + |
208 | nla_total_size(strlen(chain->name)) + |
209 | nla_total_size_64bit(payload: sizeof(__be64)) + /* rule handle */ |
210 | nla_total_size(payload: sizeof(__be32)) + /* trace type */ |
211 | nla_total_size(payload: 0) + /* VERDICT, nested */ |
212 | nla_total_size(payload: sizeof(u32)) + /* verdict code */ |
213 | nla_total_size(payload: sizeof(u32)) + /* id */ |
214 | nla_total_size(NFT_TRACETYPE_LL_HSIZE) + |
215 | nla_total_size(NFT_TRACETYPE_NETWORK_HSIZE) + |
216 | nla_total_size(NFT_TRACETYPE_TRANSPORT_HSIZE) + |
217 | nla_total_size(payload: sizeof(u32)) + /* iif */ |
218 | nla_total_size(payload: sizeof(__be16)) + /* iiftype */ |
219 | nla_total_size(payload: sizeof(u32)) + /* oif */ |
220 | nla_total_size(payload: sizeof(__be16)) + /* oiftype */ |
221 | nla_total_size(payload: sizeof(u32)) + /* mark */ |
222 | nla_total_size(payload: sizeof(u32)) + /* nfproto */ |
223 | nla_total_size(payload: sizeof(u32)); /* policy */ |
224 | |
225 | if (nft_trace_have_verdict_chain(verdict, info)) |
226 | size += nla_total_size(strlen(verdict->chain->name)); /* jump target */ |
227 | |
228 | skb = nlmsg_new(payload: size, GFP_ATOMIC); |
229 | if (!skb) |
230 | return; |
231 | |
232 | event = nfnl_msg_type(NFNL_SUBSYS_NFTABLES, msg_type: NFT_MSG_TRACE); |
233 | nlh = nfnl_msg_put(skb, portid: 0, seq: 0, type: event, flags: 0, family: info->basechain->type->family, |
234 | NFNETLINK_V0, res_id: 0); |
235 | if (!nlh) |
236 | goto nla_put_failure; |
237 | |
238 | if (nla_put_be32(skb, attrtype: NFTA_TRACE_NFPROTO, htonl(nft_pf(pkt)))) |
239 | goto nla_put_failure; |
240 | |
241 | if (nla_put_be32(skb, attrtype: NFTA_TRACE_TYPE, htonl(info->type))) |
242 | goto nla_put_failure; |
243 | |
244 | if (nla_put_u32(skb, attrtype: NFTA_TRACE_ID, value: info->skbid)) |
245 | goto nla_put_failure; |
246 | |
247 | if (nla_put_string(skb, attrtype: NFTA_TRACE_CHAIN, str: chain->name)) |
248 | goto nla_put_failure; |
249 | |
250 | if (nla_put_string(skb, attrtype: NFTA_TRACE_TABLE, str: chain->table->name)) |
251 | goto nla_put_failure; |
252 | |
253 | if (nf_trace_fill_rule_info(nlskb: skb, verdict, rule, info)) |
254 | goto nla_put_failure; |
255 | |
256 | switch (info->type) { |
257 | case NFT_TRACETYPE_UNSPEC: |
258 | case __NFT_TRACETYPE_MAX: |
259 | break; |
260 | case NFT_TRACETYPE_RETURN: |
261 | case NFT_TRACETYPE_RULE: { |
262 | unsigned int v; |
263 | |
264 | if (nft_verdict_dump(skb, type: NFTA_TRACE_VERDICT, v: verdict)) |
265 | goto nla_put_failure; |
266 | |
267 | /* pkt->skb undefined iff NF_STOLEN, disable dump */ |
268 | v = verdict->code & NF_VERDICT_MASK; |
269 | if (v == NF_STOLEN) |
270 | info->packet_dumped = true; |
271 | else |
272 | mark = pkt->skb->mark; |
273 | |
274 | break; |
275 | } |
276 | case NFT_TRACETYPE_POLICY: |
277 | mark = pkt->skb->mark; |
278 | |
279 | if (nla_put_be32(skb, attrtype: NFTA_TRACE_POLICY, |
280 | htonl(info->basechain->policy))) |
281 | goto nla_put_failure; |
282 | break; |
283 | } |
284 | |
285 | if (mark && nla_put_be32(skb, attrtype: NFTA_TRACE_MARK, htonl(mark))) |
286 | goto nla_put_failure; |
287 | |
288 | if (!info->packet_dumped) { |
289 | if (nf_trace_fill_dev_info(nlskb: skb, indev: nft_in(pkt), outdev: nft_out(pkt))) |
290 | goto nla_put_failure; |
291 | |
292 | if (nf_trace_fill_pkt_info(nlskb: skb, pkt)) |
293 | goto nla_put_failure; |
294 | info->packet_dumped = true; |
295 | } |
296 | |
297 | nlmsg_end(skb, nlh); |
298 | nfnetlink_send(skb, net: nft_net(pkt), portid: 0, NFNLGRP_NFTRACE, echo: 0, GFP_ATOMIC); |
299 | return; |
300 | |
301 | nla_put_failure: |
302 | WARN_ON_ONCE(1); |
303 | kfree_skb(skb); |
304 | } |
305 | |
306 | void nft_trace_init(struct nft_traceinfo *info, const struct nft_pktinfo *pkt, |
307 | const struct nft_chain *chain) |
308 | { |
309 | static siphash_key_t trace_key __read_mostly; |
310 | struct sk_buff *skb = pkt->skb; |
311 | |
312 | info->basechain = nft_base_chain(chain); |
313 | info->trace = true; |
314 | info->nf_trace = pkt->skb->nf_trace; |
315 | info->packet_dumped = false; |
316 | |
317 | net_get_random_once(&trace_key, sizeof(trace_key)); |
318 | |
319 | info->skbid = (u32)siphash_3u32(a: hash32_ptr(ptr: skb), |
320 | b: skb_get_hash(skb), |
321 | c: skb->skb_iif, |
322 | key: &trace_key); |
323 | } |
324 | |