1 | // SPDX-License-Identifier: GPL-2.0-only |
2 | /* |
3 | * net/sched/act_sample.c - Packet sampling tc action |
4 | * Copyright (c) 2017 Yotam Gigi <yotamg@mellanox.com> |
5 | */ |
6 | |
7 | #include <linux/types.h> |
8 | #include <linux/kernel.h> |
9 | #include <linux/string.h> |
10 | #include <linux/errno.h> |
11 | #include <linux/skbuff.h> |
12 | #include <linux/rtnetlink.h> |
13 | #include <linux/module.h> |
14 | #include <linux/init.h> |
15 | #include <linux/gfp.h> |
16 | #include <net/net_namespace.h> |
17 | #include <net/netlink.h> |
18 | #include <net/pkt_sched.h> |
19 | #include <linux/tc_act/tc_sample.h> |
20 | #include <net/tc_act/tc_sample.h> |
21 | #include <net/psample.h> |
22 | #include <net/pkt_cls.h> |
23 | #include <net/tc_wrapper.h> |
24 | |
25 | #include <linux/if_arp.h> |
26 | |
27 | static struct tc_action_ops act_sample_ops; |
28 | |
29 | static const struct nla_policy sample_policy[TCA_SAMPLE_MAX + 1] = { |
30 | [TCA_SAMPLE_PARMS] = { .len = sizeof(struct tc_sample) }, |
31 | [TCA_SAMPLE_RATE] = { .type = NLA_U32 }, |
32 | [TCA_SAMPLE_TRUNC_SIZE] = { .type = NLA_U32 }, |
33 | [TCA_SAMPLE_PSAMPLE_GROUP] = { .type = NLA_U32 }, |
34 | }; |
35 | |
36 | static int tcf_sample_init(struct net *net, struct nlattr *nla, |
37 | struct nlattr *est, struct tc_action **a, |
38 | struct tcf_proto *tp, |
39 | u32 flags, struct netlink_ext_ack *extack) |
40 | { |
41 | struct tc_action_net *tn = net_generic(net, id: act_sample_ops.net_id); |
42 | bool bind = flags & TCA_ACT_FLAGS_BIND; |
43 | struct nlattr *tb[TCA_SAMPLE_MAX + 1]; |
44 | struct psample_group *psample_group; |
45 | u32 psample_group_num, rate, index; |
46 | struct tcf_chain *goto_ch = NULL; |
47 | struct tc_sample *parm; |
48 | struct tcf_sample *s; |
49 | bool exists = false; |
50 | int ret, err; |
51 | |
52 | if (!nla) |
53 | return -EINVAL; |
54 | ret = nla_parse_nested_deprecated(tb, TCA_SAMPLE_MAX, nla, |
55 | policy: sample_policy, NULL); |
56 | if (ret < 0) |
57 | return ret; |
58 | |
59 | if (!tb[TCA_SAMPLE_PARMS]) |
60 | return -EINVAL; |
61 | |
62 | parm = nla_data(nla: tb[TCA_SAMPLE_PARMS]); |
63 | index = parm->index; |
64 | err = tcf_idr_check_alloc(tn, index: &index, a, bind); |
65 | if (err < 0) |
66 | return err; |
67 | exists = err; |
68 | if (exists && bind) |
69 | return 0; |
70 | |
71 | if (!exists) { |
72 | ret = tcf_idr_create(tn, index, est, a, |
73 | ops: &act_sample_ops, bind, cpustats: true, flags); |
74 | if (ret) { |
75 | tcf_idr_cleanup(tn, index); |
76 | return ret; |
77 | } |
78 | ret = ACT_P_CREATED; |
79 | } else if (!(flags & TCA_ACT_FLAGS_REPLACE)) { |
80 | tcf_idr_release(a: *a, bind); |
81 | return -EEXIST; |
82 | } |
83 | |
84 | if (!tb[TCA_SAMPLE_RATE] || !tb[TCA_SAMPLE_PSAMPLE_GROUP]) { |
85 | NL_SET_ERR_MSG(extack, "sample rate and group are required" ); |
86 | err = -EINVAL; |
87 | goto release_idr; |
88 | } |
89 | |
90 | err = tcf_action_check_ctrlact(action: parm->action, tp, handle: &goto_ch, newchain: extack); |
91 | if (err < 0) |
92 | goto release_idr; |
93 | |
94 | rate = nla_get_u32(nla: tb[TCA_SAMPLE_RATE]); |
95 | if (!rate) { |
96 | NL_SET_ERR_MSG(extack, "invalid sample rate" ); |
97 | err = -EINVAL; |
98 | goto put_chain; |
99 | } |
100 | psample_group_num = nla_get_u32(nla: tb[TCA_SAMPLE_PSAMPLE_GROUP]); |
101 | psample_group = psample_group_get(net, group_num: psample_group_num); |
102 | if (!psample_group) { |
103 | err = -ENOMEM; |
104 | goto put_chain; |
105 | } |
106 | |
107 | s = to_sample(*a); |
108 | |
109 | spin_lock_bh(lock: &s->tcf_lock); |
110 | goto_ch = tcf_action_set_ctrlact(a: *a, action: parm->action, newchain: goto_ch); |
111 | s->rate = rate; |
112 | s->psample_group_num = psample_group_num; |
113 | psample_group = rcu_replace_pointer(s->psample_group, psample_group, |
114 | lockdep_is_held(&s->tcf_lock)); |
115 | |
116 | if (tb[TCA_SAMPLE_TRUNC_SIZE]) { |
117 | s->truncate = true; |
118 | s->trunc_size = nla_get_u32(nla: tb[TCA_SAMPLE_TRUNC_SIZE]); |
119 | } |
120 | spin_unlock_bh(lock: &s->tcf_lock); |
121 | |
122 | if (psample_group) |
123 | psample_group_put(group: psample_group); |
124 | if (goto_ch) |
125 | tcf_chain_put_by_act(chain: goto_ch); |
126 | |
127 | return ret; |
128 | put_chain: |
129 | if (goto_ch) |
130 | tcf_chain_put_by_act(chain: goto_ch); |
131 | release_idr: |
132 | tcf_idr_release(a: *a, bind); |
133 | return err; |
134 | } |
135 | |
136 | static void tcf_sample_cleanup(struct tc_action *a) |
137 | { |
138 | struct tcf_sample *s = to_sample(a); |
139 | struct psample_group *psample_group; |
140 | |
141 | /* last reference to action, no need to lock */ |
142 | psample_group = rcu_dereference_protected(s->psample_group, 1); |
143 | RCU_INIT_POINTER(s->psample_group, NULL); |
144 | if (psample_group) |
145 | psample_group_put(group: psample_group); |
146 | } |
147 | |
148 | static bool tcf_sample_dev_ok_push(struct net_device *dev) |
149 | { |
150 | switch (dev->type) { |
151 | case ARPHRD_TUNNEL: |
152 | case ARPHRD_TUNNEL6: |
153 | case ARPHRD_SIT: |
154 | case ARPHRD_IPGRE: |
155 | case ARPHRD_IP6GRE: |
156 | case ARPHRD_VOID: |
157 | case ARPHRD_NONE: |
158 | return false; |
159 | default: |
160 | return true; |
161 | } |
162 | } |
163 | |
164 | TC_INDIRECT_SCOPE int tcf_sample_act(struct sk_buff *skb, |
165 | const struct tc_action *a, |
166 | struct tcf_result *res) |
167 | { |
168 | struct tcf_sample *s = to_sample(a); |
169 | struct psample_group *psample_group; |
170 | struct psample_metadata md = {}; |
171 | int retval; |
172 | |
173 | tcf_lastuse_update(tm: &s->tcf_tm); |
174 | bstats_update(this_cpu_ptr(s->common.cpu_bstats), skb); |
175 | retval = READ_ONCE(s->tcf_action); |
176 | |
177 | psample_group = rcu_dereference_bh(s->psample_group); |
178 | |
179 | /* randomly sample packets according to rate */ |
180 | if (psample_group && (get_random_u32_below(ceil: s->rate) == 0)) { |
181 | if (!skb_at_tc_ingress(skb)) { |
182 | md.in_ifindex = skb->skb_iif; |
183 | md.out_ifindex = skb->dev->ifindex; |
184 | } else { |
185 | md.in_ifindex = skb->dev->ifindex; |
186 | } |
187 | |
188 | /* on ingress, the mac header gets popped, so push it back */ |
189 | if (skb_at_tc_ingress(skb) && tcf_sample_dev_ok_push(dev: skb->dev)) |
190 | skb_push(skb, len: skb->mac_len); |
191 | |
192 | md.trunc_size = s->truncate ? s->trunc_size : skb->len; |
193 | psample_sample_packet(group: psample_group, skb, sample_rate: s->rate, md: &md); |
194 | |
195 | if (skb_at_tc_ingress(skb) && tcf_sample_dev_ok_push(dev: skb->dev)) |
196 | skb_pull(skb, len: skb->mac_len); |
197 | } |
198 | |
199 | return retval; |
200 | } |
201 | |
202 | static void tcf_sample_stats_update(struct tc_action *a, u64 bytes, u64 packets, |
203 | u64 drops, u64 lastuse, bool hw) |
204 | { |
205 | struct tcf_sample *s = to_sample(a); |
206 | struct tcf_t *tm = &s->tcf_tm; |
207 | |
208 | tcf_action_update_stats(a, bytes, packets, drops, hw); |
209 | tm->lastuse = max_t(u64, tm->lastuse, lastuse); |
210 | } |
211 | |
212 | static int tcf_sample_dump(struct sk_buff *skb, struct tc_action *a, |
213 | int bind, int ref) |
214 | { |
215 | unsigned char *b = skb_tail_pointer(skb); |
216 | struct tcf_sample *s = to_sample(a); |
217 | struct tc_sample opt = { |
218 | .index = s->tcf_index, |
219 | .refcnt = refcount_read(r: &s->tcf_refcnt) - ref, |
220 | .bindcnt = atomic_read(v: &s->tcf_bindcnt) - bind, |
221 | }; |
222 | struct tcf_t t; |
223 | |
224 | spin_lock_bh(lock: &s->tcf_lock); |
225 | opt.action = s->tcf_action; |
226 | if (nla_put(skb, attrtype: TCA_SAMPLE_PARMS, attrlen: sizeof(opt), data: &opt)) |
227 | goto nla_put_failure; |
228 | |
229 | tcf_tm_dump(dtm: &t, stm: &s->tcf_tm); |
230 | if (nla_put_64bit(skb, attrtype: TCA_SAMPLE_TM, attrlen: sizeof(t), data: &t, padattr: TCA_SAMPLE_PAD)) |
231 | goto nla_put_failure; |
232 | |
233 | if (nla_put_u32(skb, attrtype: TCA_SAMPLE_RATE, value: s->rate)) |
234 | goto nla_put_failure; |
235 | |
236 | if (s->truncate) |
237 | if (nla_put_u32(skb, attrtype: TCA_SAMPLE_TRUNC_SIZE, value: s->trunc_size)) |
238 | goto nla_put_failure; |
239 | |
240 | if (nla_put_u32(skb, attrtype: TCA_SAMPLE_PSAMPLE_GROUP, value: s->psample_group_num)) |
241 | goto nla_put_failure; |
242 | spin_unlock_bh(lock: &s->tcf_lock); |
243 | |
244 | return skb->len; |
245 | |
246 | nla_put_failure: |
247 | spin_unlock_bh(lock: &s->tcf_lock); |
248 | nlmsg_trim(skb, mark: b); |
249 | return -1; |
250 | } |
251 | |
/*
 * Destructor with a void * argument that drops one reference on the
 * psample group passed in @priv.
 */
static void tcf_psample_group_put(void *priv)
{
	psample_group_put(priv);
}
258 | |
259 | static struct psample_group * |
260 | tcf_sample_get_group(const struct tc_action *a, |
261 | tc_action_priv_destructor *destructor) |
262 | { |
263 | struct tcf_sample *s = to_sample(a); |
264 | struct psample_group *group; |
265 | |
266 | group = rcu_dereference_protected(s->psample_group, |
267 | lockdep_is_held(&s->tcf_lock)); |
268 | if (group) { |
269 | psample_group_take(group); |
270 | *destructor = tcf_psample_group_put; |
271 | } |
272 | |
273 | return group; |
274 | } |
275 | |
276 | static void tcf_offload_sample_get_group(struct flow_action_entry *entry, |
277 | const struct tc_action *act) |
278 | { |
279 | entry->sample.psample_group = |
280 | act->ops->get_psample_group(act, &entry->destructor); |
281 | entry->destructor_priv = entry->sample.psample_group; |
282 | } |
283 | |
284 | static int tcf_sample_offload_act_setup(struct tc_action *act, void *entry_data, |
285 | u32 *index_inc, bool bind, |
286 | struct netlink_ext_ack *extack) |
287 | { |
288 | if (bind) { |
289 | struct flow_action_entry *entry = entry_data; |
290 | |
291 | entry->id = FLOW_ACTION_SAMPLE; |
292 | entry->sample.trunc_size = tcf_sample_trunc_size(a: act); |
293 | entry->sample.truncate = tcf_sample_truncate(a: act); |
294 | entry->sample.rate = tcf_sample_rate(a: act); |
295 | tcf_offload_sample_get_group(entry, act); |
296 | *index_inc = 1; |
297 | } else { |
298 | struct flow_offload_action *fl_action = entry_data; |
299 | |
300 | fl_action->id = FLOW_ACTION_SAMPLE; |
301 | } |
302 | |
303 | return 0; |
304 | } |
305 | |
306 | static struct tc_action_ops act_sample_ops = { |
307 | .kind = "sample" , |
308 | .id = TCA_ID_SAMPLE, |
309 | .owner = THIS_MODULE, |
310 | .act = tcf_sample_act, |
311 | .stats_update = tcf_sample_stats_update, |
312 | .dump = tcf_sample_dump, |
313 | .init = tcf_sample_init, |
314 | .cleanup = tcf_sample_cleanup, |
315 | .get_psample_group = tcf_sample_get_group, |
316 | .offload_act_setup = tcf_sample_offload_act_setup, |
317 | .size = sizeof(struct tcf_sample), |
318 | }; |
319 | |
320 | static __net_init int sample_init_net(struct net *net) |
321 | { |
322 | struct tc_action_net *tn = net_generic(net, id: act_sample_ops.net_id); |
323 | |
324 | return tc_action_net_init(net, tn, ops: &act_sample_ops); |
325 | } |
326 | |
327 | static void __net_exit sample_exit_net(struct list_head *net_list) |
328 | { |
329 | tc_action_net_exit(net_list, id: act_sample_ops.net_id); |
330 | } |
331 | |
332 | static struct pernet_operations sample_net_ops = { |
333 | .init = sample_init_net, |
334 | .exit_batch = sample_exit_net, |
335 | .id = &act_sample_ops.net_id, |
336 | .size = sizeof(struct tc_action_net), |
337 | }; |
338 | |
339 | static int __init sample_init_module(void) |
340 | { |
341 | return tcf_register_action(a: &act_sample_ops, ops: &sample_net_ops); |
342 | } |
343 | |
344 | static void __exit sample_cleanup_module(void) |
345 | { |
346 | tcf_unregister_action(a: &act_sample_ops, ops: &sample_net_ops); |
347 | } |
348 | |
349 | module_init(sample_init_module); |
350 | module_exit(sample_cleanup_module); |
351 | |
352 | MODULE_AUTHOR("Yotam Gigi <yotam.gi@gmail.com>" ); |
353 | MODULE_DESCRIPTION("Packet sampling action" ); |
354 | MODULE_LICENSE("GPL v2" ); |
355 | |