1 | // SPDX-License-Identifier: GPL-2.0-only |
2 | /* |
3 | * Copyright (c) 2017 Nicira, Inc. |
4 | */ |
5 | |
6 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt |
7 | |
8 | #include <linux/if.h> |
9 | #include <linux/skbuff.h> |
10 | #include <linux/ip.h> |
11 | #include <linux/kernel.h> |
12 | #include <linux/openvswitch.h> |
13 | #include <linux/netlink.h> |
14 | #include <linux/rculist.h> |
15 | |
16 | #include <net/netlink.h> |
17 | #include <net/genetlink.h> |
18 | |
19 | #include "datapath.h" |
20 | #include "meter.h" |
21 | |
22 | static const struct nla_policy meter_policy[OVS_METER_ATTR_MAX + 1] = { |
23 | [OVS_METER_ATTR_ID] = { .type = NLA_U32, }, |
24 | [OVS_METER_ATTR_KBPS] = { .type = NLA_FLAG }, |
25 | [OVS_METER_ATTR_STATS] = { .len = sizeof(struct ovs_flow_stats) }, |
26 | [OVS_METER_ATTR_BANDS] = { .type = NLA_NESTED }, |
27 | [OVS_METER_ATTR_USED] = { .type = NLA_U64 }, |
28 | [OVS_METER_ATTR_CLEAR] = { .type = NLA_FLAG }, |
29 | [OVS_METER_ATTR_MAX_METERS] = { .type = NLA_U32 }, |
30 | [OVS_METER_ATTR_MAX_BANDS] = { .type = NLA_U32 }, |
31 | }; |
32 | |
33 | static const struct nla_policy band_policy[OVS_BAND_ATTR_MAX + 1] = { |
34 | [OVS_BAND_ATTR_TYPE] = { .type = NLA_U32, }, |
35 | [OVS_BAND_ATTR_RATE] = { .type = NLA_U32, }, |
36 | [OVS_BAND_ATTR_BURST] = { .type = NLA_U32, }, |
37 | [OVS_BAND_ATTR_STATS] = { .len = sizeof(struct ovs_flow_stats) }, |
38 | }; |
39 | |
40 | static u32 meter_hash(struct dp_meter_instance *ti, u32 id) |
41 | { |
42 | return id % ti->n_meters; |
43 | } |
44 | |
45 | static void ovs_meter_free(struct dp_meter *meter) |
46 | { |
47 | if (!meter) |
48 | return; |
49 | |
50 | kfree_rcu(meter, rcu); |
51 | } |
52 | |
53 | /* Call with ovs_mutex or RCU read lock. */ |
54 | static struct dp_meter *lookup_meter(const struct dp_meter_table *tbl, |
55 | u32 meter_id) |
56 | { |
57 | struct dp_meter_instance *ti = rcu_dereference_ovsl(tbl->ti); |
58 | u32 hash = meter_hash(ti, id: meter_id); |
59 | struct dp_meter *meter; |
60 | |
61 | meter = rcu_dereference_ovsl(ti->dp_meters[hash]); |
62 | if (meter && likely(meter->id == meter_id)) |
63 | return meter; |
64 | |
65 | return NULL; |
66 | } |
67 | |
68 | static struct dp_meter_instance *dp_meter_instance_alloc(const u32 size) |
69 | { |
70 | struct dp_meter_instance *ti; |
71 | |
72 | ti = kvzalloc(struct_size(ti, dp_meters, size), GFP_KERNEL); |
73 | if (!ti) |
74 | return NULL; |
75 | |
76 | ti->n_meters = size; |
77 | |
78 | return ti; |
79 | } |
80 | |
/* Free a meter instance immediately with kvfree(), matching the kvzalloc()
 * in dp_meter_instance_alloc(). The meters referenced by its slots are not
 * touched.
 */
static void dp_meter_instance_free(struct dp_meter_instance *ti)
{
	kvfree(ti);
}
85 | |
86 | static void dp_meter_instance_free_rcu(struct rcu_head *rcu) |
87 | { |
88 | struct dp_meter_instance *ti; |
89 | |
90 | ti = container_of(rcu, struct dp_meter_instance, rcu); |
91 | kvfree(addr: ti); |
92 | } |
93 | |
94 | static int |
95 | dp_meter_instance_realloc(struct dp_meter_table *tbl, u32 size) |
96 | { |
97 | struct dp_meter_instance *ti = rcu_dereference_ovsl(tbl->ti); |
98 | int n_meters = min(size, ti->n_meters); |
99 | struct dp_meter_instance *new_ti; |
100 | int i; |
101 | |
102 | new_ti = dp_meter_instance_alloc(size); |
103 | if (!new_ti) |
104 | return -ENOMEM; |
105 | |
106 | for (i = 0; i < n_meters; i++) |
107 | if (rcu_dereference_ovsl(ti->dp_meters[i])) |
108 | new_ti->dp_meters[i] = ti->dp_meters[i]; |
109 | |
110 | rcu_assign_pointer(tbl->ti, new_ti); |
111 | call_rcu(head: &ti->rcu, func: dp_meter_instance_free_rcu); |
112 | |
113 | return 0; |
114 | } |
115 | |
116 | static void dp_meter_instance_insert(struct dp_meter_instance *ti, |
117 | struct dp_meter *meter) |
118 | { |
119 | u32 hash; |
120 | |
121 | hash = meter_hash(ti, id: meter->id); |
122 | rcu_assign_pointer(ti->dp_meters[hash], meter); |
123 | } |
124 | |
125 | static void dp_meter_instance_remove(struct dp_meter_instance *ti, |
126 | struct dp_meter *meter) |
127 | { |
128 | u32 hash; |
129 | |
130 | hash = meter_hash(ti, id: meter->id); |
131 | RCU_INIT_POINTER(ti->dp_meters[hash], NULL); |
132 | } |
133 | |
134 | static int attach_meter(struct dp_meter_table *tbl, struct dp_meter *meter) |
135 | { |
136 | struct dp_meter_instance *ti = rcu_dereference_ovsl(tbl->ti); |
137 | u32 hash = meter_hash(ti, id: meter->id); |
138 | int err; |
139 | |
140 | /* In generally, slots selected should be empty, because |
141 | * OvS uses id-pool to fetch a available id. |
142 | */ |
143 | if (unlikely(rcu_dereference_ovsl(ti->dp_meters[hash]))) |
144 | return -EBUSY; |
145 | |
146 | dp_meter_instance_insert(ti, meter); |
147 | |
148 | /* That function is thread-safe. */ |
149 | tbl->count++; |
150 | if (tbl->count >= tbl->max_meters_allowed) { |
151 | err = -EFBIG; |
152 | goto attach_err; |
153 | } |
154 | |
155 | if (tbl->count >= ti->n_meters && |
156 | dp_meter_instance_realloc(tbl, size: ti->n_meters * 2)) { |
157 | err = -ENOMEM; |
158 | goto attach_err; |
159 | } |
160 | |
161 | return 0; |
162 | |
163 | attach_err: |
164 | dp_meter_instance_remove(ti, meter); |
165 | tbl->count--; |
166 | return err; |
167 | } |
168 | |
169 | static int detach_meter(struct dp_meter_table *tbl, struct dp_meter *meter) |
170 | { |
171 | struct dp_meter_instance *ti; |
172 | |
173 | ASSERT_OVSL(); |
174 | if (!meter) |
175 | return 0; |
176 | |
177 | ti = rcu_dereference_ovsl(tbl->ti); |
178 | dp_meter_instance_remove(ti, meter); |
179 | |
180 | tbl->count--; |
181 | |
182 | /* Shrink the meter array if necessary. */ |
183 | if (ti->n_meters > DP_METER_ARRAY_SIZE_MIN && |
184 | tbl->count <= (ti->n_meters / 4)) { |
185 | int half_size = ti->n_meters / 2; |
186 | int i; |
187 | |
188 | /* Avoid hash collision, don't move slots to other place. |
189 | * Make sure there are no references of meters in array |
190 | * which will be released. |
191 | */ |
192 | for (i = half_size; i < ti->n_meters; i++) |
193 | if (rcu_dereference_ovsl(ti->dp_meters[i])) |
194 | goto out; |
195 | |
196 | if (dp_meter_instance_realloc(tbl, size: half_size)) |
197 | goto shrink_err; |
198 | } |
199 | |
200 | out: |
201 | return 0; |
202 | |
203 | shrink_err: |
204 | dp_meter_instance_insert(ti, meter); |
205 | tbl->count++; |
206 | return -ENOMEM; |
207 | } |
208 | |
209 | static struct sk_buff * |
210 | ovs_meter_cmd_reply_start(struct genl_info *info, u8 cmd, |
211 | struct ovs_header **) |
212 | { |
213 | struct sk_buff *skb; |
214 | struct ovs_header * = genl_info_userhdr(info); |
215 | |
216 | skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC); |
217 | if (!skb) |
218 | return ERR_PTR(error: -ENOMEM); |
219 | |
220 | *ovs_reply_header = genlmsg_put(skb, portid: info->snd_portid, |
221 | seq: info->snd_seq, |
222 | family: &dp_meter_genl_family, flags: 0, cmd); |
223 | if (!*ovs_reply_header) { |
224 | nlmsg_free(skb); |
225 | return ERR_PTR(error: -EMSGSIZE); |
226 | } |
227 | (*ovs_reply_header)->dp_ifindex = ovs_header->dp_ifindex; |
228 | |
229 | return skb; |
230 | } |
231 | |
232 | static int ovs_meter_cmd_reply_stats(struct sk_buff *reply, u32 meter_id, |
233 | struct dp_meter *meter) |
234 | { |
235 | struct nlattr *nla; |
236 | struct dp_meter_band *band; |
237 | u16 i; |
238 | |
239 | if (nla_put_u32(skb: reply, attrtype: OVS_METER_ATTR_ID, value: meter_id)) |
240 | goto error; |
241 | |
242 | if (nla_put(skb: reply, attrtype: OVS_METER_ATTR_STATS, |
243 | attrlen: sizeof(struct ovs_flow_stats), data: &meter->stats)) |
244 | goto error; |
245 | |
246 | if (nla_put_u64_64bit(skb: reply, attrtype: OVS_METER_ATTR_USED, value: meter->used, |
247 | padattr: OVS_METER_ATTR_PAD)) |
248 | goto error; |
249 | |
250 | nla = nla_nest_start_noflag(skb: reply, attrtype: OVS_METER_ATTR_BANDS); |
251 | if (!nla) |
252 | goto error; |
253 | |
254 | band = meter->bands; |
255 | |
256 | for (i = 0; i < meter->n_bands; ++i, ++band) { |
257 | struct nlattr *band_nla; |
258 | |
259 | band_nla = nla_nest_start_noflag(skb: reply, attrtype: OVS_BAND_ATTR_UNSPEC); |
260 | if (!band_nla || nla_put(skb: reply, attrtype: OVS_BAND_ATTR_STATS, |
261 | attrlen: sizeof(struct ovs_flow_stats), |
262 | data: &band->stats)) |
263 | goto error; |
264 | nla_nest_end(skb: reply, start: band_nla); |
265 | } |
266 | nla_nest_end(skb: reply, start: nla); |
267 | |
268 | return 0; |
269 | error: |
270 | return -EMSGSIZE; |
271 | } |
272 | |
273 | static int ovs_meter_cmd_features(struct sk_buff *skb, struct genl_info *info) |
274 | { |
275 | struct ovs_header * = genl_info_userhdr(info); |
276 | struct ovs_header *; |
277 | struct nlattr *nla, *band_nla; |
278 | struct sk_buff *reply; |
279 | struct datapath *dp; |
280 | int err = -EMSGSIZE; |
281 | |
282 | reply = ovs_meter_cmd_reply_start(info, cmd: OVS_METER_CMD_FEATURES, |
283 | ovs_reply_header: &ovs_reply_header); |
284 | if (IS_ERR(ptr: reply)) |
285 | return PTR_ERR(ptr: reply); |
286 | |
287 | ovs_lock(); |
288 | dp = get_dp(net: sock_net(sk: skb->sk), dp_ifindex: ovs_header->dp_ifindex); |
289 | if (!dp) { |
290 | err = -ENODEV; |
291 | goto exit_unlock; |
292 | } |
293 | |
294 | if (nla_put_u32(skb: reply, attrtype: OVS_METER_ATTR_MAX_METERS, |
295 | value: dp->meter_tbl.max_meters_allowed)) |
296 | goto exit_unlock; |
297 | |
298 | ovs_unlock(); |
299 | |
300 | if (nla_put_u32(skb: reply, attrtype: OVS_METER_ATTR_MAX_BANDS, DP_MAX_BANDS)) |
301 | goto nla_put_failure; |
302 | |
303 | nla = nla_nest_start_noflag(skb: reply, attrtype: OVS_METER_ATTR_BANDS); |
304 | if (!nla) |
305 | goto nla_put_failure; |
306 | |
307 | band_nla = nla_nest_start_noflag(skb: reply, attrtype: OVS_BAND_ATTR_UNSPEC); |
308 | if (!band_nla) |
309 | goto nla_put_failure; |
310 | /* Currently only DROP band type is supported. */ |
311 | if (nla_put_u32(skb: reply, attrtype: OVS_BAND_ATTR_TYPE, value: OVS_METER_BAND_TYPE_DROP)) |
312 | goto nla_put_failure; |
313 | nla_nest_end(skb: reply, start: band_nla); |
314 | nla_nest_end(skb: reply, start: nla); |
315 | |
316 | genlmsg_end(skb: reply, hdr: ovs_reply_header); |
317 | return genlmsg_reply(skb: reply, info); |
318 | |
319 | exit_unlock: |
320 | ovs_unlock(); |
321 | nla_put_failure: |
322 | nlmsg_free(skb: reply); |
323 | return err; |
324 | } |
325 | |
326 | static struct dp_meter *dp_meter_create(struct nlattr **a) |
327 | { |
328 | struct nlattr *nla; |
329 | int rem; |
330 | u16 n_bands = 0; |
331 | struct dp_meter *meter; |
332 | struct dp_meter_band *band; |
333 | int err; |
334 | |
335 | /* Validate attributes, count the bands. */ |
336 | if (!a[OVS_METER_ATTR_BANDS]) |
337 | return ERR_PTR(error: -EINVAL); |
338 | |
339 | nla_for_each_nested(nla, a[OVS_METER_ATTR_BANDS], rem) |
340 | if (++n_bands > DP_MAX_BANDS) |
341 | return ERR_PTR(error: -EINVAL); |
342 | |
343 | /* Allocate and set up the meter before locking anything. */ |
344 | meter = kzalloc(struct_size(meter, bands, n_bands), GFP_KERNEL_ACCOUNT); |
345 | if (!meter) |
346 | return ERR_PTR(error: -ENOMEM); |
347 | |
348 | meter->id = nla_get_u32(nla: a[OVS_METER_ATTR_ID]); |
349 | meter->used = div_u64(dividend: ktime_get_ns(), divisor: 1000 * 1000); |
350 | meter->kbps = a[OVS_METER_ATTR_KBPS] ? 1 : 0; |
351 | meter->keep_stats = !a[OVS_METER_ATTR_CLEAR]; |
352 | spin_lock_init(&meter->lock); |
353 | if (meter->keep_stats && a[OVS_METER_ATTR_STATS]) { |
354 | meter->stats = *(struct ovs_flow_stats *) |
355 | nla_data(nla: a[OVS_METER_ATTR_STATS]); |
356 | } |
357 | meter->n_bands = n_bands; |
358 | |
359 | /* Set up meter bands. */ |
360 | band = meter->bands; |
361 | nla_for_each_nested(nla, a[OVS_METER_ATTR_BANDS], rem) { |
362 | struct nlattr *attr[OVS_BAND_ATTR_MAX + 1]; |
363 | u32 band_max_delta_t; |
364 | |
365 | err = nla_parse_deprecated(tb: (struct nlattr **)&attr, |
366 | OVS_BAND_ATTR_MAX, head: nla_data(nla), |
367 | len: nla_len(nla), policy: band_policy, NULL); |
368 | if (err) |
369 | goto exit_free_meter; |
370 | |
371 | if (!attr[OVS_BAND_ATTR_TYPE] || |
372 | !attr[OVS_BAND_ATTR_RATE] || |
373 | !attr[OVS_BAND_ATTR_BURST]) { |
374 | err = -EINVAL; |
375 | goto exit_free_meter; |
376 | } |
377 | |
378 | band->type = nla_get_u32(nla: attr[OVS_BAND_ATTR_TYPE]); |
379 | band->rate = nla_get_u32(nla: attr[OVS_BAND_ATTR_RATE]); |
380 | if (band->rate == 0) { |
381 | err = -EINVAL; |
382 | goto exit_free_meter; |
383 | } |
384 | |
385 | band->burst_size = nla_get_u32(nla: attr[OVS_BAND_ATTR_BURST]); |
386 | /* Figure out max delta_t that is enough to fill any bucket. |
387 | * Keep max_delta_t size to the bucket units: |
388 | * pkts => 1/1000 packets, kilobits => bits. |
389 | * |
390 | * Start with a full bucket. |
391 | */ |
392 | band->bucket = band->burst_size * 1000ULL; |
393 | band_max_delta_t = div_u64(dividend: band->bucket, divisor: band->rate); |
394 | if (band_max_delta_t > meter->max_delta_t) |
395 | meter->max_delta_t = band_max_delta_t; |
396 | band++; |
397 | } |
398 | |
399 | return meter; |
400 | |
401 | exit_free_meter: |
402 | kfree(objp: meter); |
403 | return ERR_PTR(error: err); |
404 | } |
405 | |
406 | static int ovs_meter_cmd_set(struct sk_buff *skb, struct genl_info *info) |
407 | { |
408 | struct nlattr **a = info->attrs; |
409 | struct dp_meter *meter, *old_meter; |
410 | struct sk_buff *reply; |
411 | struct ovs_header *; |
412 | struct ovs_header * = genl_info_userhdr(info); |
413 | struct dp_meter_table *meter_tbl; |
414 | struct datapath *dp; |
415 | int err; |
416 | u32 meter_id; |
417 | bool failed; |
418 | |
419 | if (!a[OVS_METER_ATTR_ID]) |
420 | return -EINVAL; |
421 | |
422 | meter = dp_meter_create(a); |
423 | if (IS_ERR(ptr: meter)) |
424 | return PTR_ERR(ptr: meter); |
425 | |
426 | reply = ovs_meter_cmd_reply_start(info, cmd: OVS_METER_CMD_SET, |
427 | ovs_reply_header: &ovs_reply_header); |
428 | if (IS_ERR(ptr: reply)) { |
429 | err = PTR_ERR(ptr: reply); |
430 | goto exit_free_meter; |
431 | } |
432 | |
433 | ovs_lock(); |
434 | dp = get_dp(net: sock_net(sk: skb->sk), dp_ifindex: ovs_header->dp_ifindex); |
435 | if (!dp) { |
436 | err = -ENODEV; |
437 | goto exit_unlock; |
438 | } |
439 | |
440 | meter_tbl = &dp->meter_tbl; |
441 | meter_id = nla_get_u32(nla: a[OVS_METER_ATTR_ID]); |
442 | |
443 | old_meter = lookup_meter(tbl: meter_tbl, meter_id); |
444 | err = detach_meter(tbl: meter_tbl, meter: old_meter); |
445 | if (err) |
446 | goto exit_unlock; |
447 | |
448 | err = attach_meter(tbl: meter_tbl, meter); |
449 | if (err) |
450 | goto exit_free_old_meter; |
451 | |
452 | ovs_unlock(); |
453 | |
454 | /* Build response with the meter_id and stats from |
455 | * the old meter, if any. |
456 | */ |
457 | failed = nla_put_u32(skb: reply, attrtype: OVS_METER_ATTR_ID, value: meter_id); |
458 | WARN_ON(failed); |
459 | if (old_meter) { |
460 | spin_lock_bh(lock: &old_meter->lock); |
461 | if (old_meter->keep_stats) { |
462 | err = ovs_meter_cmd_reply_stats(reply, meter_id, |
463 | meter: old_meter); |
464 | WARN_ON(err); |
465 | } |
466 | spin_unlock_bh(lock: &old_meter->lock); |
467 | ovs_meter_free(meter: old_meter); |
468 | } |
469 | |
470 | genlmsg_end(skb: reply, hdr: ovs_reply_header); |
471 | return genlmsg_reply(skb: reply, info); |
472 | |
473 | exit_free_old_meter: |
474 | ovs_meter_free(meter: old_meter); |
475 | exit_unlock: |
476 | ovs_unlock(); |
477 | nlmsg_free(skb: reply); |
478 | exit_free_meter: |
479 | kfree(objp: meter); |
480 | return err; |
481 | } |
482 | |
483 | static int ovs_meter_cmd_get(struct sk_buff *skb, struct genl_info *info) |
484 | { |
485 | struct ovs_header * = genl_info_userhdr(info); |
486 | struct ovs_header *; |
487 | struct nlattr **a = info->attrs; |
488 | struct dp_meter *meter; |
489 | struct sk_buff *reply; |
490 | struct datapath *dp; |
491 | u32 meter_id; |
492 | int err; |
493 | |
494 | if (!a[OVS_METER_ATTR_ID]) |
495 | return -EINVAL; |
496 | |
497 | meter_id = nla_get_u32(nla: a[OVS_METER_ATTR_ID]); |
498 | |
499 | reply = ovs_meter_cmd_reply_start(info, cmd: OVS_METER_CMD_GET, |
500 | ovs_reply_header: &ovs_reply_header); |
501 | if (IS_ERR(ptr: reply)) |
502 | return PTR_ERR(ptr: reply); |
503 | |
504 | ovs_lock(); |
505 | |
506 | dp = get_dp(net: sock_net(sk: skb->sk), dp_ifindex: ovs_header->dp_ifindex); |
507 | if (!dp) { |
508 | err = -ENODEV; |
509 | goto exit_unlock; |
510 | } |
511 | |
512 | /* Locate meter, copy stats. */ |
513 | meter = lookup_meter(tbl: &dp->meter_tbl, meter_id); |
514 | if (!meter) { |
515 | err = -ENOENT; |
516 | goto exit_unlock; |
517 | } |
518 | |
519 | spin_lock_bh(lock: &meter->lock); |
520 | err = ovs_meter_cmd_reply_stats(reply, meter_id, meter); |
521 | spin_unlock_bh(lock: &meter->lock); |
522 | if (err) |
523 | goto exit_unlock; |
524 | |
525 | ovs_unlock(); |
526 | |
527 | genlmsg_end(skb: reply, hdr: ovs_reply_header); |
528 | return genlmsg_reply(skb: reply, info); |
529 | |
530 | exit_unlock: |
531 | ovs_unlock(); |
532 | nlmsg_free(skb: reply); |
533 | return err; |
534 | } |
535 | |
536 | static int ovs_meter_cmd_del(struct sk_buff *skb, struct genl_info *info) |
537 | { |
538 | struct ovs_header * = genl_info_userhdr(info); |
539 | struct ovs_header *; |
540 | struct nlattr **a = info->attrs; |
541 | struct dp_meter *old_meter; |
542 | struct sk_buff *reply; |
543 | struct datapath *dp; |
544 | u32 meter_id; |
545 | int err; |
546 | |
547 | if (!a[OVS_METER_ATTR_ID]) |
548 | return -EINVAL; |
549 | |
550 | reply = ovs_meter_cmd_reply_start(info, cmd: OVS_METER_CMD_DEL, |
551 | ovs_reply_header: &ovs_reply_header); |
552 | if (IS_ERR(ptr: reply)) |
553 | return PTR_ERR(ptr: reply); |
554 | |
555 | ovs_lock(); |
556 | |
557 | dp = get_dp(net: sock_net(sk: skb->sk), dp_ifindex: ovs_header->dp_ifindex); |
558 | if (!dp) { |
559 | err = -ENODEV; |
560 | goto exit_unlock; |
561 | } |
562 | |
563 | meter_id = nla_get_u32(nla: a[OVS_METER_ATTR_ID]); |
564 | old_meter = lookup_meter(tbl: &dp->meter_tbl, meter_id); |
565 | if (old_meter) { |
566 | spin_lock_bh(lock: &old_meter->lock); |
567 | err = ovs_meter_cmd_reply_stats(reply, meter_id, meter: old_meter); |
568 | WARN_ON(err); |
569 | spin_unlock_bh(lock: &old_meter->lock); |
570 | |
571 | err = detach_meter(tbl: &dp->meter_tbl, meter: old_meter); |
572 | if (err) |
573 | goto exit_unlock; |
574 | } |
575 | |
576 | ovs_unlock(); |
577 | ovs_meter_free(meter: old_meter); |
578 | genlmsg_end(skb: reply, hdr: ovs_reply_header); |
579 | return genlmsg_reply(skb: reply, info); |
580 | |
581 | exit_unlock: |
582 | ovs_unlock(); |
583 | nlmsg_free(skb: reply); |
584 | return err; |
585 | } |
586 | |
587 | /* Meter action execution. |
588 | * |
589 | * Return true 'meter_id' drop band is triggered. The 'skb' should be |
590 | * dropped by the caller'. |
591 | */ |
592 | bool ovs_meter_execute(struct datapath *dp, struct sk_buff *skb, |
593 | struct sw_flow_key *key, u32 meter_id) |
594 | { |
595 | long long int now_ms = div_u64(dividend: ktime_get_ns(), divisor: 1000 * 1000); |
596 | long long int long_delta_ms; |
597 | struct dp_meter_band *band; |
598 | struct dp_meter *meter; |
599 | int i, band_exceeded_max = -1; |
600 | u32 band_exceeded_rate = 0; |
601 | u32 delta_ms; |
602 | u32 cost; |
603 | |
604 | meter = lookup_meter(tbl: &dp->meter_tbl, meter_id); |
605 | /* Do not drop the packet when there is no meter. */ |
606 | if (!meter) |
607 | return false; |
608 | |
609 | /* Lock the meter while using it. */ |
610 | spin_lock(lock: &meter->lock); |
611 | |
612 | long_delta_ms = (now_ms - meter->used); /* ms */ |
613 | if (long_delta_ms < 0) { |
614 | /* This condition means that we have several threads fighting |
615 | * for a meter lock, and the one who received the packets a |
616 | * bit later wins. Assuming that all racing threads received |
617 | * packets at the same time to avoid overflow. |
618 | */ |
619 | long_delta_ms = 0; |
620 | } |
621 | |
622 | /* Make sure delta_ms will not be too large, so that bucket will not |
623 | * wrap around below. |
624 | */ |
625 | delta_ms = (long_delta_ms > (long long int)meter->max_delta_t) |
626 | ? meter->max_delta_t : (u32)long_delta_ms; |
627 | |
628 | /* Update meter statistics. |
629 | */ |
630 | meter->used = now_ms; |
631 | meter->stats.n_packets += 1; |
632 | meter->stats.n_bytes += skb->len; |
633 | |
634 | /* Bucket rate is either in kilobits per second, or in packets per |
635 | * second. We maintain the bucket in the units of either bits or |
636 | * 1/1000th of a packet, correspondingly. |
637 | * Then, when rate is multiplied with milliseconds, we get the |
638 | * bucket units: |
639 | * msec * kbps = bits, and |
640 | * msec * packets/sec = 1/1000 packets. |
641 | * |
642 | * 'cost' is the number of bucket units in this packet. |
643 | */ |
644 | cost = (meter->kbps) ? skb->len * 8 : 1000; |
645 | |
646 | /* Update all bands and find the one hit with the highest rate. */ |
647 | for (i = 0; i < meter->n_bands; ++i) { |
648 | long long int max_bucket_size; |
649 | |
650 | band = &meter->bands[i]; |
651 | max_bucket_size = band->burst_size * 1000LL; |
652 | |
653 | band->bucket += delta_ms * band->rate; |
654 | if (band->bucket > max_bucket_size) |
655 | band->bucket = max_bucket_size; |
656 | |
657 | if (band->bucket >= cost) { |
658 | band->bucket -= cost; |
659 | } else if (band->rate > band_exceeded_rate) { |
660 | band_exceeded_rate = band->rate; |
661 | band_exceeded_max = i; |
662 | } |
663 | } |
664 | |
665 | if (band_exceeded_max >= 0) { |
666 | /* Update band statistics. */ |
667 | band = &meter->bands[band_exceeded_max]; |
668 | band->stats.n_packets += 1; |
669 | band->stats.n_bytes += skb->len; |
670 | |
671 | /* Drop band triggered, let the caller drop the 'skb'. */ |
672 | if (band->type == OVS_METER_BAND_TYPE_DROP) { |
673 | spin_unlock(lock: &meter->lock); |
674 | return true; |
675 | } |
676 | } |
677 | |
678 | spin_unlock(lock: &meter->lock); |
679 | return false; |
680 | } |
681 | |
682 | static const struct genl_small_ops dp_meter_genl_ops[] = { |
683 | { .cmd = OVS_METER_CMD_FEATURES, |
684 | .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, |
685 | .flags = 0, /* OK for unprivileged users. */ |
686 | .doit = ovs_meter_cmd_features |
687 | }, |
688 | { .cmd = OVS_METER_CMD_SET, |
689 | .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, |
690 | .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN |
691 | * privilege. |
692 | */ |
693 | .doit = ovs_meter_cmd_set, |
694 | }, |
695 | { .cmd = OVS_METER_CMD_GET, |
696 | .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, |
697 | .flags = 0, /* OK for unprivileged users. */ |
698 | .doit = ovs_meter_cmd_get, |
699 | }, |
700 | { .cmd = OVS_METER_CMD_DEL, |
701 | .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, |
702 | .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN |
703 | * privilege. |
704 | */ |
705 | .doit = ovs_meter_cmd_del |
706 | }, |
707 | }; |
708 | |
709 | static const struct genl_multicast_group ovs_meter_multicast_group = { |
710 | .name = OVS_METER_MCGROUP, |
711 | }; |
712 | |
713 | struct genl_family dp_meter_genl_family __ro_after_init = { |
714 | .hdrsize = sizeof(struct ovs_header), |
715 | .name = OVS_METER_FAMILY, |
716 | .version = OVS_METER_VERSION, |
717 | .maxattr = OVS_METER_ATTR_MAX, |
718 | .policy = meter_policy, |
719 | .netnsok = true, |
720 | .parallel_ops = true, |
721 | .small_ops = dp_meter_genl_ops, |
722 | .n_small_ops = ARRAY_SIZE(dp_meter_genl_ops), |
723 | .resv_start_op = OVS_METER_CMD_GET + 1, |
724 | .mcgrps = &ovs_meter_multicast_group, |
725 | .n_mcgrps = 1, |
726 | .module = THIS_MODULE, |
727 | }; |
728 | |
729 | int ovs_meters_init(struct datapath *dp) |
730 | { |
731 | struct dp_meter_table *tbl = &dp->meter_tbl; |
732 | struct dp_meter_instance *ti; |
733 | unsigned long free_mem_bytes; |
734 | |
735 | ti = dp_meter_instance_alloc(DP_METER_ARRAY_SIZE_MIN); |
736 | if (!ti) |
737 | return -ENOMEM; |
738 | |
739 | /* Allow meters in a datapath to use ~3.12% of physical memory. */ |
740 | free_mem_bytes = nr_free_buffer_pages() * (PAGE_SIZE >> 5); |
741 | tbl->max_meters_allowed = min(free_mem_bytes / sizeof(struct dp_meter), |
742 | DP_METER_NUM_MAX); |
743 | if (!tbl->max_meters_allowed) |
744 | goto out_err; |
745 | |
746 | rcu_assign_pointer(tbl->ti, ti); |
747 | tbl->count = 0; |
748 | |
749 | return 0; |
750 | |
751 | out_err: |
752 | dp_meter_instance_free(ti); |
753 | return -ENOMEM; |
754 | } |
755 | |
756 | void ovs_meters_exit(struct datapath *dp) |
757 | { |
758 | struct dp_meter_table *tbl = &dp->meter_tbl; |
759 | struct dp_meter_instance *ti = rcu_dereference_raw(tbl->ti); |
760 | int i; |
761 | |
762 | for (i = 0; i < ti->n_meters; i++) |
763 | ovs_meter_free(rcu_dereference_raw(ti->dp_meters[i])); |
764 | |
765 | dp_meter_instance_free(ti); |
766 | } |
767 | |