// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * net/sched/sch_gred.c	Generic Random Early Detection queue.
 *
 * Authors:    J Hadi Salim (hadi@cyberus.ca) 1998-2002
 *
 *             991129: - Bug fix with grio mode
 *		       - a better single AvgQ mode with Grio (WRED)
 *		       - A finer grained VQ dequeue based on a suggestion
 *			 from Ren Liu
 *		       - More error checks
 *
 * For all the glorious comments look at include/net/red.h
 */
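
/*
 * A rough usage sketch (illustrative only -- option names follow the
 * tc-gred(8) man page and may differ between iproute2 versions):
 *
 *	# create the VQ table: 4 DPs, DP 1 is the default, grio enabled
 *	tc qdisc add dev eth0 root gred setup DPs 4 default 1 grio
 *
 *	# then parameterize one virtual queue
 *	tc qdisc change dev eth0 root gred DP 0 prio 2 limit 60KB \
 *		min 15KB max 45KB burst 20 avpkt 1000 \
 *		bandwidth 10Mbit probability 0.4
 *
 * Packets are steered to a virtual queue by the low bits of
 * skb->tc_index (see tc_index_to_dp() below), typically set by a
 * preceding dsmark qdisc or a classifier action.
 */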

#include <linux/slab.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/skbuff.h>
#include <net/pkt_cls.h>
#include <net/pkt_sched.h>
#include <net/red.h>

#define GRED_DEF_PRIO (MAX_DPs / 2)
#define GRED_VQ_MASK (MAX_DPs - 1)

#define GRED_VQ_RED_FLAGS	(TC_RED_ECN | TC_RED_HARDDROP)

struct gred_sched_data;
struct gred_sched;

struct gred_sched_data {
	u32		limit;		/* HARD maximal queue length	*/
	u32		DP;		/* the drop precedence of this virtualQ */
	u32		red_flags;	/* virtualQ version of red_flags */
	u64		bytesin;	/* bytes seen on virtualQ so far */
	u32		packetsin;	/* packets seen on virtualQ so far */
	u32		backlog;	/* bytes on the virtualQ */
	u8		prio;		/* the prio of this vq */

	struct red_parms parms;
	struct red_vars  vars;
	struct red_stats stats;
};

enum {
	GRED_WRED_MODE = 1,
	GRED_RIO_MODE,
};

struct gred_sched {
	struct gred_sched_data *tab[MAX_DPs];
	unsigned long	flags;
	u32		red_flags;
	u32		DPs;
	u32		def;
	struct red_vars wred_set;
	struct tc_gred_qopt_offload *opt;
};

static inline int gred_wred_mode(struct gred_sched *table)
{
	return test_bit(GRED_WRED_MODE, &table->flags);
}

static inline void gred_enable_wred_mode(struct gred_sched *table)
{
	__set_bit(GRED_WRED_MODE, &table->flags);
}

static inline void gred_disable_wred_mode(struct gred_sched *table)
{
	__clear_bit(GRED_WRED_MODE, &table->flags);
}

static inline int gred_rio_mode(struct gred_sched *table)
{
	return test_bit(GRED_RIO_MODE, &table->flags);
}

static inline void gred_enable_rio_mode(struct gred_sched *table)
{
	__set_bit(GRED_RIO_MODE, &table->flags);
}

static inline void gred_disable_rio_mode(struct gred_sched *table)
{
	__clear_bit(GRED_RIO_MODE, &table->flags);
}

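/* Return 1 if any two populated VQs share the same priority.  Callers
 * enable WRED mode in that case, meaning all VQs operate on one shared
 * average-queue estimate instead of per-VQ averages (see the callers in
 * gred_change_table_def() and gred_change()).
 */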
static inline int gred_wred_mode_check(struct Qdisc *sch)
{
	struct gred_sched *table = qdisc_priv(sch);
	int i;

	/* Really ugly O(n^2), but it shouldn't be needed too frequently. */
	for (i = 0; i < table->DPs; i++) {
		struct gred_sched_data *q = table->tab[i];
		int n;

		if (q == NULL)
			continue;

		for (n = i + 1; n < table->DPs; n++)
			if (table->tab[n] && table->tab[n]->prio == q->prio)
				return 1;
	}

	return 0;
}

static inline unsigned int gred_backlog(struct gred_sched *table,
					struct gred_sched_data *q,
					struct Qdisc *sch)
{
	if (gred_wred_mode(table))
		return sch->qstats.backlog;
	else
		return q->backlog;
}

static inline u16 tc_index_to_dp(struct sk_buff *skb)
{
	return skb->tc_index & GRED_VQ_MASK;
}
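
/* Example: MAX_DPs is 16 (include/uapi/linux/pkt_sched.h), so
 * GRED_VQ_MASK is 0xf and a packet carrying tc_index 0x23 is steered
 * to VQ/DP 0x3.
 */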

static inline void gred_load_wred_set(const struct gred_sched *table,
				      struct gred_sched_data *q)
{
	q->vars.qavg = table->wred_set.qavg;
	q->vars.qidlestart = table->wred_set.qidlestart;
}

static inline void gred_store_wred_set(struct gred_sched *table,
				       struct gred_sched_data *q)
{
	table->wred_set.qavg = q->vars.qavg;
	table->wred_set.qidlestart = q->vars.qidlestart;
}

static int gred_use_ecn(struct gred_sched_data *q)
{
	return q->red_flags & TC_RED_ECN;
}

static int gred_use_harddrop(struct gred_sched_data *q)
{
	return q->red_flags & TC_RED_HARDDROP;
}
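
/* Summarizing the RED_PROB_MARK/RED_HARD_MARK handling in gred_enqueue()
 * below: with TC_RED_ECN set, ECN-capable packets are marked rather than
 * dropped; TC_RED_HARDDROP forces a drop on hard marking even when the
 * packet could have been ECN-marked.
 */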

static bool gred_per_vq_red_flags_used(struct gred_sched *table)
{
	unsigned int i;

	/* Local per-vq flags couldn't have been set unless the global ones were 0 */
	if (table->red_flags)
		return false;
	for (i = 0; i < MAX_DPs; i++)
		if (table->tab[i] && table->tab[i]->red_flags)
			return true;
	return false;
}

static int gred_enqueue(struct sk_buff *skb, struct Qdisc *sch,
			struct sk_buff **to_free)
{
	struct gred_sched_data *q = NULL;
	struct gred_sched *t = qdisc_priv(sch);
	unsigned long qavg = 0;
	u16 dp = tc_index_to_dp(skb);

	if (dp >= t->DPs || (q = t->tab[dp]) == NULL) {
		dp = t->def;

		q = t->tab[dp];
		if (!q) {
			/* Pass through packets not assigned to a DP
			 * if no default DP has been configured. This
			 * allows for DP flows to be left untouched.
			 */
			if (likely(sch->qstats.backlog + qdisc_pkt_len(skb) <=
				   sch->limit))
				return qdisc_enqueue_tail(skb, sch);
			else
				goto drop;
		}

		/* fix tc_index? --could be controversial but needed for
		 * requeueing
		 */
		skb->tc_index = (skb->tc_index & ~GRED_VQ_MASK) | dp;
	}

	/* sum up all the qaves of prios < ours to get the new qave */
	if (!gred_wred_mode(t) && gred_rio_mode(t)) {
		int i;

		for (i = 0; i < t->DPs; i++) {
			if (t->tab[i] && t->tab[i]->prio < q->prio &&
			    !red_is_idling(&t->tab[i]->vars))
				qavg += t->tab[i]->vars.qavg;
		}
	}

	q->packetsin++;
	q->bytesin += qdisc_pkt_len(skb);

	if (gred_wred_mode(t))
		gred_load_wred_set(t, q);

	q->vars.qavg = red_calc_qavg(&q->parms,
				     &q->vars,
				     gred_backlog(t, q, sch));

	if (red_is_idling(&q->vars))
		red_end_of_idle_period(&q->vars);

	if (gred_wred_mode(t))
		gred_store_wred_set(t, q);

	switch (red_action(&q->parms, &q->vars, q->vars.qavg + qavg)) {
	case RED_DONT_MARK:
		break;

	case RED_PROB_MARK:
		qdisc_qstats_overlimit(sch);
		if (!gred_use_ecn(q) || !INET_ECN_set_ce(skb)) {
			q->stats.prob_drop++;
			goto congestion_drop;
		}

		q->stats.prob_mark++;
		break;

	case RED_HARD_MARK:
		qdisc_qstats_overlimit(sch);
		if (gred_use_harddrop(q) || !gred_use_ecn(q) ||
		    !INET_ECN_set_ce(skb)) {
			q->stats.forced_drop++;
			goto congestion_drop;
		}
		q->stats.forced_mark++;
		break;
	}

	if (gred_backlog(t, q, sch) + qdisc_pkt_len(skb) <= q->limit) {
		q->backlog += qdisc_pkt_len(skb);
		return qdisc_enqueue_tail(skb, sch);
	}

	q->stats.pdrop++;
drop:
	return qdisc_drop(skb, sch, to_free);

congestion_drop:
	qdisc_drop(skb, sch, to_free);
	return NET_XMIT_CN;
}

static struct sk_buff *gred_dequeue(struct Qdisc *sch)
{
	struct sk_buff *skb;
	struct gred_sched *t = qdisc_priv(sch);

	skb = qdisc_dequeue_head(sch);

	if (skb) {
		struct gred_sched_data *q;
		u16 dp = tc_index_to_dp(skb);

		if (dp >= t->DPs || (q = t->tab[dp]) == NULL) {
			net_warn_ratelimited("GRED: Unable to relocate VQ 0x%x after dequeue, screwing up backlog\n",
					     tc_index_to_dp(skb));
		} else {
			q->backlog -= qdisc_pkt_len(skb);

			if (gred_wred_mode(t)) {
				if (!sch->qstats.backlog)
					red_start_of_idle_period(&t->wred_set);
			} else {
				if (!q->backlog)
					red_start_of_idle_period(&q->vars);
			}
		}

		return skb;
	}

	return NULL;
}

static void gred_reset(struct Qdisc *sch)
{
	int i;
	struct gred_sched *t = qdisc_priv(sch);

	qdisc_reset_queue(sch);

	for (i = 0; i < t->DPs; i++) {
		struct gred_sched_data *q = t->tab[i];

		if (!q)
			continue;

		red_restart(&q->vars);
		q->backlog = 0;
	}
}

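/* Push the current configuration to capable hardware.  table->opt is
 * allocated once in gred_init() and reused here; the assumption (not
 * spelled out in the original comments) is that the structure is too
 * large to live on the stack.
 */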
static void gred_offload(struct Qdisc *sch, enum tc_gred_command command)
{
	struct gred_sched *table = qdisc_priv(sch);
	struct net_device *dev = qdisc_dev(sch);
	struct tc_gred_qopt_offload *opt = table->opt;

	if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
		return;

	memset(opt, 0, sizeof(*opt));
	opt->command = command;
	opt->handle = sch->handle;
	opt->parent = sch->parent;

	if (command == TC_GRED_REPLACE) {
		unsigned int i;

		opt->set.grio_on = gred_rio_mode(table);
		opt->set.wred_on = gred_wred_mode(table);
		opt->set.dp_cnt = table->DPs;
		opt->set.dp_def = table->def;

		for (i = 0; i < table->DPs; i++) {
			struct gred_sched_data *q = table->tab[i];

			if (!q)
				continue;
			opt->set.tab[i].present = true;
			opt->set.tab[i].limit = q->limit;
			opt->set.tab[i].prio = q->prio;
			opt->set.tab[i].min = q->parms.qth_min >> q->parms.Wlog;
			opt->set.tab[i].max = q->parms.qth_max >> q->parms.Wlog;
			opt->set.tab[i].is_ecn = gred_use_ecn(q);
			opt->set.tab[i].is_harddrop = gred_use_harddrop(q);
			opt->set.tab[i].probability = q->parms.max_P;
			opt->set.tab[i].backlog = &q->backlog;
		}
		opt->set.qstats = &sch->qstats;
	}

	dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_GRED, opt);
}

static int gred_offload_dump_stats(struct Qdisc *sch)
{
	struct gred_sched *table = qdisc_priv(sch);
	struct tc_gred_qopt_offload *hw_stats;
	u64 bytes = 0, packets = 0;
	unsigned int i;
	int ret;

	hw_stats = kzalloc(sizeof(*hw_stats), GFP_KERNEL);
	if (!hw_stats)
		return -ENOMEM;

	hw_stats->command = TC_GRED_STATS;
	hw_stats->handle = sch->handle;
	hw_stats->parent = sch->parent;

	for (i = 0; i < MAX_DPs; i++) {
		gnet_stats_basic_sync_init(&hw_stats->stats.bstats[i]);
		if (table->tab[i])
			hw_stats->stats.xstats[i] = &table->tab[i]->stats;
	}

	ret = qdisc_offload_dump_helper(sch, TC_SETUP_QDISC_GRED, hw_stats);
	/* Even if driver returns failure adjust the stats - in case offload
	 * ended but driver still wants to adjust the values.
	 */
	sch_tree_lock(sch);
	for (i = 0; i < MAX_DPs; i++) {
		if (!table->tab[i])
			continue;
		table->tab[i]->packetsin += u64_stats_read(&hw_stats->stats.bstats[i].packets);
		table->tab[i]->bytesin += u64_stats_read(&hw_stats->stats.bstats[i].bytes);
		table->tab[i]->backlog += hw_stats->stats.qstats[i].backlog;

		bytes += u64_stats_read(&hw_stats->stats.bstats[i].bytes);
		packets += u64_stats_read(&hw_stats->stats.bstats[i].packets);
		sch->qstats.qlen += hw_stats->stats.qstats[i].qlen;
		sch->qstats.backlog += hw_stats->stats.qstats[i].backlog;
		sch->qstats.drops += hw_stats->stats.qstats[i].drops;
		sch->qstats.requeues += hw_stats->stats.qstats[i].requeues;
		sch->qstats.overlimits += hw_stats->stats.qstats[i].overlimits;
	}
	_bstats_update(&sch->bstats, bytes, packets);
	sch_tree_unlock(sch);

	kfree(hw_stats);
	return ret;
}

static inline void gred_destroy_vq(struct gred_sched_data *q)
{
	kfree(q);
}
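
/* (Re)size the virtual queue table on a TCA_GRED_DPS "setup" request:
 * record DPs/def/flags, then destroy any VQ that is now out of range
 * ("shadowed").  Called from gred_init() and gred_change().
 */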
static int gred_change_table_def(struct Qdisc *sch, struct nlattr *dps,
				 struct netlink_ext_ack *extack)
{
	struct gred_sched *table = qdisc_priv(sch);
	struct tc_gred_sopt *sopt;
	bool red_flags_changed;
	int i;

	if (!dps)
		return -EINVAL;

	sopt = nla_data(dps);

	if (sopt->DPs > MAX_DPs) {
		NL_SET_ERR_MSG_MOD(extack, "number of virtual queues too high");
		return -EINVAL;
	}
	if (sopt->DPs == 0) {
		NL_SET_ERR_MSG_MOD(extack,
				   "number of virtual queues can't be 0");
		return -EINVAL;
	}
	if (sopt->def_DP >= sopt->DPs) {
		NL_SET_ERR_MSG_MOD(extack, "default virtual queue above virtual queue count");
		return -EINVAL;
	}
	if (sopt->flags && gred_per_vq_red_flags_used(table)) {
		NL_SET_ERR_MSG_MOD(extack, "can't set per-Qdisc RED flags when per-virtual queue flags are used");
		return -EINVAL;
	}

	sch_tree_lock(sch);
	table->DPs = sopt->DPs;
	table->def = sopt->def_DP;
	red_flags_changed = table->red_flags != sopt->flags;
	table->red_flags = sopt->flags;

	/*
	 * Every entry point to GRED is synchronized with the above code
	 * and the DP is checked against DPs, i.e. shadowed VQs can no
	 * longer be found so we can unlock right here.
	 */
	sch_tree_unlock(sch);

	if (sopt->grio) {
		gred_enable_rio_mode(table);
		gred_disable_wred_mode(table);
		if (gred_wred_mode_check(sch))
			gred_enable_wred_mode(table);
	} else {
		gred_disable_rio_mode(table);
		gred_disable_wred_mode(table);
	}

	if (red_flags_changed)
		for (i = 0; i < table->DPs; i++)
			if (table->tab[i])
				table->tab[i]->red_flags =
					table->red_flags & GRED_VQ_RED_FLAGS;

	for (i = table->DPs; i < MAX_DPs; i++) {
		if (table->tab[i]) {
			pr_warn("GRED: Warning: Destroying shadowed VQ 0x%x\n",
				i);
			gred_destroy_vq(table->tab[i]);
			table->tab[i] = NULL;
		}
	}

	gred_offload(sch, TC_GRED_REPLACE);
	return 0;
}

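/* Configure a single VQ.  @prealloc is allocated by the caller outside
 * the qdisc tree lock (GFP_KERNEL) and is consumed here if the VQ does
 * not exist yet; the caller frees it otherwise.
 */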
static inline int gred_change_vq(struct Qdisc *sch, int dp,
				 struct tc_gred_qopt *ctl, int prio,
				 u8 *stab, u32 max_P,
				 struct gred_sched_data **prealloc,
				 struct netlink_ext_ack *extack)
{
	struct gred_sched *table = qdisc_priv(sch);
	struct gred_sched_data *q = table->tab[dp];

	if (!red_check_params(ctl->qth_min, ctl->qth_max, ctl->Wlog,
			      ctl->Scell_log, stab)) {
		NL_SET_ERR_MSG_MOD(extack, "invalid RED parameters");
		return -EINVAL;
	}

	if (!q) {
		table->tab[dp] = q = *prealloc;
		*prealloc = NULL;
		if (!q)
			return -ENOMEM;
		q->red_flags = table->red_flags & GRED_VQ_RED_FLAGS;
	}

	q->DP = dp;
	q->prio = prio;
	if (ctl->limit > sch->limit)
		q->limit = sch->limit;
	else
		q->limit = ctl->limit;

	if (q->backlog == 0)
		red_end_of_idle_period(&q->vars);

	red_set_parms(&q->parms,
		      ctl->qth_min, ctl->qth_max, ctl->Wlog, ctl->Plog,
		      ctl->Scell_log, stab, max_P);
	red_set_vars(&q->vars);
	return 0;
}

static const struct nla_policy gred_vq_policy[TCA_GRED_VQ_MAX + 1] = {
	[TCA_GRED_VQ_DP]	= { .type = NLA_U32 },
	[TCA_GRED_VQ_FLAGS]	= { .type = NLA_U32 },
};

static const struct nla_policy gred_vqe_policy[TCA_GRED_VQ_ENTRY_MAX + 1] = {
	[TCA_GRED_VQ_ENTRY]	= { .type = NLA_NESTED },
};

static const struct nla_policy gred_policy[TCA_GRED_MAX + 1] = {
	[TCA_GRED_PARMS]	= { .len = sizeof(struct tc_gred_qopt) },
	[TCA_GRED_STAB]		= { .len = 256 },
	[TCA_GRED_DPS]		= { .len = sizeof(struct tc_gred_sopt) },
	[TCA_GRED_MAX_P]	= { .type = NLA_U32 },
	[TCA_GRED_LIMIT]	= { .type = NLA_U32 },
	[TCA_GRED_VQ_LIST]	= { .type = NLA_NESTED },
};
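
/* Note: TCA_GRED_STAB is a raw 256-byte Scell/idle-time lookup table
 * handed straight to red_set_parms(), hence the bare .len above rather
 * than a structured type.
 */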

static void gred_vq_apply(struct gred_sched *table, const struct nlattr *entry)
{
	struct nlattr *tb[TCA_GRED_VQ_MAX + 1];
	u32 dp;

	nla_parse_nested_deprecated(tb, TCA_GRED_VQ_MAX, entry,
				    gred_vq_policy, NULL);

	dp = nla_get_u32(tb[TCA_GRED_VQ_DP]);

	if (tb[TCA_GRED_VQ_FLAGS])
		table->tab[dp]->red_flags = nla_get_u32(tb[TCA_GRED_VQ_FLAGS]);
}

static void gred_vqs_apply(struct gred_sched *table, struct nlattr *vqs)
{
	const struct nlattr *attr;
	int rem;

	nla_for_each_nested(attr, vqs, rem) {
		switch (nla_type(attr)) {
		case TCA_GRED_VQ_ENTRY:
			gred_vq_apply(table, attr);
			break;
		}
	}
}

static int gred_vq_validate(struct gred_sched *table, u32 cdp,
			    const struct nlattr *entry,
			    struct netlink_ext_ack *extack)
{
	struct nlattr *tb[TCA_GRED_VQ_MAX + 1];
	int err;
	u32 dp;

	err = nla_parse_nested_deprecated(tb, TCA_GRED_VQ_MAX, entry,
					  gred_vq_policy, extack);
	if (err < 0)
		return err;

	if (!tb[TCA_GRED_VQ_DP]) {
		NL_SET_ERR_MSG_MOD(extack, "Virtual queue with no index specified");
		return -EINVAL;
	}
	dp = nla_get_u32(tb[TCA_GRED_VQ_DP]);
	if (dp >= table->DPs) {
		NL_SET_ERR_MSG_MOD(extack, "Virtual queue with index out of bounds");
		return -EINVAL;
	}
	if (dp != cdp && !table->tab[dp]) {
		NL_SET_ERR_MSG_MOD(extack, "Virtual queue not yet instantiated");
		return -EINVAL;
	}

	if (tb[TCA_GRED_VQ_FLAGS]) {
		u32 red_flags = nla_get_u32(tb[TCA_GRED_VQ_FLAGS]);

		if (table->red_flags && table->red_flags != red_flags) {
			NL_SET_ERR_MSG_MOD(extack, "can't change per-virtual queue RED flags when per-Qdisc flags are used");
			return -EINVAL;
		}
		if (red_flags & ~GRED_VQ_RED_FLAGS) {
			NL_SET_ERR_MSG_MOD(extack,
					   "invalid RED flags specified");
			return -EINVAL;
		}
	}

	return 0;
}

static int gred_vqs_validate(struct gred_sched *table, u32 cdp,
			     struct nlattr *vqs, struct netlink_ext_ack *extack)
{
	const struct nlattr *attr;
	int rem, err;

	err = nla_validate_nested_deprecated(vqs, TCA_GRED_VQ_ENTRY_MAX,
					     gred_vqe_policy, extack);
	if (err < 0)
		return err;

	nla_for_each_nested(attr, vqs, rem) {
		switch (nla_type(attr)) {
		case TCA_GRED_VQ_ENTRY:
			err = gred_vq_validate(table, cdp, attr, extack);
			if (err)
				return err;
			break;
		default:
			NL_SET_ERR_MSG_MOD(extack, "GRED_VQ_LIST can contain only entry attributes");
			return -EINVAL;
		}
	}

	if (rem > 0) {
		NL_SET_ERR_MSG_MOD(extack, "Trailing data after parsing virtual queue list");
		return -EINVAL;
	}

	return 0;
}

static int gred_change(struct Qdisc *sch, struct nlattr *opt,
		       struct netlink_ext_ack *extack)
{
	struct gred_sched *table = qdisc_priv(sch);
	struct tc_gred_qopt *ctl;
	struct nlattr *tb[TCA_GRED_MAX + 1];
	int err, prio = GRED_DEF_PRIO;
	u8 *stab;
	u32 max_P;
	struct gred_sched_data *prealloc;

	err = nla_parse_nested_deprecated(tb, TCA_GRED_MAX, opt, gred_policy,
					  extack);
	if (err < 0)
		return err;

	if (tb[TCA_GRED_PARMS] == NULL && tb[TCA_GRED_STAB] == NULL) {
		if (tb[TCA_GRED_LIMIT] != NULL)
			sch->limit = nla_get_u32(tb[TCA_GRED_LIMIT]);
		return gred_change_table_def(sch, tb[TCA_GRED_DPS], extack);
	}

	if (tb[TCA_GRED_PARMS] == NULL ||
	    tb[TCA_GRED_STAB] == NULL ||
	    tb[TCA_GRED_LIMIT] != NULL) {
		NL_SET_ERR_MSG_MOD(extack, "can't configure Qdisc and virtual queue at the same time");
		return -EINVAL;
	}

	max_P = tb[TCA_GRED_MAX_P] ? nla_get_u32(tb[TCA_GRED_MAX_P]) : 0;

	ctl = nla_data(tb[TCA_GRED_PARMS]);
	stab = nla_data(tb[TCA_GRED_STAB]);

	if (ctl->DP >= table->DPs) {
		NL_SET_ERR_MSG_MOD(extack, "virtual queue index above virtual queue count");
		return -EINVAL;
	}

	if (tb[TCA_GRED_VQ_LIST]) {
		err = gred_vqs_validate(table, ctl->DP, tb[TCA_GRED_VQ_LIST],
					extack);
		if (err)
			return err;
	}

	if (gred_rio_mode(table)) {
		if (ctl->prio == 0) {
			int def_prio = GRED_DEF_PRIO;

			if (table->tab[table->def])
				def_prio = table->tab[table->def]->prio;

			printk(KERN_DEBUG "GRED: DP %u does not have a prio setting default to %d\n",
			       ctl->DP, def_prio);

			prio = def_prio;
		} else
			prio = ctl->prio;
	}

	prealloc = kzalloc(sizeof(*prealloc), GFP_KERNEL);
	sch_tree_lock(sch);

	err = gred_change_vq(sch, ctl->DP, ctl, prio, stab, max_P, &prealloc,
			     extack);
	if (err < 0)
		goto err_unlock_free;

	if (tb[TCA_GRED_VQ_LIST])
		gred_vqs_apply(table, tb[TCA_GRED_VQ_LIST]);

	if (gred_rio_mode(table)) {
		gred_disable_wred_mode(table);
		if (gred_wred_mode_check(sch))
			gred_enable_wred_mode(table);
	}

	sch_tree_unlock(sch);
	kfree(prealloc);

	gred_offload(sch, TC_GRED_REPLACE);
	return 0;

err_unlock_free:
	sch_tree_unlock(sch);
	kfree(prealloc);
	return err;
}

static int gred_init(struct Qdisc *sch, struct nlattr *opt,
		     struct netlink_ext_ack *extack)
{
	struct gred_sched *table = qdisc_priv(sch);
	struct nlattr *tb[TCA_GRED_MAX + 1];
	int err;

	if (!opt)
		return -EINVAL;

	err = nla_parse_nested_deprecated(tb, TCA_GRED_MAX, opt, gred_policy,
					  extack);
	if (err < 0)
		return err;

	if (tb[TCA_GRED_PARMS] || tb[TCA_GRED_STAB]) {
		NL_SET_ERR_MSG_MOD(extack,
				   "virtual queue configuration can't be specified at initialization time");
		return -EINVAL;
	}

	if (tb[TCA_GRED_LIMIT])
		sch->limit = nla_get_u32(tb[TCA_GRED_LIMIT]);
	else
		sch->limit = qdisc_dev(sch)->tx_queue_len
		             * psched_mtu(qdisc_dev(sch));

	if (qdisc_dev(sch)->netdev_ops->ndo_setup_tc) {
		table->opt = kzalloc(sizeof(*table->opt), GFP_KERNEL);
		if (!table->opt)
			return -ENOMEM;
	}

	return gred_change_table_def(sch, tb[TCA_GRED_DPS], extack);
}

static int gred_dump(struct Qdisc *sch, struct sk_buff *skb)
{
	struct gred_sched *table = qdisc_priv(sch);
	struct nlattr *parms, *vqs, *opts = NULL;
	int i;
	u32 max_p[MAX_DPs];
	struct tc_gred_sopt sopt = {
		.DPs	= table->DPs,
		.def_DP	= table->def,
		.grio	= gred_rio_mode(table),
		.flags	= table->red_flags,
	};

	if (gred_offload_dump_stats(sch))
		goto nla_put_failure;

	opts = nla_nest_start_noflag(skb, TCA_OPTIONS);
	if (opts == NULL)
		goto nla_put_failure;
	if (nla_put(skb, TCA_GRED_DPS, sizeof(sopt), &sopt))
		goto nla_put_failure;

	for (i = 0; i < MAX_DPs; i++) {
		struct gred_sched_data *q = table->tab[i];

		max_p[i] = q ? q->parms.max_P : 0;
	}
	if (nla_put(skb, TCA_GRED_MAX_P, sizeof(max_p), max_p))
		goto nla_put_failure;

	if (nla_put_u32(skb, TCA_GRED_LIMIT, sch->limit))
		goto nla_put_failure;

	/* Old style all-in-one dump of VQs */
	parms = nla_nest_start_noflag(skb, TCA_GRED_PARMS);
	if (parms == NULL)
		goto nla_put_failure;

	for (i = 0; i < MAX_DPs; i++) {
		struct gred_sched_data *q = table->tab[i];
		struct tc_gred_qopt opt;
		unsigned long qavg;

		memset(&opt, 0, sizeof(opt));

		if (!q) {
			/* hack -- fix at some point with proper message.
			 * This is how we indicate to tc that there is no VQ
			 * at this DP.
			 */
			opt.DP = MAX_DPs + i;
			goto append_opt;
		}

		opt.limit	= q->limit;
		opt.DP		= q->DP;
		opt.backlog	= gred_backlog(table, q, sch);
		opt.prio	= q->prio;
		opt.qth_min	= q->parms.qth_min >> q->parms.Wlog;
		opt.qth_max	= q->parms.qth_max >> q->parms.Wlog;
		opt.Wlog	= q->parms.Wlog;
		opt.Plog	= q->parms.Plog;
		opt.Scell_log	= q->parms.Scell_log;
		opt.early	= q->stats.prob_drop;
		opt.forced	= q->stats.forced_drop;
		opt.pdrop	= q->stats.pdrop;
		opt.packets	= q->packetsin;
		opt.bytesin	= q->bytesin;

		if (gred_wred_mode(table))
			gred_load_wred_set(table, q);

		qavg = red_calc_qavg(&q->parms, &q->vars,
				     q->vars.qavg >> q->parms.Wlog);
		opt.qave = qavg >> q->parms.Wlog;

append_opt:
		if (nla_append(skb, sizeof(opt), &opt) < 0)
			goto nla_put_failure;
	}

	nla_nest_end(skb, parms);

	/* Dump the VQs again, in more structured way */
	vqs = nla_nest_start_noflag(skb, TCA_GRED_VQ_LIST);
	if (!vqs)
		goto nla_put_failure;

	for (i = 0; i < MAX_DPs; i++) {
		struct gred_sched_data *q = table->tab[i];
		struct nlattr *vq;

		if (!q)
			continue;

		vq = nla_nest_start_noflag(skb, TCA_GRED_VQ_ENTRY);
		if (!vq)
			goto nla_put_failure;

		if (nla_put_u32(skb, TCA_GRED_VQ_DP, q->DP))
			goto nla_put_failure;

		if (nla_put_u32(skb, TCA_GRED_VQ_FLAGS, q->red_flags))
			goto nla_put_failure;

		/* Stats */
		if (nla_put_u64_64bit(skb, TCA_GRED_VQ_STAT_BYTES, q->bytesin,
				      TCA_GRED_VQ_PAD))
			goto nla_put_failure;
		if (nla_put_u32(skb, TCA_GRED_VQ_STAT_PACKETS, q->packetsin))
			goto nla_put_failure;
		if (nla_put_u32(skb, TCA_GRED_VQ_STAT_BACKLOG,
				gred_backlog(table, q, sch)))
			goto nla_put_failure;
		if (nla_put_u32(skb, TCA_GRED_VQ_STAT_PROB_DROP,
				q->stats.prob_drop))
			goto nla_put_failure;
		if (nla_put_u32(skb, TCA_GRED_VQ_STAT_PROB_MARK,
				q->stats.prob_mark))
			goto nla_put_failure;
		if (nla_put_u32(skb, TCA_GRED_VQ_STAT_FORCED_DROP,
				q->stats.forced_drop))
			goto nla_put_failure;
		if (nla_put_u32(skb, TCA_GRED_VQ_STAT_FORCED_MARK,
				q->stats.forced_mark))
			goto nla_put_failure;
		if (nla_put_u32(skb, TCA_GRED_VQ_STAT_PDROP, q->stats.pdrop))
			goto nla_put_failure;

		nla_nest_end(skb, vq);
	}
	nla_nest_end(skb, vqs);

	return nla_nest_end(skb, opts);

nla_put_failure:
	nla_nest_cancel(skb, opts);
	return -EMSGSIZE;
}

static void gred_destroy(struct Qdisc *sch)
{
	struct gred_sched *table = qdisc_priv(sch);
	int i;

	for (i = 0; i < table->DPs; i++)
		gred_destroy_vq(table->tab[i]);

	gred_offload(sch, TC_GRED_DESTROY);
	kfree(table->opt);
}

static struct Qdisc_ops gred_qdisc_ops __read_mostly = {
	.id		= "gred",
	.priv_size	= sizeof(struct gred_sched),
	.enqueue	= gred_enqueue,
	.dequeue	= gred_dequeue,
	.peek		= qdisc_peek_head,
	.init		= gred_init,
	.reset		= gred_reset,
	.destroy	= gred_destroy,
	.change		= gred_change,
	.dump		= gred_dump,
	.owner		= THIS_MODULE,
};

static int __init gred_module_init(void)
{
	return register_qdisc(&gred_qdisc_ops);
}

static void __exit gred_module_exit(void)
{
	unregister_qdisc(&gred_qdisc_ops);
}

module_init(gred_module_init)
module_exit(gred_module_exit)

MODULE_LICENSE("GPL");