1 | // SPDX-License-Identifier: GPL-2.0-or-later |
2 | /* |
3 | * net/sched/sch_prio.c Simple 3-band priority "scheduler". |
4 | * |
5 | * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> |
6 | * Fixes: 19990609: J Hadi Salim <hadi@nortelnetworks.com>: |
7 | * Init -- EINVAL when opt undefined |
8 | */ |
9 | |
10 | #include <linux/module.h> |
11 | #include <linux/slab.h> |
12 | #include <linux/types.h> |
13 | #include <linux/kernel.h> |
14 | #include <linux/string.h> |
15 | #include <linux/errno.h> |
16 | #include <linux/skbuff.h> |
17 | #include <net/netlink.h> |
18 | #include <net/pkt_sched.h> |
19 | #include <net/pkt_cls.h> |
20 | |
21 | struct prio_sched_data { |
22 | int bands; |
23 | struct tcf_proto __rcu *filter_list; |
24 | struct tcf_block *block; |
25 | u8 prio2band[TC_PRIO_MAX+1]; |
26 | struct Qdisc *queues[TCQ_PRIO_BANDS]; |
27 | }; |
28 | |
29 | |
30 | static struct Qdisc * |
31 | prio_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr) |
32 | { |
33 | struct prio_sched_data *q = qdisc_priv(sch); |
34 | u32 band = skb->priority; |
35 | struct tcf_result res; |
36 | struct tcf_proto *fl; |
37 | int err; |
38 | |
39 | *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS; |
40 | if (TC_H_MAJ(skb->priority) != sch->handle) { |
41 | fl = rcu_dereference_bh(q->filter_list); |
42 | err = tcf_classify(skb, NULL, tp: fl, res: &res, compat_mode: false); |
43 | #ifdef CONFIG_NET_CLS_ACT |
44 | switch (err) { |
45 | case TC_ACT_STOLEN: |
46 | case TC_ACT_QUEUED: |
47 | case TC_ACT_TRAP: |
48 | *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN; |
49 | fallthrough; |
50 | case TC_ACT_SHOT: |
51 | return NULL; |
52 | } |
53 | #endif |
54 | if (!fl || err < 0) { |
55 | if (TC_H_MAJ(band)) |
56 | band = 0; |
57 | return q->queues[q->prio2band[band & TC_PRIO_MAX]]; |
58 | } |
59 | band = res.classid; |
60 | } |
61 | band = TC_H_MIN(band) - 1; |
62 | if (band >= q->bands) |
63 | return q->queues[q->prio2band[0]]; |
64 | |
65 | return q->queues[band]; |
66 | } |
67 | |
68 | static int |
69 | prio_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) |
70 | { |
71 | unsigned int len = qdisc_pkt_len(skb); |
72 | struct Qdisc *qdisc; |
73 | int ret; |
74 | |
75 | qdisc = prio_classify(skb, sch, qerr: &ret); |
76 | #ifdef CONFIG_NET_CLS_ACT |
77 | if (qdisc == NULL) { |
78 | |
79 | if (ret & __NET_XMIT_BYPASS) |
80 | qdisc_qstats_drop(sch); |
81 | __qdisc_drop(skb, to_free); |
82 | return ret; |
83 | } |
84 | #endif |
85 | |
86 | ret = qdisc_enqueue(skb, sch: qdisc, to_free); |
87 | if (ret == NET_XMIT_SUCCESS) { |
88 | sch->qstats.backlog += len; |
89 | sch->q.qlen++; |
90 | return NET_XMIT_SUCCESS; |
91 | } |
92 | if (net_xmit_drop_count(ret)) |
93 | qdisc_qstats_drop(sch); |
94 | return ret; |
95 | } |
96 | |
97 | static struct sk_buff *prio_peek(struct Qdisc *sch) |
98 | { |
99 | struct prio_sched_data *q = qdisc_priv(sch); |
100 | int prio; |
101 | |
102 | for (prio = 0; prio < q->bands; prio++) { |
103 | struct Qdisc *qdisc = q->queues[prio]; |
104 | struct sk_buff *skb = qdisc->ops->peek(qdisc); |
105 | if (skb) |
106 | return skb; |
107 | } |
108 | return NULL; |
109 | } |
110 | |
111 | static struct sk_buff *prio_dequeue(struct Qdisc *sch) |
112 | { |
113 | struct prio_sched_data *q = qdisc_priv(sch); |
114 | int prio; |
115 | |
116 | for (prio = 0; prio < q->bands; prio++) { |
117 | struct Qdisc *qdisc = q->queues[prio]; |
118 | struct sk_buff *skb = qdisc_dequeue_peeked(sch: qdisc); |
119 | if (skb) { |
120 | qdisc_bstats_update(sch, skb); |
121 | qdisc_qstats_backlog_dec(sch, skb); |
122 | sch->q.qlen--; |
123 | return skb; |
124 | } |
125 | } |
126 | return NULL; |
127 | |
128 | } |
129 | |
130 | static void |
131 | prio_reset(struct Qdisc *sch) |
132 | { |
133 | int prio; |
134 | struct prio_sched_data *q = qdisc_priv(sch); |
135 | |
136 | for (prio = 0; prio < q->bands; prio++) |
137 | qdisc_reset(qdisc: q->queues[prio]); |
138 | } |
139 | |
140 | static int prio_offload(struct Qdisc *sch, struct tc_prio_qopt *qopt) |
141 | { |
142 | struct net_device *dev = qdisc_dev(qdisc: sch); |
143 | struct tc_prio_qopt_offload opt = { |
144 | .handle = sch->handle, |
145 | .parent = sch->parent, |
146 | }; |
147 | |
148 | if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc) |
149 | return -EOPNOTSUPP; |
150 | |
151 | if (qopt) { |
152 | opt.command = TC_PRIO_REPLACE; |
153 | opt.replace_params.bands = qopt->bands; |
154 | memcpy(&opt.replace_params.priomap, qopt->priomap, |
155 | TC_PRIO_MAX + 1); |
156 | opt.replace_params.qstats = &sch->qstats; |
157 | } else { |
158 | opt.command = TC_PRIO_DESTROY; |
159 | } |
160 | |
161 | return dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_PRIO, &opt); |
162 | } |
163 | |
164 | static void |
165 | prio_destroy(struct Qdisc *sch) |
166 | { |
167 | int prio; |
168 | struct prio_sched_data *q = qdisc_priv(sch); |
169 | |
170 | tcf_block_put(block: q->block); |
171 | prio_offload(sch, NULL); |
172 | for (prio = 0; prio < q->bands; prio++) |
173 | qdisc_put(qdisc: q->queues[prio]); |
174 | } |
175 | |
176 | static int prio_tune(struct Qdisc *sch, struct nlattr *opt, |
177 | struct netlink_ext_ack *extack) |
178 | { |
179 | struct prio_sched_data *q = qdisc_priv(sch); |
180 | struct Qdisc *queues[TCQ_PRIO_BANDS]; |
181 | int oldbands = q->bands, i; |
182 | struct tc_prio_qopt *qopt; |
183 | |
184 | if (nla_len(nla: opt) < sizeof(*qopt)) |
185 | return -EINVAL; |
186 | qopt = nla_data(nla: opt); |
187 | |
188 | if (qopt->bands > TCQ_PRIO_BANDS || qopt->bands < TCQ_MIN_PRIO_BANDS) |
189 | return -EINVAL; |
190 | |
191 | for (i = 0; i <= TC_PRIO_MAX; i++) { |
192 | if (qopt->priomap[i] >= qopt->bands) |
193 | return -EINVAL; |
194 | } |
195 | |
196 | /* Before commit, make sure we can allocate all new qdiscs */ |
197 | for (i = oldbands; i < qopt->bands; i++) { |
198 | queues[i] = qdisc_create_dflt(dev_queue: sch->dev_queue, ops: &pfifo_qdisc_ops, |
199 | TC_H_MAKE(sch->handle, i + 1), |
200 | extack); |
201 | if (!queues[i]) { |
202 | while (i > oldbands) |
203 | qdisc_put(qdisc: queues[--i]); |
204 | return -ENOMEM; |
205 | } |
206 | } |
207 | |
208 | prio_offload(sch, qopt); |
209 | sch_tree_lock(q: sch); |
210 | q->bands = qopt->bands; |
211 | memcpy(q->prio2band, qopt->priomap, TC_PRIO_MAX+1); |
212 | |
213 | for (i = q->bands; i < oldbands; i++) |
214 | qdisc_tree_flush_backlog(sch: q->queues[i]); |
215 | |
216 | for (i = oldbands; i < q->bands; i++) { |
217 | q->queues[i] = queues[i]; |
218 | if (q->queues[i] != &noop_qdisc) |
219 | qdisc_hash_add(q: q->queues[i], invisible: true); |
220 | } |
221 | |
222 | sch_tree_unlock(q: sch); |
223 | |
224 | for (i = q->bands; i < oldbands; i++) |
225 | qdisc_put(qdisc: q->queues[i]); |
226 | return 0; |
227 | } |
228 | |
229 | static int prio_init(struct Qdisc *sch, struct nlattr *opt, |
230 | struct netlink_ext_ack *extack) |
231 | { |
232 | struct prio_sched_data *q = qdisc_priv(sch); |
233 | int err; |
234 | |
235 | if (!opt) |
236 | return -EINVAL; |
237 | |
238 | err = tcf_block_get(p_block: &q->block, p_filter_chain: &q->filter_list, q: sch, extack); |
239 | if (err) |
240 | return err; |
241 | |
242 | return prio_tune(sch, opt, extack); |
243 | } |
244 | |
245 | static int prio_dump_offload(struct Qdisc *sch) |
246 | { |
247 | struct tc_prio_qopt_offload hw_stats = { |
248 | .command = TC_PRIO_STATS, |
249 | .handle = sch->handle, |
250 | .parent = sch->parent, |
251 | { |
252 | .stats = { |
253 | .bstats = &sch->bstats, |
254 | .qstats = &sch->qstats, |
255 | }, |
256 | }, |
257 | }; |
258 | |
259 | return qdisc_offload_dump_helper(q: sch, type: TC_SETUP_QDISC_PRIO, type_data: &hw_stats); |
260 | } |
261 | |
262 | static int prio_dump(struct Qdisc *sch, struct sk_buff *skb) |
263 | { |
264 | struct prio_sched_data *q = qdisc_priv(sch); |
265 | unsigned char *b = skb_tail_pointer(skb); |
266 | struct tc_prio_qopt opt; |
267 | int err; |
268 | |
269 | opt.bands = q->bands; |
270 | memcpy(&opt.priomap, q->prio2band, TC_PRIO_MAX + 1); |
271 | |
272 | err = prio_dump_offload(sch); |
273 | if (err) |
274 | goto nla_put_failure; |
275 | |
276 | if (nla_put(skb, attrtype: TCA_OPTIONS, attrlen: sizeof(opt), data: &opt)) |
277 | goto nla_put_failure; |
278 | |
279 | return skb->len; |
280 | |
281 | nla_put_failure: |
282 | nlmsg_trim(skb, mark: b); |
283 | return -1; |
284 | } |
285 | |
286 | static int prio_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, |
287 | struct Qdisc **old, struct netlink_ext_ack *extack) |
288 | { |
289 | struct prio_sched_data *q = qdisc_priv(sch); |
290 | struct tc_prio_qopt_offload graft_offload; |
291 | unsigned long band = arg - 1; |
292 | |
293 | if (!new) { |
294 | new = qdisc_create_dflt(dev_queue: sch->dev_queue, ops: &pfifo_qdisc_ops, |
295 | TC_H_MAKE(sch->handle, arg), extack); |
296 | if (!new) |
297 | new = &noop_qdisc; |
298 | else |
299 | qdisc_hash_add(q: new, invisible: true); |
300 | } |
301 | |
302 | *old = qdisc_replace(sch, new, pold: &q->queues[band]); |
303 | |
304 | graft_offload.handle = sch->handle; |
305 | graft_offload.parent = sch->parent; |
306 | graft_offload.graft_params.band = band; |
307 | graft_offload.graft_params.child_handle = new->handle; |
308 | graft_offload.command = TC_PRIO_GRAFT; |
309 | |
310 | qdisc_offload_graft_helper(dev: qdisc_dev(qdisc: sch), sch, new, old: *old, |
311 | type: TC_SETUP_QDISC_PRIO, type_data: &graft_offload, |
312 | extack); |
313 | return 0; |
314 | } |
315 | |
316 | static struct Qdisc * |
317 | prio_leaf(struct Qdisc *sch, unsigned long arg) |
318 | { |
319 | struct prio_sched_data *q = qdisc_priv(sch); |
320 | unsigned long band = arg - 1; |
321 | |
322 | return q->queues[band]; |
323 | } |
324 | |
325 | static unsigned long prio_find(struct Qdisc *sch, u32 classid) |
326 | { |
327 | struct prio_sched_data *q = qdisc_priv(sch); |
328 | unsigned long band = TC_H_MIN(classid); |
329 | |
330 | if (band - 1 >= q->bands) |
331 | return 0; |
332 | return band; |
333 | } |
334 | |
335 | static unsigned long prio_bind(struct Qdisc *sch, unsigned long parent, u32 classid) |
336 | { |
337 | return prio_find(sch, classid); |
338 | } |
339 | |
340 | |
/* unbind_tcf hook: intentionally a no-op. */
static void prio_unbind(struct Qdisc *q, unsigned long cl)
{
}
344 | |
345 | static int prio_dump_class(struct Qdisc *sch, unsigned long cl, struct sk_buff *skb, |
346 | struct tcmsg *tcm) |
347 | { |
348 | struct prio_sched_data *q = qdisc_priv(sch); |
349 | |
350 | tcm->tcm_handle |= TC_H_MIN(cl); |
351 | tcm->tcm_info = q->queues[cl-1]->handle; |
352 | return 0; |
353 | } |
354 | |
355 | static int prio_dump_class_stats(struct Qdisc *sch, unsigned long cl, |
356 | struct gnet_dump *d) |
357 | { |
358 | struct prio_sched_data *q = qdisc_priv(sch); |
359 | struct Qdisc *cl_q; |
360 | |
361 | cl_q = q->queues[cl - 1]; |
362 | if (gnet_stats_copy_basic(d, cpu: cl_q->cpu_bstats, |
363 | b: &cl_q->bstats, running: true) < 0 || |
364 | qdisc_qstats_copy(d, sch: cl_q) < 0) |
365 | return -1; |
366 | |
367 | return 0; |
368 | } |
369 | |
370 | static void prio_walk(struct Qdisc *sch, struct qdisc_walker *arg) |
371 | { |
372 | struct prio_sched_data *q = qdisc_priv(sch); |
373 | int prio; |
374 | |
375 | if (arg->stop) |
376 | return; |
377 | |
378 | for (prio = 0; prio < q->bands; prio++) { |
379 | if (!tc_qdisc_stats_dump(sch, cl: prio + 1, arg)) |
380 | break; |
381 | } |
382 | } |
383 | |
384 | static struct tcf_block *prio_tcf_block(struct Qdisc *sch, unsigned long cl, |
385 | struct netlink_ext_ack *extack) |
386 | { |
387 | struct prio_sched_data *q = qdisc_priv(sch); |
388 | |
389 | if (cl) |
390 | return NULL; |
391 | return q->block; |
392 | } |
393 | |
394 | static const struct Qdisc_class_ops prio_class_ops = { |
395 | .graft = prio_graft, |
396 | .leaf = prio_leaf, |
397 | .find = prio_find, |
398 | .walk = prio_walk, |
399 | .tcf_block = prio_tcf_block, |
400 | .bind_tcf = prio_bind, |
401 | .unbind_tcf = prio_unbind, |
402 | .dump = prio_dump_class, |
403 | .dump_stats = prio_dump_class_stats, |
404 | }; |
405 | |
406 | static struct Qdisc_ops prio_qdisc_ops __read_mostly = { |
407 | .next = NULL, |
408 | .cl_ops = &prio_class_ops, |
409 | .id = "prio" , |
410 | .priv_size = sizeof(struct prio_sched_data), |
411 | .enqueue = prio_enqueue, |
412 | .dequeue = prio_dequeue, |
413 | .peek = prio_peek, |
414 | .init = prio_init, |
415 | .reset = prio_reset, |
416 | .destroy = prio_destroy, |
417 | .change = prio_tune, |
418 | .dump = prio_dump, |
419 | .owner = THIS_MODULE, |
420 | }; |
421 | |
422 | static int __init prio_module_init(void) |
423 | { |
424 | return register_qdisc(qops: &prio_qdisc_ops); |
425 | } |
426 | |
427 | static void __exit prio_module_exit(void) |
428 | { |
429 | unregister_qdisc(qops: &prio_qdisc_ops); |
430 | } |
431 | |
432 | module_init(prio_module_init) |
433 | module_exit(prio_module_exit) |
434 | |
435 | MODULE_LICENSE("GPL" ); |
436 | |