sch_taprio.c source code [linux/net/sched/sch_taprio.c]

1	// SPDX-License-Identifier: GPL-2.0
2
/* net/sched/sch_taprio.c	 Time Aware Priority Scheduler
 *
 * Authors:	Vinicius Costa Gomes <vinicius.gomes@intel.com>
 *
 */
8
9	#include <linux/ethtool.h>
10	#include <linux/ethtool_netlink.h>
11	#include <linux/types.h>
12	#include <linux/slab.h>
13	#include <linux/kernel.h>
14	#include <linux/string.h>
15	#include <linux/list.h>
16	#include <linux/errno.h>
17	#include <linux/skbuff.h>
18	#include <linux/math64.h>
19	#include <linux/module.h>
20	#include <linux/spinlock.h>
21	#include <linux/rcupdate.h>
22	#include <linux/time.h>
23	#include <net/gso.h>
24	#include <net/netlink.h>
25	#include <net/pkt_sched.h>
26	#include <net/pkt_cls.h>
27	#include <net/sch_generic.h>
28	#include <net/sock.h>
29	#include <net/tcp.h>
30
31	#define TAPRIO_STAT_NOT_SET (~0ULL)
32
33	#include "sch_mqprio_lib.h"
34
35	static LIST_HEAD(taprio_list);
36	static struct static_key_false taprio_have_broken_mqprio;
37	static struct static_key_false taprio_have_working_mqprio;
38
39	#define TAPRIO_ALL_GATES_OPEN -1
40
41	#define TXTIME_ASSIST_IS_ENABLED(flags) ((flags) & TCA_TAPRIO_ATTR_FLAG_TXTIME_ASSIST)
42	#define FULL_OFFLOAD_IS_ENABLED(flags) ((flags) & TCA_TAPRIO_ATTR_FLAG_FULL_OFFLOAD)
43	#define TAPRIO_FLAGS_INVALID U32_MAX
44
45	struct sched_entry {
46	/ Durations between this GCL entry and the GCL entry where the*
47	* respective traffic class gate closes
48	*/
49	u64 gate_duration[TC_MAX_QUEUE];
50	atomic_t budget[TC_MAX_QUEUE];
51	/ The qdisc makes some effort so that no packet leaves*
52	* after this time
53	*/
54	ktime_t gate_close_time[TC_MAX_QUEUE];
55	struct list_head list;
56	/ Used to calculate when to advance the schedule /
57	ktime_t end_time;
58	ktime_t next_txtime;
59	int index;
60	u32 gate_mask;
61	u32 interval;
62	u8 command;
63	};
64
65	struct sched_gate_list {
66	/ Longest non-zero contiguous gate durations per traffic class,*
67	* or 0 if a traffic class gate never opens during the schedule.
68	*/
69	u64 max_open_gate_duration[TC_MAX_QUEUE];
70	u32 max_frm_len[TC_MAX_QUEUE]; / for the fast path /
71	u32 max_sdu[TC_MAX_QUEUE]; / for dump /
72	struct rcu_head rcu;
73	struct list_head entries;
74	size_t num_entries;
75	ktime_t cycle_end_time;
76	s64 cycle_time;
77	s64 cycle_time_extension;
78	s64 base_time;
79	};
80
81	struct taprio_sched {
82	struct Qdisc **qdiscs;
83	struct Qdisc *root;
84	u32 flags;
85	enum tk_offsets tk_offset;
86	int clockid;
87	bool offloaded;
88	bool detected_mqprio;
89	bool broken_mqprio;
90	atomic64_t picos_per_byte; / Using picoseconds because for 10Gbps+*
91	* speeds it's sub-nanoseconds per byte
92	*/
93
94	/ Protects the update side of the RCU protected current_entry /
95	spinlock_t current_entry_lock;
96	struct sched_entry __rcu *current_entry;
97	struct sched_gate_list __rcu *oper_sched;
98	struct sched_gate_list __rcu *admin_sched;
99	struct hrtimer advance_timer;
100	struct list_head taprio_list;
101	int cur_txq[TC_MAX_QUEUE];
102	u32 max_sdu[TC_MAX_QUEUE]; / save info from the user /
103	u32 fp[TC_QOPT_MAX_QUEUE]; / only for dump and offloading /
104	u32 txtime_delay;
105	};
106
107	struct __tc_taprio_qopt_offload {
108	refcount_t users;
109	struct tc_taprio_qopt_offload offload;
110	};
111
112	static void taprio_calculate_gate_durations(struct taprio_sched *q,
113	struct sched_gate_list *sched)
114	{
115	struct net_device *dev = qdisc_dev(qdisc: q->root);
116	int num_tc = netdev_get_num_tc(dev);
117	struct sched_entry entry, cur;
118	int tc;
119
120	list_for_each_entry(entry, &sched->entries, list) {
121	u32 gates_still_open = entry->gate_mask;
122
123	/ For each traffic class, calculate each open gate duration,*
124	* starting at this schedule entry and ending at the schedule
125	* entry containing a gate close event for that TC.
126	*/
127	cur = entry;
128
129	do {
130	if (!gates_still_open)
131	break;
132
133	for (tc = `0`; tc < num_tc; tc++) {
134	if (!(gates_still_open & BIT(tc)))
135	continue;
136
137	if (cur->gate_mask & BIT(tc))
138	entry->gate_duration[tc] += cur->interval;
139	else
140	gates_still_open &= ~BIT(tc);
141	}
142
143	cur = list_next_entry_circular(cur, &sched->entries, list);
144	} while (cur != entry);
145
146	/ Keep track of the maximum gate duration for each traffic*
147	* class, taking care to not confuse a traffic class which is
148	* temporarily closed with one that is always closed.
149	*/
150	for (tc = `0`; tc < num_tc; tc++)
151	if (entry->gate_duration[tc] &&
152	sched->max_open_gate_duration[tc] < entry->gate_duration[tc])
153	sched->max_open_gate_duration[tc] = entry->gate_duration[tc];
154	}
155	}
156
157	static bool taprio_entry_allows_tx(ktime_t skb_end_time,
158	struct sched_entry entry, int* tc)
159	{
160	return ktime_before(cmp1: skb_end_time, cmp2: entry->gate_close_time[tc]);
161	}
162
163	static ktime_t sched_base_time(const struct sched_gate_list *sched)
164	{
165	if (!sched)
166	return KTIME_MAX;
167
168	return ns_to_ktime(ns: sched->base_time);
169	}
170
171	static ktime_t taprio_mono_to_any(const struct taprio_sched *q, ktime_t mono)
172	{
173	/ This pairs with WRITE_ONCE() in taprio_parse_clockid() /
174	enum tk_offsets tk_offset = READ_ONCE(q->tk_offset);
175
176	switch (tk_offset) {
177	case TK_OFFS_MAX:
178	return mono;
179	default:
180	return ktime_mono_to_any(tmono: mono, offs: tk_offset);
181	}
182	}
183
184	static ktime_t taprio_get_time(const struct taprio_sched *q)
185	{
186	return taprio_mono_to_any(q, mono: ktime_get());
187	}
188
189	static void taprio_free_sched_cb(struct rcu_head *head)
190	{
191	struct sched_gate_list sched = container_of(head, struct* sched_gate_list, rcu);
192	struct sched_entry entry, n;
193
194	list_for_each_entry_safe(entry, n, &sched->entries, list) {
195	list_del(entry: &entry->list);
196	kfree(objp: entry);
197	}
198
199	kfree(objp: sched);
200	}
201
202	static void switch_schedules(struct taprio_sched *q,
203	struct sched_gate_list **admin,
204	struct sched_gate_list **oper)
205	{
206	rcu_assign_pointer(q->oper_sched, *admin);
207	rcu_assign_pointer(q->admin_sched, NULL);
208
209	if (*oper)
210	call_rcu(head: &(*oper)->rcu, func: taprio_free_sched_cb);
211
212	oper = admin;
213	*admin = NULL;
214	}
215
216	/ Get how much time has been already elapsed in the current cycle. /
217	static s32 get_cycle_time_elapsed(struct sched_gate_list *sched, ktime_t time)
218	{
219	ktime_t time_since_sched_start;
220	s32 time_elapsed;
221
222	time_since_sched_start = ktime_sub(time, sched->base_time);
223	div_s64_rem(dividend: time_since_sched_start, divisor: sched->cycle_time, remainder: &time_elapsed);
224
225	return time_elapsed;
226	}
227
228	static ktime_t get_interval_end_time(struct sched_gate_list *sched,
229	struct sched_gate_list *admin,
230	struct sched_entry *entry,
231	ktime_t intv_start)
232	{
233	s32 cycle_elapsed = get_cycle_time_elapsed(sched, time: intv_start);
234	ktime_t intv_end, cycle_ext_end, cycle_end;
235
236	cycle_end = ktime_add_ns(intv_start, sched->cycle_time - cycle_elapsed);
237	intv_end = ktime_add_ns(intv_start, entry->interval);
238	cycle_ext_end = ktime_add(cycle_end, sched->cycle_time_extension);
239
240	if (ktime_before(cmp1: intv_end, cmp2: cycle_end))
241	return intv_end;
242	else if (admin && admin != sched &&
243	ktime_after(cmp1: admin->base_time, cmp2: cycle_end) &&
244	ktime_before(cmp1: admin->base_time, cmp2: cycle_ext_end))
245	return admin->base_time;
246	else
247	return cycle_end;
248	}
249
250	static int length_to_duration(struct taprio_sched q, int* len)
251	{
252	return div_u64(dividend: len * atomic64_read(v: &q->picos_per_byte), PSEC_PER_NSEC);
253	}
254
255	static int duration_to_length(struct taprio_sched *q, u64 duration)
256	{
257	return div_u64(dividend: duration * PSEC_PER_NSEC, divisor: atomic64_read(v: &q->picos_per_byte));
258	}
259
260	/ Sets sched->max_sdu[] and sched->max_frm_len[] to the minimum between the*
261	* q->max_sdu[] requested by the user and the max_sdu dynamically determined by
262	* the maximum open gate durations at the given link speed.
263	*/
264	static void taprio_update_queue_max_sdu(struct taprio_sched *q,
265	struct sched_gate_list *sched,
266	struct qdisc_size_table *stab)
267	{
268	struct net_device *dev = qdisc_dev(qdisc: q->root);
269	int num_tc = netdev_get_num_tc(dev);
270	u32 max_sdu_from_user;
271	u32 max_sdu_dynamic;
272	u32 max_sdu;
273	int tc;
274
275	for (tc = `0`; tc < num_tc; tc++) {
276	max_sdu_from_user = q->max_sdu[tc] ?: U32_MAX;
277
278	/ TC gate never closes => keep the queueMaxSDU*
279	* selected by the user
280	*/
281	if (sched->max_open_gate_duration[tc] == sched->cycle_time) {
282	max_sdu_dynamic = U32_MAX;
283	} else {
284	u32 max_frm_len;
285
286	max_frm_len = duration_to_length(q, duration: sched->max_open_gate_duration[tc]);
287	/ Compensate for L1 overhead from size table,*
288	* but don't let the frame size go negative
289	*/
290	if (stab) {
291	max_frm_len -= stab->szopts.overhead;
292	max_frm_len = max_t(int, max_frm_len,
293	dev->hard_header_len + `1`);
294	}
295	max_sdu_dynamic = max_frm_len - dev->hard_header_len;
296	if (max_sdu_dynamic > dev->max_mtu)
297	max_sdu_dynamic = U32_MAX;
298	}
299
300	max_sdu = min(max_sdu_dynamic, max_sdu_from_user);
301
302	if (max_sdu != U32_MAX) {
303	sched->max_frm_len[tc] = max_sdu + dev->hard_header_len;
304	sched->max_sdu[tc] = max_sdu;
305	} else {
306	sched->max_frm_len[tc] = U32_MAX; / never oversized /
307	sched->max_sdu[tc] = `0`;
308	}
309	}
310	}
311
312	/ Returns the entry corresponding to next available interval. If*
313	* validate_interval is set, it only validates whether the timestamp occurs
314	* when the gate corresponding to the skb's traffic class is open.
315	*/
316	static struct sched_entry find_entry_to_transmit(struct* sk_buff *skb,
317	struct Qdisc *sch,
318	struct sched_gate_list *sched,
319	struct sched_gate_list *admin,
320	ktime_t time,
321	ktime_t *interval_start,
322	ktime_t *interval_end,
323	bool validate_interval)
324	{
325	ktime_t curr_intv_start, curr_intv_end, cycle_end, packet_transmit_time;
326	ktime_t earliest_txtime = KTIME_MAX, txtime, cycle, transmit_end_time;
327	struct sched_entry entry = NULL, entry_found = NULL;
328	struct taprio_sched *q = qdisc_priv(sch);
329	struct net_device *dev = qdisc_dev(qdisc: sch);
330	bool entry_available = false;
331	s32 cycle_elapsed;
332	int tc, n;
333
334	tc = netdev_get_prio_tc_map(dev, prio: skb->priority);
335	packet_transmit_time = length_to_duration(q, len: qdisc_pkt_len(skb));
336
337	*interval_start = `0`;
338	*interval_end = `0`;
339
340	if (!sched)
341	return NULL;
342
343	cycle = sched->cycle_time;
344	cycle_elapsed = get_cycle_time_elapsed(sched, time);
345	curr_intv_end = ktime_sub_ns(time, cycle_elapsed);
346	cycle_end = ktime_add_ns(curr_intv_end, cycle);
347
348	list_for_each_entry(entry, &sched->entries, list) {
349	curr_intv_start = curr_intv_end;
350	curr_intv_end = get_interval_end_time(sched, admin, entry,
351	intv_start: curr_intv_start);
352
353	if (ktime_after(cmp1: curr_intv_start, cmp2: cycle_end))
354	break;
355
356	if (!(entry->gate_mask & BIT(tc)) \|\|
357	packet_transmit_time > entry->interval)
358	continue;
359
360	txtime = entry->next_txtime;
361
362	if (ktime_before(cmp1: txtime, cmp2: time) \|\| validate_interval) {
363	transmit_end_time = ktime_add_ns(time, packet_transmit_time);
364	if ((ktime_before(cmp1: curr_intv_start, cmp2: time) &&
365	ktime_before(cmp1: transmit_end_time, cmp2: curr_intv_end)) \|\|
366	(ktime_after(cmp1: curr_intv_start, cmp2: time) && !validate_interval)) {
367	entry_found = entry;
368	*interval_start = curr_intv_start;
369	*interval_end = curr_intv_end;
370	break;
371	} else if (!entry_available && !validate_interval) {
372	/ Here, we are just trying to find out the*
373	* first available interval in the next cycle.
374	*/
375	entry_available = true;
376	entry_found = entry;
377	*interval_start = ktime_add_ns(curr_intv_start, cycle);
378	*interval_end = ktime_add_ns(curr_intv_end, cycle);
379	}
380	} else if (ktime_before(cmp1: txtime, cmp2: earliest_txtime) &&
381	!entry_available) {
382	earliest_txtime = txtime;
383	entry_found = entry;
384	n = div_s64(ktime_sub(txtime, curr_intv_start), divisor: cycle);
385	interval_start = ktime_add(curr_intv_start, n cycle);
386	interval_end = ktime_add(curr_intv_end, n cycle);
387	}
388	}
389
390	return entry_found;
391	}
392
393	static bool is_valid_interval(struct sk_buff skb, struct* Qdisc *sch)
394	{
395	struct taprio_sched *q = qdisc_priv(sch);
396	struct sched_gate_list sched, admin;
397	ktime_t interval_start, interval_end;
398	struct sched_entry *entry;
399
400	rcu_read_lock();
401	sched = rcu_dereference(q->oper_sched);
402	admin = rcu_dereference(q->admin_sched);
403
404	entry = find_entry_to_transmit(skb, sch, sched, admin, time: skb->tstamp,
405	interval_start: &interval_start, interval_end: &interval_end, validate_interval: true);
406	rcu_read_unlock();
407
408	return entry;
409	}
410
411	static bool taprio_flags_valid(u32 flags)
412	{
413	/ Make sure no other flag bits are set. /
414	if (flags & ~(TCA_TAPRIO_ATTR_FLAG_TXTIME_ASSIST \|
415	TCA_TAPRIO_ATTR_FLAG_FULL_OFFLOAD))
416	return false;
417	/ txtime-assist and full offload are mutually exclusive /
418	if ((flags & TCA_TAPRIO_ATTR_FLAG_TXTIME_ASSIST) &&
419	(flags & TCA_TAPRIO_ATTR_FLAG_FULL_OFFLOAD))
420	return false;
421	return true;
422	}
423
424	/ This returns the tstamp value set by TCP in terms of the set clock. /
425	static ktime_t get_tcp_tstamp(struct taprio_sched q, struct* sk_buff *skb)
426	{
427	unsigned int offset = skb_network_offset(skb);
428	const struct ipv6hdr *ipv6h;
429	const struct iphdr *iph;
430	struct ipv6hdr _ipv6h;
431
432	ipv6h = skb_header_pointer(skb, offset, len: sizeof(_ipv6h), buffer: &_ipv6h);
433	if (!ipv6h)
434	return `0`;
435
436	if (ipv6h->version == `4`) {
437	iph = (struct iphdr *)ipv6h;
438	offset += iph->ihl * `4`;
439
440	/ special-case 6in4 tunnelling, as that is a common way to get*
441	* v6 connectivity in the home
442	*/
443	if (iph->protocol == IPPROTO_IPV6) {
444	ipv6h = skb_header_pointer(skb, offset,
445	len: sizeof(_ipv6h), buffer: &_ipv6h);
446
447	if (!ipv6h \|\| ipv6h->nexthdr != IPPROTO_TCP)
448	return `0`;
449	} else if (iph->protocol != IPPROTO_TCP) {
450	return `0`;
451	}
452	} else if (ipv6h->version == `6` && ipv6h->nexthdr != IPPROTO_TCP) {
453	return `0`;
454	}
455
456	return taprio_mono_to_any(q, mono: skb->skb_mstamp_ns);
457	}
458
459	/ There are a few scenarios where we will have to modify the txtime from*
460	* what is read from next_txtime in sched_entry. They are:
461	* 1. If txtime is in the past,
462	* a. The gate for the traffic class is currently open and packet can be
463	* transmitted before it closes, schedule the packet right away.
464	* b. If the gate corresponding to the traffic class is going to open later
465	* in the cycle, set the txtime of packet to the interval start.
466	* 2. If txtime is in the future, there are packets corresponding to the
467	* current traffic class waiting to be transmitted. So, the following
468	* possibilities exist:
469	* a. We can transmit the packet before the window containing the txtime
470	* closes.
471	* b. The window might close before the transmission can be completed
472	* successfully. So, schedule the packet in the next open window.
473	*/
474	static long get_packet_txtime(struct sk_buff skb, struct* Qdisc *sch)
475	{
476	ktime_t transmit_end_time, interval_end, interval_start, tcp_tstamp;
477	struct taprio_sched *q = qdisc_priv(sch);
478	struct sched_gate_list sched, admin;
479	ktime_t minimum_time, now, txtime;
480	int len, packet_transmit_time;
481	struct sched_entry *entry;
482	bool sched_changed;
483
484	now = taprio_get_time(q);
485	minimum_time = ktime_add_ns(now, q->txtime_delay);
486
487	tcp_tstamp = get_tcp_tstamp(q, skb);
488	minimum_time = max_t(ktime_t, minimum_time, tcp_tstamp);
489
490	rcu_read_lock();
491	admin = rcu_dereference(q->admin_sched);
492	sched = rcu_dereference(q->oper_sched);
493	if (admin && ktime_after(cmp1: minimum_time, cmp2: admin->base_time))
494	switch_schedules(q, admin: &admin, oper: &sched);
495
496	/ Until the schedule starts, all the queues are open /
497	if (!sched \|\| ktime_before(cmp1: minimum_time, cmp2: sched->base_time)) {
498	txtime = minimum_time;
499	goto done;
500	}
501
502	len = qdisc_pkt_len(skb);
503	packet_transmit_time = length_to_duration(q, len);
504
505	do {
506	sched_changed = false;
507
508	entry = find_entry_to_transmit(skb, sch, sched, admin,
509	time: minimum_time,
510	interval_start: &interval_start, interval_end: &interval_end,
511	validate_interval: false);
512	if (!entry) {
513	txtime = `0`;
514	goto done;
515	}
516
517	txtime = entry->next_txtime;
518	txtime = max_t(ktime_t, txtime, minimum_time);
519	txtime = max_t(ktime_t, txtime, interval_start);
520
521	if (admin && admin != sched &&
522	ktime_after(cmp1: txtime, cmp2: admin->base_time)) {
523	sched = admin;
524	sched_changed = true;
525	continue;
526	}
527
528	transmit_end_time = ktime_add(txtime, packet_transmit_time);
529	minimum_time = transmit_end_time;
530
531	/ Update the txtime of current entry to the next time it's*
532	* interval starts.
533	*/
534	if (ktime_after(cmp1: transmit_end_time, cmp2: interval_end))
535	entry->next_txtime = ktime_add(interval_start, sched->cycle_time);
536	} while (sched_changed \|\| ktime_after(cmp1: transmit_end_time, cmp2: interval_end));
537
538	entry->next_txtime = transmit_end_time;
539
540	done:
541	rcu_read_unlock();
542	return txtime;
543	}
544
545	/ Devices with full offload are expected to honor this in hardware /
546	static bool taprio_skb_exceeds_queue_max_sdu(struct Qdisc *sch,
547	struct sk_buff *skb)
548	{
549	struct taprio_sched *q = qdisc_priv(sch);
550	struct net_device *dev = qdisc_dev(qdisc: sch);
551	struct sched_gate_list *sched;
552	int prio = skb->priority;
553	bool exceeds = false;
554	u8 tc;
555
556	tc = netdev_get_prio_tc_map(dev, prio);
557
558	rcu_read_lock();
559	sched = rcu_dereference(q->oper_sched);
560	if (sched && skb->len > sched->max_frm_len[tc])
561	exceeds = true;
562	rcu_read_unlock();
563
564	return exceeds;
565	}
566
567	static int taprio_enqueue_one(struct sk_buff skb, struct* Qdisc *sch,
568	struct Qdisc child, struct* sk_buff **to_free)
569	{
570	struct taprio_sched *q = qdisc_priv(sch);
571
572	/ sk_flags are only safe to use on full sockets. /
573	if (skb->sk && sk_fullsock(sk: skb->sk) && sock_flag(sk: skb->sk, flag: SOCK_TXTIME)) {
574	if (!is_valid_interval(skb, sch))
575	return qdisc_drop(skb, sch, to_free);
576	} else if (TXTIME_ASSIST_IS_ENABLED(q->flags)) {
577	skb->tstamp = get_packet_txtime(skb, sch);
578	if (!skb->tstamp)
579	return qdisc_drop(skb, sch, to_free);
580	}
581
582	qdisc_qstats_backlog_inc(sch, skb);
583	sch->q.qlen++;
584
585	return qdisc_enqueue(skb, sch: child, to_free);
586	}
587
588	static int taprio_enqueue_segmented(struct sk_buff skb, struct* Qdisc *sch,
589	struct Qdisc *child,
590	struct sk_buff **to_free)
591	{
592	unsigned int slen = `0`, numsegs = `0`, len = qdisc_pkt_len(skb);
593	netdev_features_t features = netif_skb_features(skb);
594	struct sk_buff segs, nskb;
595	int ret;
596
597	segs = skb_gso_segment(skb, features: features & ~NETIF_F_GSO_MASK);
598	if (IS_ERR_OR_NULL(ptr: segs))
599	return qdisc_drop(skb, sch, to_free);
600
601	skb_list_walk_safe(segs, segs, nskb) {
602	skb_mark_not_on_list(skb: segs);
603	qdisc_skb_cb(skb: segs)->pkt_len = segs->len;
604	slen += segs->len;
605
606	/ FIXME: we should be segmenting to a smaller size*
607	* rather than dropping these
608	*/
609	if (taprio_skb_exceeds_queue_max_sdu(sch, skb: segs))
610	ret = qdisc_drop(skb: segs, sch, to_free);
611	else
612	ret = taprio_enqueue_one(skb: segs, sch, child, to_free);
613
614	if (ret != NET_XMIT_SUCCESS) {
615	if (net_xmit_drop_count(ret))
616	qdisc_qstats_drop(sch);
617	} else {
618	numsegs++;
619	}
620	}
621
622	if (numsegs > `1`)
623	qdisc_tree_reduce_backlog(qdisc: sch, n: `1` - numsegs, len: len - slen);
624	consume_skb(skb);
625
626	return numsegs > `0` ? NET_XMIT_SUCCESS : NET_XMIT_DROP;
627	}
628
629	/ Will not be called in the full offload case, since the TX queues are*
630	* attached to the Qdisc created using qdisc_create_dflt()
631	*/
632	static int taprio_enqueue(struct sk_buff skb, struct* Qdisc *sch,
633	struct sk_buff **to_free)
634	{
635	struct taprio_sched *q = qdisc_priv(sch);
636	struct Qdisc *child;
637	int queue;
638
639	queue = skb_get_queue_mapping(skb);
640
641	child = q->qdiscs[queue];
642	if (unlikely(!child))
643	return qdisc_drop(skb, sch, to_free);
644
645	if (taprio_skb_exceeds_queue_max_sdu(sch, skb)) {
646	/ Large packets might not be transmitted when the transmission*
647	* duration exceeds any configured interval. Therefore, segment
648	* the skb into smaller chunks. Drivers with full offload are
649	* expected to handle this in hardware.
650	*/
651	if (skb_is_gso(skb))
652	return taprio_enqueue_segmented(skb, sch, child,
653	to_free);
654
655	return qdisc_drop(skb, sch, to_free);
656	}
657
658	return taprio_enqueue_one(skb, sch, child, to_free);
659	}
660
661	static struct sk_buff taprio_peek(struct* Qdisc *sch)
662	{
663	WARN_ONCE(`1`, "taprio only supports operating as root qdisc, peek() not implemented");
664	return NULL;
665	}
666
667	static void taprio_set_budgets(struct taprio_sched *q,
668	struct sched_gate_list *sched,
669	struct sched_entry *entry)
670	{
671	struct net_device *dev = qdisc_dev(qdisc: q->root);
672	int num_tc = netdev_get_num_tc(dev);
673	int tc, budget;
674
675	for (tc = `0`; tc < num_tc; tc++) {
676	/ Traffic classes which never close have infinite budget /
677	if (entry->gate_duration[tc] == sched->cycle_time)
678	budget = INT_MAX;
679	else
680	budget = div64_u64(dividend: (u64)entry->gate_duration[tc] * PSEC_PER_NSEC,
681	divisor: atomic64_read(v: &q->picos_per_byte));
682
683	atomic_set(v: &entry->budget[tc], i: budget);
684	}
685	}
686
687	/ When an skb is sent, it consumes from the budget of all traffic classes /
688	static int taprio_update_budgets(struct sched_entry *entry, size_t len,
689	int tc_consumed, int num_tc)
690	{
691	int tc, budget, new_budget = `0`;
692
693	for (tc = `0`; tc < num_tc; tc++) {
694	budget = atomic_read(v: &entry->budget[tc]);
695	/ Don't consume from infinite budget /
696	if (budget == INT_MAX) {
697	if (tc == tc_consumed)
698	new_budget = budget;
699	continue;
700	}
701
702	if (tc == tc_consumed)
703	new_budget = atomic_sub_return(i: len, v: &entry->budget[tc]);
704	else
705	atomic_sub(i: len, v: &entry->budget[tc]);
706	}
707
708	return new_budget;
709	}
710
711	static struct sk_buff taprio_dequeue_from_txq(struct* Qdisc sch, int* txq,
712	struct sched_entry *entry,
713	u32 gate_mask)
714	{
715	struct taprio_sched *q = qdisc_priv(sch);
716	struct net_device *dev = qdisc_dev(qdisc: sch);
717	struct Qdisc *child = q->qdiscs[txq];
718	int num_tc = netdev_get_num_tc(dev);
719	struct sk_buff *skb;
720	ktime_t guard;
721	int prio;
722	int len;
723	u8 tc;
724
725	if (unlikely(!child))
726	return NULL;
727
728	if (TXTIME_ASSIST_IS_ENABLED(q->flags))
729	goto skip_peek_checks;
730
731	skb = child->ops->peek(child);
732	if (!skb)
733	return NULL;
734
735	prio = skb->priority;
736	tc = netdev_get_prio_tc_map(dev, prio);
737
738	if (!(gate_mask & BIT(tc)))
739	return NULL;
740
741	len = qdisc_pkt_len(skb);
742	guard = ktime_add_ns(taprio_get_time(q), length_to_duration(q, len));
743
744	/ In the case that there's no gate entry, there's no*
745	* guard band ...
746	*/
747	if (gate_mask != TAPRIO_ALL_GATES_OPEN &&
748	!taprio_entry_allows_tx(skb_end_time: guard, entry, tc))
749	return NULL;
750
751	/ ... and no budget. /
752	if (gate_mask != TAPRIO_ALL_GATES_OPEN &&
753	taprio_update_budgets(entry, len, tc_consumed: tc, num_tc) < `0`)
754	return NULL;
755
756	skip_peek_checks:
757	skb = child->ops->dequeue(child);
758	if (unlikely(!skb))
759	return NULL;
760
761	qdisc_bstats_update(sch, skb);
762	qdisc_qstats_backlog_dec(sch, skb);
763	sch->q.qlen--;
764
765	return skb;
766	}
767
768	static void taprio_next_tc_txq(struct net_device dev, int* tc, int *txq)
769	{
770	int offset = dev->tc_to_txq[tc].offset;
771	int count = dev->tc_to_txq[tc].count;
772
773	(*txq)++;
774	if (*txq == offset + count)
775	*txq = offset;
776	}
777
778	/ Prioritize higher traffic classes, and select among TXQs belonging to the*
779	* same TC using round robin
780	*/
781	static struct sk_buff taprio_dequeue_tc_priority(struct* Qdisc *sch,
782	struct sched_entry *entry,
783	u32 gate_mask)
784	{
785	struct taprio_sched *q = qdisc_priv(sch);
786	struct net_device *dev = qdisc_dev(qdisc: sch);
787	int num_tc = netdev_get_num_tc(dev);
788	struct sk_buff *skb;
789	int tc;
790
791	for (tc = num_tc - `1`; tc >= `0`; tc--) {
792	int first_txq = q->cur_txq[tc];
793
794	if (!(gate_mask & BIT(tc)))
795	continue;
796
797	do {
798	skb = taprio_dequeue_from_txq(sch, txq: q->cur_txq[tc],
799	entry, gate_mask);
800
801	taprio_next_tc_txq(dev, tc, txq: &q->cur_txq[tc]);
802
803	if (q->cur_txq[tc] >= dev->num_tx_queues)
804	q->cur_txq[tc] = first_txq;
805
806	if (skb)
807	return skb;
808	} while (q->cur_txq[tc] != first_txq);
809	}
810
811	return NULL;
812	}
813
814	/ Broken way of prioritizing smaller TXQ indices and ignoring the traffic*
815	* class other than to determine whether the gate is open or not
816	*/
817	static struct sk_buff taprio_dequeue_txq_priority(struct* Qdisc *sch,
818	struct sched_entry *entry,
819	u32 gate_mask)
820	{
821	struct net_device *dev = qdisc_dev(qdisc: sch);
822	struct sk_buff *skb;
823	int i;
824
825	for (i = `0`; i < dev->num_tx_queues; i++) {
826	skb = taprio_dequeue_from_txq(sch, txq: i, entry, gate_mask);
827	if (skb)
828	return skb;
829	}
830
831	return NULL;
832	}
833
834	/ Will not be called in the full offload case, since the TX queues are*
835	* attached to the Qdisc created using qdisc_create_dflt()
836	*/
837	static struct sk_buff taprio_dequeue(struct* Qdisc *sch)
838	{
839	struct taprio_sched *q = qdisc_priv(sch);
840	struct sk_buff *skb = NULL;
841	struct sched_entry *entry;
842	u32 gate_mask;
843
844	rcu_read_lock();
845	entry = rcu_dereference(q->current_entry);
846	/ if there's no entry, it means that the schedule didn't*
847	* start yet, so force all gates to be open, this is in
848	* accordance to IEEE 802.1Qbv-2015 Section 8.6.9.4.5
849	* "AdminGateStates"
850	*/
851	gate_mask = entry ? entry->gate_mask : TAPRIO_ALL_GATES_OPEN;
852	if (!gate_mask)
853	goto done;
854
855	if (static_branch_unlikely(&taprio_have_broken_mqprio) &&
856	!static_branch_likely(&taprio_have_working_mqprio)) {
857	/ Single NIC kind which is broken /
858	skb = taprio_dequeue_txq_priority(sch, entry, gate_mask);
859	} else if (static_branch_likely(&taprio_have_working_mqprio) &&
860	!static_branch_unlikely(&taprio_have_broken_mqprio)) {
861	/ Single NIC kind which prioritizes properly /
862	skb = taprio_dequeue_tc_priority(sch, entry, gate_mask);
863	} else {
864	/ Mixed NIC kinds present in system, need dynamic testing /
865	if (q->broken_mqprio)
866	skb = taprio_dequeue_txq_priority(sch, entry, gate_mask);
867	else
868	skb = taprio_dequeue_tc_priority(sch, entry, gate_mask);
869	}
870
871	done:
872	rcu_read_unlock();
873
874	return skb;
875	}
876
877	static bool should_restart_cycle(const struct sched_gate_list *oper,
878	const struct sched_entry *entry)
879	{
880	if (list_is_last(list: &entry->list, head: &oper->entries))
881	return true;
882
883	if (ktime_compare(cmp1: entry->end_time, cmp2: oper->cycle_end_time) == `0`)
884	return true;
885
886	return false;
887	}
888
889	static bool should_change_schedules(const struct sched_gate_list *admin,
890	const struct sched_gate_list *oper,
891	ktime_t end_time)
892	{
893	ktime_t next_base_time, extension_time;
894
895	if (!admin)
896	return false;
897
898	next_base_time = sched_base_time(sched: admin);
899
900	/ This is the simple case, the end_time would fall after*
901	* the next schedule base_time.
902	*/
903	if (ktime_compare(cmp1: next_base_time, cmp2: end_time) <= `0`)
904	return true;
905
906	/ This is the cycle_time_extension case, if the end_time*
907	* plus the amount that can be extended would fall after the
908	* next schedule base_time, we can extend the current schedule
909	* for that amount.
910	*/
911	extension_time = ktime_add_ns(end_time, oper->cycle_time_extension);
912
913	/ FIXME: the IEEE 802.1Q-2018 Specification isn't clear about*
914	* how precisely the extension should be made. So after
915	* conformance testing, this logic may change.
916	*/
917	if (ktime_compare(cmp1: next_base_time, cmp2: extension_time) <= `0`)
918	return true;
919
920	return false;
921	}
922
923	static enum hrtimer_restart advance_sched(struct hrtimer *timer)
924	{
925	struct taprio_sched q = container_of(timer, struct* taprio_sched,
926	advance_timer);
927	struct net_device *dev = qdisc_dev(qdisc: q->root);
928	struct sched_gate_list oper, admin;
929	int num_tc = netdev_get_num_tc(dev);
930	struct sched_entry entry, next;
931	struct Qdisc *sch = q->root;
932	ktime_t end_time;
933	int tc;
934
935	spin_lock(lock: &q->current_entry_lock);
936	entry = rcu_dereference_protected(q->current_entry,
937	lockdep_is_held(&q->current_entry_lock));
938	oper = rcu_dereference_protected(q->oper_sched,
939	lockdep_is_held(&q->current_entry_lock));
940	admin = rcu_dereference_protected(q->admin_sched,
941	lockdep_is_held(&q->current_entry_lock));
942
943	if (!oper)
944	switch_schedules(q, admin: &admin, oper: &oper);
945
946	/ This can happen in two cases: 1. this is the very first run*
947	* of this function (i.e. we weren't running any schedule
948	* previously); 2. The previous schedule just ended. The first
949	* entry of all schedules are pre-calculated during the
950	* schedule initialization.
951	*/
952	if (unlikely(!entry \|\| entry->end_time == oper->base_time)) {
953	next = list_first_entry(&oper->entries, struct sched_entry,
954	list);
955	end_time = next->end_time;
956	goto first_run;
957	}
958
959	if (should_restart_cycle(oper, entry)) {
960	next = list_first_entry(&oper->entries, struct sched_entry,
961	list);
962	oper->cycle_end_time = ktime_add_ns(oper->cycle_end_time,
963	oper->cycle_time);
964	} else {
965	next = list_next_entry(entry, list);
966	}
967
968	end_time = ktime_add_ns(entry->end_time, next->interval);
969	end_time = min_t(ktime_t, end_time, oper->cycle_end_time);
970
971	for (tc = `0`; tc < num_tc; tc++) {
972	if (next->gate_duration[tc] == oper->cycle_time)
973	next->gate_close_time[tc] = KTIME_MAX;
974	else
975	next->gate_close_time[tc] = ktime_add_ns(entry->end_time,
976	next->gate_duration[tc]);
977	}
978
979	if (should_change_schedules(admin, oper, end_time)) {
980	/ Set things so the next time this runs, the new*
981	* schedule runs.
982	*/
983	end_time = sched_base_time(sched: admin);
984	switch_schedules(q, admin: &admin, oper: &oper);
985	}
986
987	next->end_time = end_time;
988	taprio_set_budgets(q, sched: oper, entry: next);
989
990	first_run:
991	rcu_assign_pointer(q->current_entry, next);
992	spin_unlock(lock: &q->current_entry_lock);
993
994	hrtimer_set_expires(timer: &q->advance_timer, time: end_time);
995
996	rcu_read_lock();
997	__netif_schedule(q: sch);
998	rcu_read_unlock();
999
1000	return HRTIMER_RESTART;
1001	}
1002
1003	static const struct nla_policy entry_policy[TCA_TAPRIO_SCHED_ENTRY_MAX + `1`] = {
1004	[TCA_TAPRIO_SCHED_ENTRY_INDEX] = { .type = NLA_U32 },
1005	[TCA_TAPRIO_SCHED_ENTRY_CMD] = { .type = NLA_U8 },
1006	[TCA_TAPRIO_SCHED_ENTRY_GATE_MASK] = { .type = NLA_U32 },
1007	[TCA_TAPRIO_SCHED_ENTRY_INTERVAL] = { .type = NLA_U32 },
1008	};
1009
1010	static const struct nla_policy taprio_tc_policy[TCA_TAPRIO_TC_ENTRY_MAX + `1`] = {
1011	[TCA_TAPRIO_TC_ENTRY_INDEX] = { .type = NLA_U32 },
1012	[TCA_TAPRIO_TC_ENTRY_MAX_SDU] = { .type = NLA_U32 },
1013	[TCA_TAPRIO_TC_ENTRY_FP] = NLA_POLICY_RANGE(NLA_U32,
1014	TC_FP_EXPRESS,
1015	TC_FP_PREEMPTIBLE),
1016	};
1017
1018	static const struct netlink_range_validation_signed taprio_cycle_time_range = {
1019	.min = `0`,
1020	.max = INT_MAX,
1021	};
1022
1023	static const struct nla_policy taprio_policy[TCA_TAPRIO_ATTR_MAX + `1`] = {
1024	[TCA_TAPRIO_ATTR_PRIOMAP] = {
1025	.len = sizeof(struct tc_mqprio_qopt)
1026	},
1027	[TCA_TAPRIO_ATTR_SCHED_ENTRY_LIST] = { .type = NLA_NESTED },
1028	[TCA_TAPRIO_ATTR_SCHED_BASE_TIME] = { .type = NLA_S64 },
1029	[TCA_TAPRIO_ATTR_SCHED_SINGLE_ENTRY] = { .type = NLA_NESTED },
1030	[TCA_TAPRIO_ATTR_SCHED_CLOCKID] = { .type = NLA_S32 },
1031	[TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME] =
1032	NLA_POLICY_FULL_RANGE_SIGNED(NLA_S64, &taprio_cycle_time_range),
1033	[TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME_EXTENSION] = { .type = NLA_S64 },
1034	[TCA_TAPRIO_ATTR_FLAGS] = { .type = NLA_U32 },
1035	[TCA_TAPRIO_ATTR_TXTIME_DELAY] = { .type = NLA_U32 },
1036	[TCA_TAPRIO_ATTR_TC_ENTRY] = { .type = NLA_NESTED },
1037	};
1038
1039	static int fill_sched_entry(struct taprio_sched q, struct* nlattr **tb,
1040	struct sched_entry *entry,
1041	struct netlink_ext_ack *extack)
1042	{
1043	int min_duration = length_to_duration(q, ETH_ZLEN);
1044	u32 interval = `0`;
1045
1046	if (tb[TCA_TAPRIO_SCHED_ENTRY_CMD])
1047	entry->command = nla_get_u8(
1048	nla: tb[TCA_TAPRIO_SCHED_ENTRY_CMD]);
1049
1050	if (tb[TCA_TAPRIO_SCHED_ENTRY_GATE_MASK])
1051	entry->gate_mask = nla_get_u32(
1052	nla: tb[TCA_TAPRIO_SCHED_ENTRY_GATE_MASK]);
1053
1054	if (tb[TCA_TAPRIO_SCHED_ENTRY_INTERVAL])
1055	interval = nla_get_u32(
1056	nla: tb[TCA_TAPRIO_SCHED_ENTRY_INTERVAL]);
1057
1058	/ The interval should allow at least the minimum ethernet*
1059	* frame to go out.
1060	*/
1061	if (interval < min_duration) {
1062	NL_SET_ERR_MSG(extack, "Invalid interval for schedule entry");
1063	return -EINVAL;
1064	}
1065
1066	entry->interval = interval;
1067
1068	return `0`;
1069	}
1070
1071	static int parse_sched_entry(struct taprio_sched q, struct* nlattr *n,
1072	struct sched_entry entry, int* index,
1073	struct netlink_ext_ack *extack)
1074	{
1075	struct nlattr *tb[TCA_TAPRIO_SCHED_ENTRY_MAX + `1`] = { };
1076	int err;
1077
1078	err = nla_parse_nested_deprecated(tb, TCA_TAPRIO_SCHED_ENTRY_MAX, nla: n,
1079	policy: entry_policy, NULL);
1080	if (err < `0`) {
1081	NL_SET_ERR_MSG(extack, "Could not parse nested entry");
1082	return -EINVAL;
1083	}
1084
1085	entry->index = index;
1086
1087	return fill_sched_entry(q, tb, entry, extack);
1088	}
1089
1090	static int parse_sched_list(struct taprio_sched q, struct* nlattr *list,
1091	struct sched_gate_list *sched,
1092	struct netlink_ext_ack *extack)
1093	{
1094	struct nlattr *n;
1095	int err, rem;
1096	int i = `0`;
1097
1098	if (!list)
1099	return -EINVAL;
1100
1101	nla_for_each_nested(n, list, rem) {
1102	struct sched_entry *entry;
1103
1104	if (nla_type(nla: n) != TCA_TAPRIO_SCHED_ENTRY) {
1105	NL_SET_ERR_MSG(extack, "Attribute is not of type 'entry'");
1106	continue;
1107	}
1108
1109	entry = kzalloc(size: sizeof(*entry), GFP_KERNEL);
1110	if (!entry) {
1111	NL_SET_ERR_MSG(extack, "Not enough memory for entry");
1112	return -ENOMEM;
1113	}
1114
1115	err = parse_sched_entry(q, n, entry, index: i, extack);
1116	if (err < `0`) {
1117	kfree(objp: entry);
1118	return err;
1119	}
1120
1121	list_add_tail(new: &entry->list, head: &sched->entries);
1122	i++;
1123	}
1124
1125	sched->num_entries = i;
1126
1127	return i;
1128	}
1129
1130	static int parse_taprio_schedule(struct taprio_sched q, struct* nlattr **tb,
1131	struct sched_gate_list *new,
1132	struct netlink_ext_ack *extack)
1133	{
1134	int err = `0`;
1135
1136	if (tb[TCA_TAPRIO_ATTR_SCHED_SINGLE_ENTRY]) {
1137	NL_SET_ERR_MSG(extack, "Adding a single entry is not supported");
1138	return -ENOTSUPP;
1139	}
1140
1141	if (tb[TCA_TAPRIO_ATTR_SCHED_BASE_TIME])
1142	new->base_time = nla_get_s64(nla: tb[TCA_TAPRIO_ATTR_SCHED_BASE_TIME]);
1143
1144	if (tb[TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME_EXTENSION])
1145	new->cycle_time_extension = nla_get_s64(nla: tb[TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME_EXTENSION]);
1146
1147	if (tb[TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME])
1148	new->cycle_time = nla_get_s64(nla: tb[TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME]);
1149
1150	if (tb[TCA_TAPRIO_ATTR_SCHED_ENTRY_LIST])
1151	err = parse_sched_list(q, list: tb[TCA_TAPRIO_ATTR_SCHED_ENTRY_LIST],
1152	sched: new, extack);
1153	if (err < `0`)
1154	return err;
1155
1156	if (!new->cycle_time) {
1157	struct sched_entry *entry;
1158	ktime_t cycle = `0`;
1159
1160	list_for_each_entry(entry, &new->entries, list)
1161	cycle = ktime_add_ns(cycle, entry->interval);
1162
1163	if (!cycle) {
1164	NL_SET_ERR_MSG(extack, "'cycle_time' can never be 0");
1165	return -EINVAL;
1166	}
1167
1168	if (cycle < `0` \|\| cycle > INT_MAX) {
1169	NL_SET_ERR_MSG(extack, "'cycle_time' is too big");
1170	return -EINVAL;
1171	}
1172
1173	new->cycle_time = cycle;
1174	}
1175
1176	taprio_calculate_gate_durations(q, sched: new);
1177
1178	return `0`;
1179	}
1180
1181	static int taprio_parse_mqprio_opt(struct net_device *dev,
1182	struct tc_mqprio_qopt *qopt,
1183	struct netlink_ext_ack *extack,
1184	u32 taprio_flags)
1185	{
1186	bool allow_overlapping_txqs = TXTIME_ASSIST_IS_ENABLED(taprio_flags);
1187
1188	if (!qopt && !dev->num_tc) {
1189	NL_SET_ERR_MSG(extack, "'mqprio' configuration is necessary");
1190	return -EINVAL;
1191	}
1192
1193	/ If num_tc is already set, it means that the user already*
1194	* configured the mqprio part
1195	*/
1196	if (dev->num_tc)
1197	return `0`;
1198
1199	/ taprio imposes that traffic classes map 1:n to tx queues /
1200	if (qopt->num_tc > dev->num_tx_queues) {
1201	NL_SET_ERR_MSG(extack, "Number of traffic classes is greater than number of HW queues");
1202	return -EINVAL;
1203	}
1204
1205	/ For some reason, in txtime-assist mode, we allow TXQ ranges for*
1206	* different TCs to overlap, and just validate the TXQ ranges.
1207	*/
1208	return mqprio_validate_qopt(dev, qopt, validate_queue_counts: true, allow_overlapping_txqs,
1209	extack);
1210	}
1211
1212	static int taprio_get_start_time(struct Qdisc *sch,
1213	struct sched_gate_list *sched,
1214	ktime_t *start)
1215	{
1216	struct taprio_sched *q = qdisc_priv(sch);
1217	ktime_t now, base, cycle;
1218	s64 n;
1219
1220	base = sched_base_time(sched);
1221	now = taprio_get_time(q);
1222
1223	if (ktime_after(cmp1: base, cmp2: now)) {
1224	*start = base;
1225	return `0`;
1226	}
1227
1228	cycle = sched->cycle_time;
1229
1230	/ The qdisc is expected to have at least one sched_entry. Moreover,*
1231	* any entry must have 'interval' > 0. Thus if the cycle time is zero,
1232	* something went really wrong. In that case, we should warn about this
1233	* inconsistent state and return error.
1234	*/
1235	if (WARN_ON(!cycle))
1236	return -EFAULT;
1237
1238	/ Schedule the start time for the beginning of the next*
1239	* cycle.
1240	*/
1241	n = div64_s64(ktime_sub_ns(now, base), divisor: cycle);
1242	start = ktime_add_ns(base, (n + `1`) cycle);
1243	return `0`;
1244	}
1245
1246	static void setup_first_end_time(struct taprio_sched *q,
1247	struct sched_gate_list *sched, ktime_t base)
1248	{
1249	struct net_device *dev = qdisc_dev(qdisc: q->root);
1250	int num_tc = netdev_get_num_tc(dev);
1251	struct sched_entry *first;
1252	ktime_t cycle;
1253	int tc;
1254
1255	first = list_first_entry(&sched->entries,
1256	struct sched_entry, list);
1257
1258	cycle = sched->cycle_time;
1259
1260	/ FIXME: find a better place to do this /
1261	sched->cycle_end_time = ktime_add_ns(base, cycle);
1262
1263	first->end_time = ktime_add_ns(base, first->interval);
1264	taprio_set_budgets(q, sched, entry: first);
1265
1266	for (tc = `0`; tc < num_tc; tc++) {
1267	if (first->gate_duration[tc] == sched->cycle_time)
1268	first->gate_close_time[tc] = KTIME_MAX;
1269	else
1270	first->gate_close_time[tc] = ktime_add_ns(base, first->gate_duration[tc]);
1271	}
1272
1273	rcu_assign_pointer(q->current_entry, NULL);
1274	}
1275
1276	static void taprio_start_sched(struct Qdisc *sch,
1277	ktime_t start, struct sched_gate_list *new)
1278	{
1279	struct taprio_sched *q = qdisc_priv(sch);
1280	ktime_t expires;
1281
1282	if (FULL_OFFLOAD_IS_ENABLED(q->flags))
1283	return;
1284
1285	expires = hrtimer_get_expires(timer: &q->advance_timer);
1286	if (expires == `0`)
1287	expires = KTIME_MAX;
1288
1289	/ If the new schedule starts before the next expiration, we*
1290	* reprogram it to the earliest one, so we change the admin
1291	* schedule to the operational one at the right time.
1292	*/
1293	start = min_t(ktime_t, start, expires);
1294
1295	hrtimer_start(timer: &q->advance_timer, tim: start, mode: HRTIMER_MODE_ABS);
1296	}
1297
1298	static void taprio_set_picos_per_byte(struct net_device *dev,
1299	struct taprio_sched *q)
1300	{
1301	struct ethtool_link_ksettings ecmd;
1302	int speed = SPEED_10;
1303	int picos_per_byte;
1304	int err;
1305
1306	err = __ethtool_get_link_ksettings(dev, link_ksettings: &ecmd);
1307	if (err < `0`)
1308	goto skip;
1309
1310	if (ecmd.base.speed && ecmd.base.speed != SPEED_UNKNOWN)
1311	speed = ecmd.base.speed;
1312
1313	skip:
1314	picos_per_byte = (USEC_PER_SEC * `8`) / speed;
1315
1316	atomic64_set(v: &q->picos_per_byte, i: picos_per_byte);
1317	netdev_dbg(dev, "taprio: set %s's picos_per_byte to: %lld, linkspeed: %d\n",
1318	dev->name, (long long)atomic64_read(&q->picos_per_byte),
1319	ecmd.base.speed);
1320	}
1321
1322	static int taprio_dev_notifier(struct notifier_block nb, unsigned* long event,
1323	void *ptr)
1324	{
1325	struct net_device *dev = netdev_notifier_info_to_dev(info: ptr);
1326	struct sched_gate_list oper, admin;
1327	struct qdisc_size_table *stab;
1328	struct taprio_sched *q;
1329
1330	ASSERT_RTNL();
1331
1332	if (event != NETDEV_UP && event != NETDEV_CHANGE)
1333	return NOTIFY_DONE;
1334
1335	list_for_each_entry(q, &taprio_list, taprio_list) {
1336	if (dev != qdisc_dev(qdisc: q->root))
1337	continue;
1338
1339	taprio_set_picos_per_byte(dev, q);
1340
1341	stab = rtnl_dereference(q->root->stab);
1342
1343	oper = rtnl_dereference(q->oper_sched);
1344	if (oper)
1345	taprio_update_queue_max_sdu(q, sched: oper, stab);
1346
1347	admin = rtnl_dereference(q->admin_sched);
1348	if (admin)
1349	taprio_update_queue_max_sdu(q, sched: admin, stab);
1350
1351	break;
1352	}
1353
1354	return NOTIFY_DONE;
1355	}
1356
1357	static void setup_txtime(struct taprio_sched *q,
1358	struct sched_gate_list *sched, ktime_t base)
1359	{
1360	struct sched_entry *entry;
1361	u64 interval = `0`;
1362
1363	list_for_each_entry(entry, &sched->entries, list) {
1364	entry->next_txtime = ktime_add_ns(base, interval);
1365	interval += entry->interval;
1366	}
1367	}
1368
1369	static struct tc_taprio_qopt_offload taprio_offload_alloc(int* num_entries)
1370	{
1371	struct __tc_taprio_qopt_offload *__offload;
1372
1373	__offload = kzalloc(struct_size(__offload, offload.entries, num_entries),
1374	GFP_KERNEL);
1375	if (!__offload)
1376	return NULL;
1377
1378	refcount_set(r: &__offload->users, n: `1`);
1379
1380	return &__offload->offload;
1381	}
1382
1383	struct tc_taprio_qopt_offload taprio_offload_get(struct* tc_taprio_qopt_offload
1384	*offload)
1385	{
1386	struct __tc_taprio_qopt_offload *__offload;
1387
1388	__offload = container_of(offload, struct __tc_taprio_qopt_offload,
1389	offload);
1390
1391	refcount_inc(r: &__offload->users);
1392
1393	return offload;
1394	}
1395	EXPORT_SYMBOL_GPL(taprio_offload_get);
1396
1397	void taprio_offload_free(struct tc_taprio_qopt_offload *offload)
1398	{
1399	struct __tc_taprio_qopt_offload *__offload;
1400
1401	__offload = container_of(offload, struct __tc_taprio_qopt_offload,
1402	offload);
1403
1404	if (!refcount_dec_and_test(r: &__offload->users))
1405	return;
1406
1407	kfree(objp: __offload);
1408	}
1409	EXPORT_SYMBOL_GPL(taprio_offload_free);
1410
1411	/ The function will only serve to keep the pointers to the "oper" and "admin"*
1412	* schedules valid in relation to their base times, so when calling dump() the
1413	* users looks at the right schedules.
1414	* When using full offload, the admin configuration is promoted to oper at the
1415	* base_time in the PHC time domain. But because the system time is not
1416	* necessarily in sync with that, we can't just trigger a hrtimer to call
1417	* switch_schedules at the right hardware time.
1418	* At the moment we call this by hand right away from taprio, but in the future
1419	* it will be useful to create a mechanism for drivers to notify taprio of the
1420	* offload state (PENDING, ACTIVE, INACTIVE) so it can be visible in dump().
1421	* This is left as TODO.
1422	*/
1423	static void taprio_offload_config_changed(struct taprio_sched *q)
1424	{
1425	struct sched_gate_list oper, admin;
1426
1427	oper = rtnl_dereference(q->oper_sched);
1428	admin = rtnl_dereference(q->admin_sched);
1429
1430	switch_schedules(q, admin: &admin, oper: &oper);
1431	}
1432
1433	static u32 tc_map_to_queue_mask(struct net_device *dev, u32 tc_mask)
1434	{
1435	u32 i, queue_mask = `0`;
1436
1437	for (i = `0`; i < dev->num_tc; i++) {
1438	u32 offset, count;
1439
1440	if (!(tc_mask & BIT(i)))
1441	continue;
1442
1443	offset = dev->tc_to_txq[i].offset;
1444	count = dev->tc_to_txq[i].count;
1445
1446	queue_mask \|= GENMASK(offset + count - `1`, offset);
1447	}
1448
1449	return queue_mask;
1450	}
1451
1452	static void taprio_sched_to_offload(struct net_device *dev,
1453	struct sched_gate_list *sched,
1454	struct tc_taprio_qopt_offload *offload,
1455	const struct tc_taprio_caps *caps)
1456	{
1457	struct sched_entry *entry;
1458	int i = `0`;
1459
1460	offload->base_time = sched->base_time;
1461	offload->cycle_time = sched->cycle_time;
1462	offload->cycle_time_extension = sched->cycle_time_extension;
1463
1464	list_for_each_entry(entry, &sched->entries, list) {
1465	struct tc_taprio_sched_entry *e = &offload->entries[i];
1466
1467	e->command = entry->command;
1468	e->interval = entry->interval;
1469	if (caps->gate_mask_per_txq)
1470	e->gate_mask = tc_map_to_queue_mask(dev,
1471	tc_mask: entry->gate_mask);
1472	else
1473	e->gate_mask = entry->gate_mask;
1474
1475	i++;
1476	}
1477
1478	offload->num_entries = i;
1479	}
1480
1481	static void taprio_detect_broken_mqprio(struct taprio_sched *q)
1482	{
1483	struct net_device *dev = qdisc_dev(qdisc: q->root);
1484	struct tc_taprio_caps caps;
1485
1486	qdisc_offload_query_caps(dev, type: TC_SETUP_QDISC_TAPRIO,
1487	caps: &caps, caps_len: sizeof(caps));
1488
1489	q->broken_mqprio = caps.broken_mqprio;
1490	if (q->broken_mqprio)
1491	static_branch_inc(&taprio_have_broken_mqprio);
1492	else
1493	static_branch_inc(&taprio_have_working_mqprio);
1494
1495	q->detected_mqprio = true;
1496	}
1497
1498	static void taprio_cleanup_broken_mqprio(struct taprio_sched *q)
1499	{
1500	if (!q->detected_mqprio)
1501	return;
1502
1503	if (q->broken_mqprio)
1504	static_branch_dec(&taprio_have_broken_mqprio);
1505	else
1506	static_branch_dec(&taprio_have_working_mqprio);
1507	}
1508
1509	static int taprio_enable_offload(struct net_device *dev,
1510	struct taprio_sched *q,
1511	struct sched_gate_list *sched,
1512	struct netlink_ext_ack *extack)
1513	{
1514	const struct net_device_ops *ops = dev->netdev_ops;
1515	struct tc_taprio_qopt_offload *offload;
1516	struct tc_taprio_caps caps;
1517	int tc, err = `0`;
1518
1519	if (!ops->ndo_setup_tc) {
1520	NL_SET_ERR_MSG(extack,
1521	"Device does not support taprio offload");
1522	return -EOPNOTSUPP;
1523	}
1524
1525	qdisc_offload_query_caps(dev, type: TC_SETUP_QDISC_TAPRIO,
1526	caps: &caps, caps_len: sizeof(caps));
1527
1528	if (!caps.supports_queue_max_sdu) {
1529	for (tc = `0`; tc < TC_MAX_QUEUE; tc++) {
1530	if (q->max_sdu[tc]) {
1531	NL_SET_ERR_MSG_MOD(extack,
1532	"Device does not handle queueMaxSDU");
1533	return -EOPNOTSUPP;
1534	}
1535	}
1536	}
1537
1538	offload = taprio_offload_alloc(num_entries: sched->num_entries);
1539	if (!offload) {
1540	NL_SET_ERR_MSG(extack,
1541	"Not enough memory for enabling offload mode");
1542	return -ENOMEM;
1543	}
1544	offload->cmd = TAPRIO_CMD_REPLACE;
1545	offload->extack = extack;
1546	mqprio_qopt_reconstruct(dev, qopt: &offload->mqprio.qopt);
1547	offload->mqprio.extack = extack;
1548	taprio_sched_to_offload(dev, sched, offload, caps: &caps);
1549	mqprio_fp_to_offload(fp: q->fp, mqprio: &offload->mqprio);
1550
1551	for (tc = `0`; tc < TC_MAX_QUEUE; tc++)
1552	offload->max_sdu[tc] = q->max_sdu[tc];
1553
1554	err = ops->ndo_setup_tc(dev, TC_SETUP_QDISC_TAPRIO, offload);
1555	if (err < `0`) {
1556	NL_SET_ERR_MSG_WEAK(extack,
1557	"Device failed to setup taprio offload");
1558	goto done;
1559	}
1560
1561	q->offloaded = true;
1562
1563	done:
1564	/ The offload structure may linger around via a reference taken by the*
1565	* device driver, so clear up the netlink extack pointer so that the
1566	* driver isn't tempted to dereference data which stopped being valid
1567	*/
1568	offload->extack = NULL;
1569	offload->mqprio.extack = NULL;
1570	taprio_offload_free(offload);
1571
1572	return err;
1573	}
1574
1575	static int taprio_disable_offload(struct net_device *dev,
1576	struct taprio_sched *q,
1577	struct netlink_ext_ack *extack)
1578	{
1579	const struct net_device_ops *ops = dev->netdev_ops;
1580	struct tc_taprio_qopt_offload *offload;
1581	int err;
1582
1583	if (!q->offloaded)
1584	return `0`;
1585
1586	offload = taprio_offload_alloc(num_entries: `0`);
1587	if (!offload) {
1588	NL_SET_ERR_MSG(extack,
1589	"Not enough memory to disable offload mode");
1590	return -ENOMEM;
1591	}
1592	offload->cmd = TAPRIO_CMD_DESTROY;
1593
1594	err = ops->ndo_setup_tc(dev, TC_SETUP_QDISC_TAPRIO, offload);
1595	if (err < `0`) {
1596	NL_SET_ERR_MSG(extack,
1597	"Device failed to disable offload");
1598	goto out;
1599	}
1600
1601	q->offloaded = false;
1602
1603	out:
1604	taprio_offload_free(offload);
1605
1606	return err;
1607	}
1608
1609	/ If full offload is enabled, the only possible clockid is the net device's*
1610	* PHC. For that reason, specifying a clockid through netlink is incorrect.
1611	* For txtime-assist, it is implicitly assumed that the device's PHC is kept
1612	* in sync with the specified clockid via a user space daemon such as phc2sys.
1613	* For both software taprio and txtime-assist, the clockid is used for the
1614	* hrtimer that advances the schedule and hence mandatory.
1615	*/
1616	static int taprio_parse_clockid(struct Qdisc sch, struct* nlattr **tb,
1617	struct netlink_ext_ack *extack)
1618	{
1619	struct taprio_sched *q = qdisc_priv(sch);
1620	struct net_device *dev = qdisc_dev(qdisc: sch);
1621	int err = -EINVAL;
1622
1623	if (FULL_OFFLOAD_IS_ENABLED(q->flags)) {
1624	const struct ethtool_ops *ops = dev->ethtool_ops;
1625	struct ethtool_ts_info info = {
1626	.cmd = ETHTOOL_GET_TS_INFO,
1627	.phc_index = -`1`,
1628	};
1629
1630	if (tb[TCA_TAPRIO_ATTR_SCHED_CLOCKID]) {
1631	NL_SET_ERR_MSG(extack,
1632	"The 'clockid' cannot be specified for full offload");
1633	goto out;
1634	}
1635
1636	if (ops && ops->get_ts_info)
1637	err = ops->get_ts_info(dev, &info);
1638
1639	if (err \|\| info.phc_index < `0`) {
1640	NL_SET_ERR_MSG(extack,
1641	"Device does not have a PTP clock");
1642	err = -ENOTSUPP;
1643	goto out;
1644	}
1645	} else if (tb[TCA_TAPRIO_ATTR_SCHED_CLOCKID]) {
1646	int clockid = nla_get_s32(nla: tb[TCA_TAPRIO_ATTR_SCHED_CLOCKID]);
1647	enum tk_offsets tk_offset;
1648
1649	/ We only support static clockids and we don't allow*
1650	* for it to be modified after the first init.
1651	*/
1652	if (clockid < `0` \|\|
1653	(q->clockid != -`1` && q->clockid != clockid)) {
1654	NL_SET_ERR_MSG(extack,
1655	"Changing the 'clockid' of a running schedule is not supported");
1656	err = -ENOTSUPP;
1657	goto out;
1658	}
1659
1660	switch (clockid) {
1661	case CLOCK_REALTIME:
1662	tk_offset = TK_OFFS_REAL;
1663	break;
1664	case CLOCK_MONOTONIC:
1665	tk_offset = TK_OFFS_MAX;
1666	break;
1667	case CLOCK_BOOTTIME:
1668	tk_offset = TK_OFFS_BOOT;
1669	break;
1670	case CLOCK_TAI:
1671	tk_offset = TK_OFFS_TAI;
1672	break;
1673	default:
1674	NL_SET_ERR_MSG(extack, "Invalid 'clockid'");
1675	err = -EINVAL;
1676	goto out;
1677	}
1678	/ This pairs with READ_ONCE() in taprio_mono_to_any /
1679	WRITE_ONCE(q->tk_offset, tk_offset);
1680
1681	q->clockid = clockid;
1682	} else {
1683	NL_SET_ERR_MSG(extack, "Specifying a 'clockid' is mandatory");
1684	goto out;
1685	}
1686
1687	/ Everything went ok, return success. /
1688	err = `0`;
1689
1690	out:
1691	return err;
1692	}
1693
1694	static int taprio_parse_tc_entry(struct Qdisc *sch,
1695	struct nlattr *opt,
1696	u32 max_sdu[TC_QOPT_MAX_QUEUE],
1697	u32 fp[TC_QOPT_MAX_QUEUE],
1698	unsigned long *seen_tcs,
1699	struct netlink_ext_ack *extack)
1700	{
1701	struct nlattr *tb[TCA_TAPRIO_TC_ENTRY_MAX + `1`] = { };
1702	struct net_device *dev = qdisc_dev(qdisc: sch);
1703	int err, tc;
1704	u32 val;
1705
1706	err = nla_parse_nested(tb, maxtype: TCA_TAPRIO_TC_ENTRY_MAX, nla: opt,
1707	policy: taprio_tc_policy, extack);
1708	if (err < `0`)
1709	return err;
1710
1711	if (!tb[TCA_TAPRIO_TC_ENTRY_INDEX]) {
1712	NL_SET_ERR_MSG_MOD(extack, "TC entry index missing");
1713	return -EINVAL;
1714	}
1715
1716	tc = nla_get_u32(nla: tb[TCA_TAPRIO_TC_ENTRY_INDEX]);
1717	if (tc >= TC_QOPT_MAX_QUEUE) {
1718	NL_SET_ERR_MSG_MOD(extack, "TC entry index out of range");
1719	return -ERANGE;
1720	}
1721
1722	if (*seen_tcs & BIT(tc)) {
1723	NL_SET_ERR_MSG_MOD(extack, "Duplicate TC entry");
1724	return -EINVAL;
1725	}
1726
1727	*seen_tcs \|= BIT(tc);
1728
1729	if (tb[TCA_TAPRIO_TC_ENTRY_MAX_SDU]) {
1730	val = nla_get_u32(nla: tb[TCA_TAPRIO_TC_ENTRY_MAX_SDU]);
1731	if (val > dev->max_mtu) {
1732	NL_SET_ERR_MSG_MOD(extack, "TC max SDU exceeds device max MTU");
1733	return -ERANGE;
1734	}
1735
1736	max_sdu[tc] = val;
1737	}
1738
1739	if (tb[TCA_TAPRIO_TC_ENTRY_FP])
1740	fp[tc] = nla_get_u32(nla: tb[TCA_TAPRIO_TC_ENTRY_FP]);
1741
1742	return `0`;
1743	}
1744
1745	static int taprio_parse_tc_entries(struct Qdisc *sch,
1746	struct nlattr *opt,
1747	struct netlink_ext_ack *extack)
1748	{
1749	struct taprio_sched *q = qdisc_priv(sch);
1750	struct net_device *dev = qdisc_dev(qdisc: sch);
1751	u32 max_sdu[TC_QOPT_MAX_QUEUE];
1752	bool have_preemption = false;
1753	unsigned long seen_tcs = `0`;
1754	u32 fp[TC_QOPT_MAX_QUEUE];
1755	struct nlattr *n;
1756	int tc, rem;
1757	int err = `0`;
1758
1759	for (tc = `0`; tc < TC_QOPT_MAX_QUEUE; tc++) {
1760	max_sdu[tc] = q->max_sdu[tc];
1761	fp[tc] = q->fp[tc];
1762	}
1763
1764	nla_for_each_nested(n, opt, rem) {
1765	if (nla_type(nla: n) != TCA_TAPRIO_ATTR_TC_ENTRY)
1766	continue;
1767
1768	err = taprio_parse_tc_entry(sch, opt: n, max_sdu, fp, seen_tcs: &seen_tcs,
1769	extack);
1770	if (err)
1771	return err;
1772	}
1773
1774	for (tc = `0`; tc < TC_QOPT_MAX_QUEUE; tc++) {
1775	q->max_sdu[tc] = max_sdu[tc];
1776	q->fp[tc] = fp[tc];
1777	if (fp[tc] != TC_FP_EXPRESS)
1778	have_preemption = true;
1779	}
1780
1781	if (have_preemption) {
1782	if (!FULL_OFFLOAD_IS_ENABLED(q->flags)) {
1783	NL_SET_ERR_MSG(extack,
1784	"Preemption only supported with full offload");
1785	return -EOPNOTSUPP;
1786	}
1787
1788	if (!ethtool_dev_mm_supported(dev)) {
1789	NL_SET_ERR_MSG(extack,
1790	"Device does not support preemption");
1791	return -EOPNOTSUPP;
1792	}
1793	}
1794
1795	return err;
1796	}
1797
1798	static int taprio_mqprio_cmp(const struct net_device *dev,
1799	const struct tc_mqprio_qopt *mqprio)
1800	{
1801	int i;
1802
1803	if (!mqprio \|\| mqprio->num_tc != dev->num_tc)
1804	return -`1`;
1805
1806	for (i = `0`; i < mqprio->num_tc; i++)
1807	if (dev->tc_to_txq[i].count != mqprio->count[i] \|\|
1808	dev->tc_to_txq[i].offset != mqprio->offset[i])
1809	return -`1`;
1810
1811	for (i = `0`; i <= TC_BITMASK; i++)
1812	if (dev->prio_tc_map[i] != mqprio->prio_tc_map[i])
1813	return -`1`;
1814
1815	return `0`;
1816	}
1817
1818	/ The semantics of the 'flags' argument in relation to 'change()'*
1819	* requests, are interpreted following two rules (which are applied in
1820	* this order): (1) an omitted 'flags' argument is interpreted as
1821	* zero; (2) the 'flags' of a "running" taprio instance cannot be
1822	* changed.
1823	*/
1824	static int taprio_new_flags(const struct nlattr *attr, u32 old,
1825	struct netlink_ext_ack *extack)
1826	{
1827	u32 new = `0`;
1828
1829	if (attr)
1830	new = nla_get_u32(nla: attr);
1831
1832	if (old != TAPRIO_FLAGS_INVALID && old != new) {
1833	NL_SET_ERR_MSG_MOD(extack, "Changing 'flags' of a running schedule is not supported");
1834	return -EOPNOTSUPP;
1835	}
1836
1837	if (!taprio_flags_valid(flags: new)) {
1838	NL_SET_ERR_MSG_MOD(extack, "Specified 'flags' are not valid");
1839	return -EINVAL;
1840	}
1841
1842	return new;
1843	}
1844
1845	static int taprio_change(struct Qdisc sch, struct* nlattr *opt,
1846	struct netlink_ext_ack *extack)
1847	{
1848	struct qdisc_size_table *stab = rtnl_dereference(sch->stab);
1849	struct nlattr *tb[TCA_TAPRIO_ATTR_MAX + `1`] = { };
1850	struct sched_gate_list oper, admin, *new_admin;
1851	struct taprio_sched *q = qdisc_priv(sch);
1852	struct net_device *dev = qdisc_dev(qdisc: sch);
1853	struct tc_mqprio_qopt *mqprio = NULL;
1854	unsigned long flags;
1855	ktime_t start;
1856	int i, err;
1857
1858	err = nla_parse_nested_deprecated(tb, TCA_TAPRIO_ATTR_MAX, nla: opt,
1859	policy: taprio_policy, extack);
1860	if (err < `0`)
1861	return err;
1862
1863	if (tb[TCA_TAPRIO_ATTR_PRIOMAP])
1864	mqprio = nla_data(nla: tb[TCA_TAPRIO_ATTR_PRIOMAP]);
1865
1866	err = taprio_new_flags(attr: tb[TCA_TAPRIO_ATTR_FLAGS],
1867	old: q->flags, extack);
1868	if (err < `0`)
1869	return err;
1870
1871	q->flags = err;
1872
1873	err = taprio_parse_mqprio_opt(dev, qopt: mqprio, extack, taprio_flags: q->flags);
1874	if (err < `0`)
1875	return err;
1876
1877	err = taprio_parse_tc_entries(sch, opt, extack);
1878	if (err)
1879	return err;
1880
1881	new_admin = kzalloc(size: sizeof(*new_admin), GFP_KERNEL);
1882	if (!new_admin) {
1883	NL_SET_ERR_MSG(extack, "Not enough memory for a new schedule");
1884	return -ENOMEM;
1885	}
1886	INIT_LIST_HEAD(list: &new_admin->entries);
1887
1888	oper = rtnl_dereference(q->oper_sched);
1889	admin = rtnl_dereference(q->admin_sched);
1890
1891	/ no changes - no new mqprio settings /
1892	if (!taprio_mqprio_cmp(dev, mqprio))
1893	mqprio = NULL;
1894
1895	if (mqprio && (oper \|\| admin)) {
1896	NL_SET_ERR_MSG(extack, "Changing the traffic mapping of a running schedule is not supported");
1897	err = -ENOTSUPP;
1898	goto free_sched;
1899	}
1900
1901	if (mqprio) {
1902	err = netdev_set_num_tc(dev, num_tc: mqprio->num_tc);
1903	if (err)
1904	goto free_sched;
1905	for (i = `0`; i < mqprio->num_tc; i++) {
1906	netdev_set_tc_queue(dev, tc: i,
1907	count: mqprio->count[i],
1908	offset: mqprio->offset[i]);
1909	q->cur_txq[i] = mqprio->offset[i];
1910	}
1911
1912	/ Always use supplied priority mappings /
1913	for (i = `0`; i <= TC_BITMASK; i++)
1914	netdev_set_prio_tc_map(dev, prio: i,
1915	tc: mqprio->prio_tc_map[i]);
1916	}
1917
1918	err = parse_taprio_schedule(q, tb, new: new_admin, extack);
1919	if (err < `0`)
1920	goto free_sched;
1921
1922	if (new_admin->num_entries == `0`) {
1923	NL_SET_ERR_MSG(extack, "There should be at least one entry in the schedule");
1924	err = -EINVAL;
1925	goto free_sched;
1926	}
1927
1928	err = taprio_parse_clockid(sch, tb, extack);
1929	if (err < `0`)
1930	goto free_sched;
1931
1932	taprio_set_picos_per_byte(dev, q);
1933	taprio_update_queue_max_sdu(q, sched: new_admin, stab);
1934
1935	if (FULL_OFFLOAD_IS_ENABLED(q->flags))
1936	err = taprio_enable_offload(dev, q, sched: new_admin, extack);
1937	else
1938	err = taprio_disable_offload(dev, q, extack);
1939	if (err)
1940	goto free_sched;
1941
1942	/ Protects against enqueue()/dequeue() /
1943	spin_lock_bh(lock: qdisc_lock(qdisc: sch));
1944
1945	if (tb[TCA_TAPRIO_ATTR_TXTIME_DELAY]) {
1946	if (!TXTIME_ASSIST_IS_ENABLED(q->flags)) {
1947	NL_SET_ERR_MSG_MOD(extack, "txtime-delay can only be set when txtime-assist mode is enabled");
1948	err = -EINVAL;
1949	goto unlock;
1950	}
1951
1952	q->txtime_delay = nla_get_u32(nla: tb[TCA_TAPRIO_ATTR_TXTIME_DELAY]);
1953	}
1954
1955	if (!TXTIME_ASSIST_IS_ENABLED(q->flags) &&
1956	!FULL_OFFLOAD_IS_ENABLED(q->flags) &&
1957	!hrtimer_active(timer: &q->advance_timer)) {
1958	hrtimer_init(timer: &q->advance_timer, which_clock: q->clockid, mode: HRTIMER_MODE_ABS);
1959	q->advance_timer.function = advance_sched;
1960	}
1961
1962	err = taprio_get_start_time(sch, sched: new_admin, start: &start);
1963	if (err < `0`) {
1964	NL_SET_ERR_MSG(extack, "Internal error: failed get start time");
1965	goto unlock;
1966	}
1967
1968	setup_txtime(q, sched: new_admin, base: start);
1969
1970	if (TXTIME_ASSIST_IS_ENABLED(q->flags)) {
1971	if (!oper) {
1972	rcu_assign_pointer(q->oper_sched, new_admin);
1973	err = `0`;
1974	new_admin = NULL;
1975	goto unlock;
1976	}
1977
1978	rcu_assign_pointer(q->admin_sched, new_admin);
1979	if (admin)
1980	call_rcu(head: &admin->rcu, func: taprio_free_sched_cb);
1981	} else {
1982	setup_first_end_time(q, sched: new_admin, base: start);
1983
1984	/ Protects against advance_sched() /
1985	spin_lock_irqsave(&q->current_entry_lock, flags);
1986
1987	taprio_start_sched(sch, start, new: new_admin);
1988
1989	rcu_assign_pointer(q->admin_sched, new_admin);
1990	if (admin)
1991	call_rcu(head: &admin->rcu, func: taprio_free_sched_cb);
1992
1993	spin_unlock_irqrestore(lock: &q->current_entry_lock, flags);
1994
1995	if (FULL_OFFLOAD_IS_ENABLED(q->flags))
1996	taprio_offload_config_changed(q);
1997	}
1998
1999	new_admin = NULL;
2000	err = `0`;
2001
2002	if (!stab)
2003	NL_SET_ERR_MSG_MOD(extack,
2004	"Size table not specified, frame length estimations may be inaccurate");
2005
2006	unlock:
2007	spin_unlock_bh(lock: qdisc_lock(qdisc: sch));
2008
2009	free_sched:
2010	if (new_admin)
2011	call_rcu(head: &new_admin->rcu, func: taprio_free_sched_cb);
2012
2013	return err;
2014	}
2015
2016	static void taprio_reset(struct Qdisc *sch)
2017	{
2018	struct taprio_sched *q = qdisc_priv(sch);
2019	struct net_device *dev = qdisc_dev(qdisc: sch);
2020	int i;
2021
2022	hrtimer_cancel(timer: &q->advance_timer);
2023
2024	if (q->qdiscs) {
2025	for (i = `0`; i < dev->num_tx_queues; i++)
2026	if (q->qdiscs[i])
2027	qdisc_reset(qdisc: q->qdiscs[i]);
2028	}
2029	}
2030
2031	static void taprio_destroy(struct Qdisc *sch)
2032	{
2033	struct taprio_sched *q = qdisc_priv(sch);
2034	struct net_device *dev = qdisc_dev(qdisc: sch);
2035	struct sched_gate_list oper, admin;
2036	unsigned int i;
2037
2038	list_del(entry: &q->taprio_list);
2039
2040	/ Note that taprio_reset() might not be called if an error*
2041	* happens in qdisc_create(), after taprio_init() has been called.
2042	*/
2043	hrtimer_cancel(timer: &q->advance_timer);
2044	qdisc_synchronize(q: sch);
2045
2046	taprio_disable_offload(dev, q, NULL);
2047
2048	if (q->qdiscs) {
2049	for (i = `0`; i < dev->num_tx_queues; i++)
2050	qdisc_put(qdisc: q->qdiscs[i]);
2051
2052	kfree(objp: q->qdiscs);
2053	}
2054	q->qdiscs = NULL;
2055
2056	netdev_reset_tc(dev);
2057
2058	oper = rtnl_dereference(q->oper_sched);
2059	admin = rtnl_dereference(q->admin_sched);
2060
2061	if (oper)
2062	call_rcu(head: &oper->rcu, func: taprio_free_sched_cb);
2063
2064	if (admin)
2065	call_rcu(head: &admin->rcu, func: taprio_free_sched_cb);
2066
2067	taprio_cleanup_broken_mqprio(q);
2068	}
2069
2070	static int taprio_init(struct Qdisc sch, struct* nlattr *opt,
2071	struct netlink_ext_ack *extack)
2072	{
2073	struct taprio_sched *q = qdisc_priv(sch);
2074	struct net_device *dev = qdisc_dev(qdisc: sch);
2075	int i, tc;
2076
2077	spin_lock_init(&q->current_entry_lock);
2078
2079	hrtimer_init(timer: &q->advance_timer, CLOCK_TAI, mode: HRTIMER_MODE_ABS);
2080	q->advance_timer.function = advance_sched;
2081
2082	q->root = sch;
2083
2084	/ We only support static clockids. Use an invalid value as default*
2085	* and get the valid one on taprio_change().
2086	*/
2087	q->clockid = -`1`;
2088	q->flags = TAPRIO_FLAGS_INVALID;
2089
2090	list_add(new: &q->taprio_list, head: &taprio_list);
2091
2092	if (sch->parent != TC_H_ROOT) {
2093	NL_SET_ERR_MSG_MOD(extack, "Can only be attached as root qdisc");
2094	return -EOPNOTSUPP;
2095	}
2096
2097	if (!netif_is_multiqueue(dev)) {
2098	NL_SET_ERR_MSG_MOD(extack, "Multi-queue device is required");
2099	return -EOPNOTSUPP;
2100	}
2101
2102	q->qdiscs = kcalloc(n: dev->num_tx_queues, size: sizeof(q->qdiscs[`0`]),
2103	GFP_KERNEL);
2104	if (!q->qdiscs)
2105	return -ENOMEM;
2106
2107	if (!opt)
2108	return -EINVAL;
2109
2110	for (i = `0`; i < dev->num_tx_queues; i++) {
2111	struct netdev_queue *dev_queue;
2112	struct Qdisc *qdisc;
2113
2114	dev_queue = netdev_get_tx_queue(dev, index: i);
2115	qdisc = qdisc_create_dflt(dev_queue,
2116	ops: &pfifo_qdisc_ops,
2117	TC_H_MAKE(TC_H_MAJ(sch->handle),
2118	TC_H_MIN(i + `1`)),
2119	extack);
2120	if (!qdisc)
2121	return -ENOMEM;
2122
2123	if (i < dev->real_num_tx_queues)
2124	qdisc_hash_add(q: qdisc, invisible: false);
2125
2126	q->qdiscs[i] = qdisc;
2127	}
2128
2129	for (tc = `0`; tc < TC_QOPT_MAX_QUEUE; tc++)
2130	q->fp[tc] = TC_FP_EXPRESS;
2131
2132	taprio_detect_broken_mqprio(q);
2133
2134	return taprio_change(sch, opt, extack);
2135	}
2136
2137	static void taprio_attach(struct Qdisc *sch)
2138	{
2139	struct taprio_sched *q = qdisc_priv(sch);
2140	struct net_device *dev = qdisc_dev(qdisc: sch);
2141	unsigned int ntx;
2142
2143	/ Attach underlying qdisc /
2144	for (ntx = `0`; ntx < dev->num_tx_queues; ntx++) {
2145	struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, index: ntx);
2146	struct Qdisc old, dev_queue_qdisc;
2147
2148	if (FULL_OFFLOAD_IS_ENABLED(q->flags)) {
2149	struct Qdisc *qdisc = q->qdiscs[ntx];
2150
2151	/ In offload mode, the root taprio qdisc is bypassed*
2152	* and the netdev TX queues see the children directly
2153	*/
2154	qdisc->flags \|= TCQ_F_ONETXQUEUE \| TCQ_F_NOPARENT;
2155	dev_queue_qdisc = qdisc;
2156	} else {
2157	/ In software mode, attach the root taprio qdisc*
2158	* to all netdev TX queues, so that dev_qdisc_enqueue()
2159	* goes through taprio_enqueue().
2160	*/
2161	dev_queue_qdisc = sch;
2162	}
2163	old = dev_graft_qdisc(dev_queue, qdisc: dev_queue_qdisc);
2164	/ The qdisc's refcount requires to be elevated once*
2165	* for each netdev TX queue it is grafted onto
2166	*/
2167	qdisc_refcount_inc(qdisc: dev_queue_qdisc);
2168	if (old)
2169	qdisc_put(qdisc: old);
2170	}
2171	}
2172
2173	static struct netdev_queue taprio_queue_get(struct* Qdisc *sch,
2174	unsigned long cl)
2175	{
2176	struct net_device *dev = qdisc_dev(qdisc: sch);
2177	unsigned long ntx = cl - `1`;
2178
2179	if (ntx >= dev->num_tx_queues)
2180	return NULL;
2181
2182	return netdev_get_tx_queue(dev, index: ntx);
2183	}
2184
2185	static int taprio_graft(struct Qdisc sch, unsigned* long cl,
2186	struct Qdisc new, struct* Qdisc **old,
2187	struct netlink_ext_ack *extack)
2188	{
2189	struct taprio_sched *q = qdisc_priv(sch);
2190	struct net_device *dev = qdisc_dev(qdisc: sch);
2191	struct netdev_queue *dev_queue = taprio_queue_get(sch, cl);
2192
2193	if (!dev_queue)
2194	return -EINVAL;
2195
2196	if (dev->flags & IFF_UP)
2197	dev_deactivate(dev);
2198
2199	/ In offload mode, the child Qdisc is directly attached to the netdev*
2200	* TX queue, and thus, we need to keep its refcount elevated in order
2201	* to counteract qdisc_graft()'s call to qdisc_put() once per TX queue.
2202	* However, save the reference to the new qdisc in the private array in
2203	* both software and offload cases, to have an up-to-date reference to
2204	* our children.
2205	*/
2206	*old = q->qdiscs[cl - `1`];
2207	if (FULL_OFFLOAD_IS_ENABLED(q->flags)) {
2208	WARN_ON_ONCE(dev_graft_qdisc(dev_queue, new) != *old);
2209	if (new)
2210	qdisc_refcount_inc(qdisc: new);
2211	if (*old)
2212	qdisc_put(qdisc: *old);
2213	}
2214
2215	q->qdiscs[cl - `1`] = new;
2216	if (new)
2217	new->flags \|= TCQ_F_ONETXQUEUE \| TCQ_F_NOPARENT;
2218
2219	if (dev->flags & IFF_UP)
2220	dev_activate(dev);
2221
2222	return `0`;
2223	}
2224
2225	static int dump_entry(struct sk_buff *msg,
2226	const struct sched_entry *entry)
2227	{
2228	struct nlattr *item;
2229
2230	item = nla_nest_start_noflag(skb: msg, attrtype: TCA_TAPRIO_SCHED_ENTRY);
2231	if (!item)
2232	return -ENOSPC;
2233
2234	if (nla_put_u32(skb: msg, attrtype: TCA_TAPRIO_SCHED_ENTRY_INDEX, value: entry->index))
2235	goto nla_put_failure;
2236
2237	if (nla_put_u8(skb: msg, attrtype: TCA_TAPRIO_SCHED_ENTRY_CMD, value: entry->command))
2238	goto nla_put_failure;
2239
2240	if (nla_put_u32(skb: msg, attrtype: TCA_TAPRIO_SCHED_ENTRY_GATE_MASK,
2241	value: entry->gate_mask))
2242	goto nla_put_failure;
2243
2244	if (nla_put_u32(skb: msg, attrtype: TCA_TAPRIO_SCHED_ENTRY_INTERVAL,
2245	value: entry->interval))
2246	goto nla_put_failure;
2247
2248	return nla_nest_end(skb: msg, start: item);
2249
2250	nla_put_failure:
2251	nla_nest_cancel(skb: msg, start: item);
2252	return -`1`;
2253	}
2254
2255	static int dump_schedule(struct sk_buff *msg,
2256	const struct sched_gate_list *root)
2257	{
2258	struct nlattr *entry_list;
2259	struct sched_entry *entry;
2260
2261	if (nla_put_s64(skb: msg, attrtype: TCA_TAPRIO_ATTR_SCHED_BASE_TIME,
2262	value: root->base_time, padattr: TCA_TAPRIO_PAD))
2263	return -`1`;
2264
2265	if (nla_put_s64(skb: msg, attrtype: TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME,
2266	value: root->cycle_time, padattr: TCA_TAPRIO_PAD))
2267	return -`1`;
2268
2269	if (nla_put_s64(skb: msg, attrtype: TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME_EXTENSION,
2270	value: root->cycle_time_extension, padattr: TCA_TAPRIO_PAD))
2271	return -`1`;
2272
2273	entry_list = nla_nest_start_noflag(skb: msg,
2274	attrtype: TCA_TAPRIO_ATTR_SCHED_ENTRY_LIST);
2275	if (!entry_list)
2276	goto error_nest;
2277
2278	list_for_each_entry(entry, &root->entries, list) {
2279	if (dump_entry(msg, entry) < `0`)
2280	goto error_nest;
2281	}
2282
2283	nla_nest_end(skb: msg, start: entry_list);
2284	return `0`;
2285
2286	error_nest:
2287	nla_nest_cancel(skb: msg, start: entry_list);
2288	return -`1`;
2289	}
2290
2291	static int taprio_dump_tc_entries(struct sk_buff *skb,
2292	struct taprio_sched *q,
2293	struct sched_gate_list *sched)
2294	{
2295	struct nlattr *n;
2296	int tc;
2297
2298	for (tc = `0`; tc < TC_MAX_QUEUE; tc++) {
2299	n = nla_nest_start(skb, attrtype: TCA_TAPRIO_ATTR_TC_ENTRY);
2300	if (!n)
2301	return -EMSGSIZE;
2302
2303	if (nla_put_u32(skb, attrtype: TCA_TAPRIO_TC_ENTRY_INDEX, value: tc))
2304	goto nla_put_failure;
2305
2306	if (nla_put_u32(skb, attrtype: TCA_TAPRIO_TC_ENTRY_MAX_SDU,
2307	value: sched->max_sdu[tc]))
2308	goto nla_put_failure;
2309
2310	if (nla_put_u32(skb, attrtype: TCA_TAPRIO_TC_ENTRY_FP, value: q->fp[tc]))
2311	goto nla_put_failure;
2312
2313	nla_nest_end(skb, start: n);
2314	}
2315
2316	return `0`;
2317
2318	nla_put_failure:
2319	nla_nest_cancel(skb, start: n);
2320	return -EMSGSIZE;
2321	}
2322
2323	static int taprio_put_stat(struct sk_buff *skb, u64 val, u16 attrtype)
2324	{
2325	if (val == TAPRIO_STAT_NOT_SET)
2326	return `0`;
2327	if (nla_put_u64_64bit(skb, attrtype, value: val, padattr: TCA_TAPRIO_OFFLOAD_STATS_PAD))
2328	return -EMSGSIZE;
2329	return `0`;
2330	}
2331
2332	static int taprio_dump_xstats(struct Qdisc sch, struct* gnet_dump *d,
2333	struct tc_taprio_qopt_offload *offload,
2334	struct tc_taprio_qopt_stats *stats)
2335	{
2336	struct net_device *dev = qdisc_dev(qdisc: sch);
2337	const struct net_device_ops *ops;
2338	struct sk_buff *skb = d->skb;
2339	struct nlattr *xstats;
2340	int err;
2341
2342	ops = qdisc_dev(qdisc: sch)->netdev_ops;
2343
2344	/ FIXME I could use qdisc_offload_dump_helper(), but that messes*
2345	* with sch->flags depending on whether the device reports taprio
2346	* stats, and I'm not sure whether that's a good idea, considering
2347	* that stats are optional to the offload itself
2348	*/
2349	if (!ops->ndo_setup_tc)
2350	return `0`;
2351
2352	memset(stats, `0xff`, sizeof(*stats));
2353
2354	err = ops->ndo_setup_tc(dev, TC_SETUP_QDISC_TAPRIO, offload);
2355	if (err == -EOPNOTSUPP)
2356	return `0`;
2357	if (err)
2358	return err;
2359
2360	xstats = nla_nest_start(skb, attrtype: TCA_STATS_APP);
2361	if (!xstats)
2362	goto err;
2363
2364	if (taprio_put_stat(skb, val: stats->window_drops,
2365	attrtype: TCA_TAPRIO_OFFLOAD_STATS_WINDOW_DROPS) \|\|
2366	taprio_put_stat(skb, val: stats->tx_overruns,
2367	attrtype: TCA_TAPRIO_OFFLOAD_STATS_TX_OVERRUNS))
2368	goto err_cancel;
2369
2370	nla_nest_end(skb, start: xstats);
2371
2372	return `0`;
2373
2374	err_cancel:
2375	nla_nest_cancel(skb, start: xstats);
2376	err:
2377	return -EMSGSIZE;
2378	}
2379
2380	static int taprio_dump_stats(struct Qdisc sch, struct* gnet_dump *d)
2381	{
2382	struct tc_taprio_qopt_offload offload = {
2383	.cmd = TAPRIO_CMD_STATS,
2384	};
2385
2386	return taprio_dump_xstats(sch, d, offload: &offload, stats: &offload.stats);
2387	}
2388
2389	static int taprio_dump(struct Qdisc sch, struct* sk_buff *skb)
2390	{
2391	struct taprio_sched *q = qdisc_priv(sch);
2392	struct net_device *dev = qdisc_dev(qdisc: sch);
2393	struct sched_gate_list oper, admin;
2394	struct tc_mqprio_qopt opt = { `0` };
2395	struct nlattr nest, sched_nest;
2396
2397	oper = rtnl_dereference(q->oper_sched);
2398	admin = rtnl_dereference(q->admin_sched);
2399
2400	mqprio_qopt_reconstruct(dev, qopt: &opt);
2401
2402	nest = nla_nest_start_noflag(skb, attrtype: TCA_OPTIONS);
2403	if (!nest)
2404	goto start_error;
2405
2406	if (nla_put(skb, attrtype: TCA_TAPRIO_ATTR_PRIOMAP, attrlen: sizeof(opt), data: &opt))
2407	goto options_error;
2408
2409	if (!FULL_OFFLOAD_IS_ENABLED(q->flags) &&
2410	nla_put_s32(skb, attrtype: TCA_TAPRIO_ATTR_SCHED_CLOCKID, value: q->clockid))
2411	goto options_error;
2412
2413	if (q->flags && nla_put_u32(skb, attrtype: TCA_TAPRIO_ATTR_FLAGS, value: q->flags))
2414	goto options_error;
2415
2416	if (q->txtime_delay &&
2417	nla_put_u32(skb, attrtype: TCA_TAPRIO_ATTR_TXTIME_DELAY, value: q->txtime_delay))
2418	goto options_error;
2419
2420	if (oper && taprio_dump_tc_entries(skb, q, sched: oper))
2421	goto options_error;
2422
2423	if (oper && dump_schedule(msg: skb, root: oper))
2424	goto options_error;
2425
2426	if (!admin)
2427	goto done;
2428
2429	sched_nest = nla_nest_start_noflag(skb, attrtype: TCA_TAPRIO_ATTR_ADMIN_SCHED);
2430	if (!sched_nest)
2431	goto options_error;
2432
2433	if (dump_schedule(msg: skb, root: admin))
2434	goto admin_error;
2435
2436	nla_nest_end(skb, start: sched_nest);
2437
2438	done:
2439	return nla_nest_end(skb, start: nest);
2440
2441	admin_error:
2442	nla_nest_cancel(skb, start: sched_nest);
2443
2444	options_error:
2445	nla_nest_cancel(skb, start: nest);
2446
2447	start_error:
2448	return -ENOSPC;
2449	}
2450
2451	static struct Qdisc taprio_leaf(struct* Qdisc sch, unsigned* long cl)
2452	{
2453	struct taprio_sched *q = qdisc_priv(sch);
2454	struct net_device *dev = qdisc_dev(qdisc: sch);
2455	unsigned int ntx = cl - `1`;
2456
2457	if (ntx >= dev->num_tx_queues)
2458	return NULL;
2459
2460	return q->qdiscs[ntx];
2461	}
2462
2463	static unsigned long taprio_find(struct Qdisc *sch, u32 classid)
2464	{
2465	unsigned int ntx = TC_H_MIN(classid);
2466
2467	if (!taprio_queue_get(sch, cl: ntx))
2468	return `0`;
2469	return ntx;
2470	}
2471
2472	static int taprio_dump_class(struct Qdisc sch, unsigned* long cl,
2473	struct sk_buff skb, struct* tcmsg *tcm)
2474	{
2475	struct Qdisc *child = taprio_leaf(sch, cl);
2476
2477	tcm->tcm_parent = TC_H_ROOT;
2478	tcm->tcm_handle \|= TC_H_MIN(cl);
2479	tcm->tcm_info = child->handle;
2480
2481	return `0`;
2482	}
2483
2484	static int taprio_dump_class_stats(struct Qdisc sch, unsigned* long cl,
2485	struct gnet_dump *d)
2486	__releases(d->lock)
2487	__acquires(d->lock)
2488	{
2489	struct Qdisc *child = taprio_leaf(sch, cl);
2490	struct tc_taprio_qopt_offload offload = {
2491	.cmd = TAPRIO_CMD_QUEUE_STATS,
2492	.queue_stats = {
2493	.queue = cl - `1`,
2494	},
2495	};
2496
2497	if (gnet_stats_copy_basic(d, NULL, b: &child->bstats, running: true) < `0` \|\|
2498	qdisc_qstats_copy(d, sch: child) < `0`)
2499	return -`1`;
2500
2501	return taprio_dump_xstats(sch, d, offload: &offload, stats: &offload.queue_stats.stats);
2502	}
2503
2504	static void taprio_walk(struct Qdisc sch, struct* qdisc_walker *arg)
2505	{
2506	struct net_device *dev = qdisc_dev(qdisc: sch);
2507	unsigned long ntx;
2508
2509	if (arg->stop)
2510	return;
2511
2512	arg->count = arg->skip;
2513	for (ntx = arg->skip; ntx < dev->num_tx_queues; ntx++) {
2514	if (!tc_qdisc_stats_dump(sch, cl: ntx + `1`, arg))
2515	break;
2516	}
2517	}
2518
2519	static struct netdev_queue taprio_select_queue(struct* Qdisc *sch,
2520	struct tcmsg *tcm)
2521	{
2522	return taprio_queue_get(sch, TC_H_MIN(tcm->tcm_parent));
2523	}
2524
2525	static const struct Qdisc_class_ops taprio_class_ops = {
2526	.graft = taprio_graft,
2527	.leaf = taprio_leaf,
2528	.find = taprio_find,
2529	.walk = taprio_walk,
2530	.dump = taprio_dump_class,
2531	.dump_stats = taprio_dump_class_stats,
2532	.select_queue = taprio_select_queue,
2533	};
2534
2535	static struct Qdisc_ops taprio_qdisc_ops __read_mostly = {
2536	.cl_ops = &taprio_class_ops,
2537	.id = "taprio",
2538	.priv_size = sizeof(struct taprio_sched),
2539	.init = taprio_init,
2540	.change = taprio_change,
2541	.destroy = taprio_destroy,
2542	.reset = taprio_reset,
2543	.attach = taprio_attach,
2544	.peek = taprio_peek,
2545	.dequeue = taprio_dequeue,
2546	.enqueue = taprio_enqueue,
2547	.dump = taprio_dump,
2548	.dump_stats = taprio_dump_stats,
2549	.owner = THIS_MODULE,
2550	};
2551
2552	static struct notifier_block taprio_device_notifier = {
2553	.notifier_call = taprio_dev_notifier,
2554	};
2555
2556	static int __init taprio_module_init(void)
2557	{
2558	int err = register_netdevice_notifier(nb: &taprio_device_notifier);
2559
2560	if (err)
2561	return err;
2562
2563	return register_qdisc(qops: &taprio_qdisc_ops);
2564	}
2565
2566	static void __exit taprio_module_exit(void)
2567	{
2568	unregister_qdisc(qops: &taprio_qdisc_ops);
2569	unregister_netdevice_notifier(nb: &taprio_device_notifier);
2570	}
2571
2572	module_init(taprio_module_init);
2573	module_exit(taprio_module_exit);
2574	MODULE_LICENSE("GPL");
2575

source code of linux/net/sched/sch_taprio.c