/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _LINUX_NET_QUEUES_H
#define _LINUX_NET_QUEUES_H

#include <linux/netdevice.h>

/* See the netdev.yaml spec for definition of each statistic */
struct netdev_queue_stats_rx {
	u64 bytes;
	u64 packets;
	u64 alloc_fail;
};

struct netdev_queue_stats_tx {
	u64 bytes;
	u64 packets;
};

/**
 * struct netdev_stat_ops - netdev ops for fine grained stats
 * @get_queue_stats_rx: get stats for a given Rx queue
 * @get_queue_stats_tx: get stats for a given Tx queue
 * @get_base_stats: get base stats (not belonging to any live instance)
 *
 * Query stats for a given object. The values of the statistics are undefined
 * on entry (specifically they are *not* zero-initialized). Drivers should
 * assign values only to the statistics they collect. Statistics which are not
 * collected must be left undefined.
 *
 * Queue objects are not necessarily persistent, and only currently active
 * queues are queried by the per-queue callbacks. This means that per-queue
 * statistics will not generally add up to the total number of events for
 * the device. The @get_base_stats callback allows filling in the delta
 * between events for currently live queues and overall device history.
 * When the statistics for the entire device are queried, first @get_base_stats
 * is issued to collect the delta, and then a series of per-queue callbacks.
 * Only statistics which are set in @get_base_stats will be reported
 * at the device level, meaning that unlike in queue callbacks, setting
 * a statistic to zero in @get_base_stats is a legitimate thing to do.
 * This is because @get_base_stats has a second function of designating which
 * statistics are in fact correct for the entire device (e.g. when history
 * for some of the events is not maintained, and a reliable "total" cannot
 * be provided).
 *
 * Device drivers can assume that when collecting total device stats,
 * the @get_base_stats and subsequent per-queue calls are performed
 * "atomically" (without releasing the rtnl_lock).
 *
 * Device drivers are encouraged to reset the per-queue statistics when
 * the number of queues changes. This is because the primary use case for
 * per-queue statistics is currently to detect traffic imbalance.
 */
struct netdev_stat_ops {
	void (*get_queue_stats_rx)(struct net_device *dev, int idx,
				   struct netdev_queue_stats_rx *stats);
	void (*get_queue_stats_tx)(struct net_device *dev, int idx,
				   struct netdev_queue_stats_tx *stats);
	void (*get_base_stats)(struct net_device *dev,
			       struct netdev_queue_stats_rx *rx,
			       struct netdev_queue_stats_tx *tx);
};

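/* Example (illustrative sketch, not dictated by this header): a driver
 * which counts packets/bytes per ring but tracks allocation failures
 * only device-wide might implement the Rx callbacks as below. The
 * "my_*" names are hypothetical.
 *
 *	static void my_get_queue_stats_rx(struct net_device *dev, int idx,
 *					  struct netdev_queue_stats_rx *rx)
 *	{
 *		struct my_priv *priv = netdev_priv(dev);
 *		struct my_rx_ring *ring = &priv->rx_rings[idx];
 *
 *		// Assign only what is collected per queue; alloc_fail is
 *		// deliberately left undefined here.
 *		rx->packets = ring->packets;
 *		rx->bytes = ring->bytes;
 *	}
 *
 *	static void my_get_base_stats(struct net_device *dev,
 *				      struct netdev_queue_stats_rx *rx,
 *				      struct netdev_queue_stats_tx *tx)
 *	{
 *		struct my_priv *priv = netdev_priv(dev);
 *
 *		// Zero is meaningful here: it declares that the per-queue
 *		// sums are the complete device-level totals.
 *		rx->packets = 0;
 *		rx->bytes = 0;
 *		rx->alloc_fail = priv->alloc_fail;
 *		tx->packets = 0;
 *		tx->bytes = 0;
 *	}
 *
 * The Tx callback follows the same pattern; the filled-in
 * &struct netdev_stat_ops is then assigned to the netdev's stat_ops pointer.
 */
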
/**
 * DOC: Lockless queue stopping / waking helpers.
 *
 * The netif_txq_maybe_stop() and __netif_txq_completed_wake()
 * macros are designed to safely implement stopping
 * and waking netdev queues without full lock protection.
 *
 * We assume that there can be no concurrent stop attempts and no concurrent
 * wake attempts. The try-stop should happen from the xmit handler,
 * while wake-up should be triggered from the NAPI poll context.
 * The two may run concurrently (single producer, single consumer).
 *
 * The try-stop side is expected to run from the xmit handler and therefore
 * it does not reschedule Tx (netif_tx_start_queue() instead of
 * netif_tx_wake_queue()). Use of the ``stop`` macros outside of the xmit
 * handler may lead to the xmit queue being enabled but not run.
 * The waking side does not have similar context restrictions.
 *
 * The macros guarantee that rings will not remain stopped if there's
 * space available, but they do *not* prevent false wake ups when
 * the ring is full! Drivers should check for ring full at the start
 * of the xmit handler.
 *
 * All descriptor ring indexes (and other relevant shared state) must
 * be updated before invoking the macros.
 */

#define netif_txq_try_stop(txq, get_desc, start_thrs) \
	({ \
		int _res; \
 \
		netif_tx_stop_queue(txq); \
		/* Producer index and stop bit must be visible \
		 * to consumer before we recheck. \
		 * Pairs with a barrier in __netif_txq_completed_wake(). \
		 */ \
		smp_mb__after_atomic(); \
 \
		/* We need to check again in case another \
		 * CPU has just made room available. \
		 */ \
		_res = 0; \
		if (unlikely(get_desc >= start_thrs)) { \
			netif_tx_start_queue(txq); \
			_res = -1; \
		} \
		_res; \
	})

/**
 * netif_txq_maybe_stop() - locklessly stop a Tx queue, if needed
 * @txq: struct netdev_queue to stop/start
 * @get_desc: get current number of free descriptors (see requirements below!)
 * @stop_thrs: minimal number of available descriptors for queue to be left
 *	enabled
 * @start_thrs: minimal number of descriptors to re-enable the queue, can be
 *	equal to @stop_thrs or higher to avoid frequent waking
 *
 * All arguments may be evaluated multiple times; beware of side effects.
 * @get_desc must be a formula or a function call; it must always
 * return up-to-date information when evaluated!
 * Expected to be used from ndo_start_xmit; see the DOC comment above.
 *
 * Returns:
 *	 0 if the queue was stopped
 *	 1 if the queue was left enabled
 *	-1 if the queue was re-enabled (raced with waking)
 */
#define netif_txq_maybe_stop(txq, get_desc, stop_thrs, start_thrs) \
	({ \
		int _res; \
 \
		_res = 1; \
		if (unlikely(get_desc < stop_thrs)) \
			_res = netif_txq_try_stop(txq, get_desc, start_thrs); \
		_res; \
	})

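/* Example (illustrative sketch, not dictated by this header): expected
 * use from an xmit handler, with hypothetical "my_*" ring helpers. Note
 * the explicit ring-full check up front (the macros do not prevent false
 * wake ups) and that all ring state is updated before the macro runs.
 *
 *	static netdev_tx_t my_start_xmit(struct sk_buff *skb,
 *					 struct net_device *dev)
 *	{
 *		struct my_tx_ring *ring = my_skb_to_ring(dev, skb);
 *
 *		if (unlikely(my_free_descs(ring) < MY_MAX_DESCS_PER_SKB))
 *			return NETDEV_TX_BUSY;
 *
 *		my_post_descs(ring, skb);	// advances the producer index
 *
 *		// Re-evaluates my_free_descs() after the stop bit and
 *		// producer index are visible, so a racing completion
 *		// cannot leave the queue stopped with room available.
 *		netif_txq_maybe_stop(ring->txq, my_free_descs(ring),
 *				     MY_STOP_THRS, MY_START_THRS);
 *		return NETDEV_TX_OK;
 *	}
 */
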
/* Variant of netdev_tx_completed_queue() which guarantees smp_mb() if
 * @bytes != 0, regardless of kernel config.
 */
static inline void
netdev_txq_completed_mb(struct netdev_queue *dev_queue,
			unsigned int pkts, unsigned int bytes)
{
	if (IS_ENABLED(CONFIG_BQL))
		netdev_tx_completed_queue(dev_queue, pkts, bytes);
	else if (bytes)
		smp_mb();
}

/**
 * __netif_txq_completed_wake() - locklessly wake a Tx queue, if needed
 * @txq: struct netdev_queue to stop/start
 * @pkts: number of packets completed
 * @bytes: number of bytes completed
 * @get_desc: get current number of free descriptors (see requirements below!)
 * @start_thrs: minimal number of descriptors to re-enable the queue
 * @down_cond: down condition, predicate indicating that the queue should
 *	not be woken up even if descriptors are available
 *
 * All arguments may be evaluated multiple times.
 * @get_desc must be a formula or a function call; it must always
 * return up-to-date information when evaluated!
 * Reports completed pkts/bytes to BQL.
 *
 * Returns:
 *	 0 if the queue was woken up
 *	 1 if the queue was already enabled (or disabled but @down_cond is true)
 *	-1 if the queue was left unchanged (@start_thrs not reached)
 */
#define __netif_txq_completed_wake(txq, pkts, bytes, \
				   get_desc, start_thrs, down_cond) \
	({ \
		int _res; \
 \
		/* Report to BQL and piggy back on its barrier. \
		 * Barrier makes sure that anybody stopping the queue \
		 * after this point sees the new consumer index. \
		 * Pairs with barrier in netif_txq_try_stop(). \
		 */ \
		netdev_txq_completed_mb(txq, pkts, bytes); \
 \
		_res = -1; \
		if (pkts && likely(get_desc >= start_thrs)) { \
			_res = 1; \
			if (unlikely(netif_tx_queue_stopped(txq)) && \
			    !(down_cond)) { \
				netif_tx_wake_queue(txq); \
				_res = 0; \
			} \
		} \
		_res; \
	})

#define netif_txq_completed_wake(txq, pkts, bytes, get_desc, start_thrs) \
	__netif_txq_completed_wake(txq, pkts, bytes, get_desc, start_thrs, false)

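/* Example (illustrative sketch, not dictated by this header): the
 * matching completion side, typically run from NAPI poll, again with
 * hypothetical "my_*" helpers. The consumer index must be advanced
 * before the macro is invoked.
 *
 *	static void my_clean_tx(struct my_tx_ring *ring)
 *	{
 *		unsigned int pkts = 0, bytes = 0;
 *
 *		// Reclaims completed descriptors, advancing the consumer
 *		// index and accumulating pkts/bytes.
 *		my_reclaim_descs(ring, &pkts, &bytes);
 *
 *		// Reports to BQL and wakes the queue if it was stopped
 *		// and at least MY_START_THRS descriptors are now free.
 *		netif_txq_completed_wake(ring->txq, pkts, bytes,
 *					 my_free_descs(ring), MY_START_THRS);
 *	}
 */
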
/* subqueue variants follow */

#define netif_subqueue_try_stop(dev, idx, get_desc, start_thrs) \
	({ \
		struct netdev_queue *txq; \
 \
		txq = netdev_get_tx_queue(dev, idx); \
		netif_txq_try_stop(txq, get_desc, start_thrs); \
	})

#define netif_subqueue_maybe_stop(dev, idx, get_desc, stop_thrs, start_thrs) \
	({ \
		struct netdev_queue *txq; \
 \
		txq = netdev_get_tx_queue(dev, idx); \
		netif_txq_maybe_stop(txq, get_desc, stop_thrs, start_thrs); \
	})

#define netif_subqueue_completed_wake(dev, idx, pkts, bytes, \
				      get_desc, start_thrs) \
	({ \
		struct netdev_queue *txq; \
 \
		txq = netdev_get_tx_queue(dev, idx); \
		netif_txq_completed_wake(txq, pkts, bytes, \
					 get_desc, start_thrs); \
	})

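/* Example (illustrative sketch): drivers which track the queue index
 * rather than the netdev_queue pointer can use the subqueue variants
 * directly, e.g. with the hypothetical names from the sketches above:
 *
 *	netif_subqueue_maybe_stop(dev, ring->idx, my_free_descs(ring),
 *				  MY_STOP_THRS, MY_START_THRS);
 */
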
#endif /* _LINUX_NET_QUEUES_H */