// SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause)
/*
 * Copyright(c) 2020 Intel Corporation.
 *
 */

/*
 * This file contains HFI1 support for IPOIB SDMA functionality
 */

#include <linux/log2.h>
#include <linux/circ_buf.h>

#include "sdma.h"
#include "verbs.h"
#include "trace_ibhdrs.h"
#include "ipoib.h"
#include "trace_tx.h"

/* Add a convenience helper */
#define CIRC_ADD(val, add, size) (((val) + (add)) & ((size) - 1))
#define CIRC_NEXT(val, size) CIRC_ADD(val, 1, size)
#define CIRC_PREV(val, size) CIRC_ADD(val, -1, size)
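
/*
 * Note: these wrap helpers assume "size" is a power of two (the tx ring
 * size is rounded up with roundup_pow_of_two() in hfi1_ipoib_txreq_init()),
 * so masking with (size - 1) is equivalent to a modulo of the ring size.
 */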

struct ipoib_txparms {
	struct hfi1_devdata *dd;
	struct rdma_ah_attr *ah_attr;
	struct hfi1_ibport *ibp;
	struct hfi1_ipoib_txq *txq;
	union hfi1_ipoib_flow flow;
	u32 dqpn;
	u8 hdr_dwords;
	u8 entropy;
};

static struct ipoib_txreq *
hfi1_txreq_from_idx(struct hfi1_ipoib_circ_buf *r, u32 idx)
{
	return (struct ipoib_txreq *)(r->items + (idx << r->shift));
}

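/*
 * sent_txreqs and complete_txreqs are free-running counters; their
 * difference is the number of tx requests currently outstanding on the
 * ring (see hfi1_ipoib_used() below).
 */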
static u32 hfi1_ipoib_txreqs(const u64 sent, const u64 completed)
{
	return sent - completed;
}

static u64 hfi1_ipoib_used(struct hfi1_ipoib_txq *txq)
{
	return hfi1_ipoib_txreqs(txq->tx_ring.sent_txreqs,
				 txq->tx_ring.complete_txreqs);
}

static void hfi1_ipoib_stop_txq(struct hfi1_ipoib_txq *txq)
{
	trace_hfi1_txq_stop(txq);
	if (atomic_inc_return(&txq->tx_ring.stops) == 1)
		netif_stop_subqueue(txq->priv->netdev, txq->q_idx);
}

static void hfi1_ipoib_wake_txq(struct hfi1_ipoib_txq *txq)
{
	trace_hfi1_txq_wake(txq);
	if (atomic_dec_and_test(&txq->tx_ring.stops))
		netif_wake_subqueue(txq->priv->netdev, txq->q_idx);
}

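/*
 * High/low watermarks for the tx ring: transmission is stopped once the
 * number of outstanding requests reaches the high watermark and resumed
 * once it drains below the low watermark (roughly half of the ring or
 * tx_queue_len, whichever is smaller).
 */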
static uint hfi1_ipoib_ring_hwat(struct hfi1_ipoib_txq *txq)
{
	return min_t(uint, txq->priv->netdev->tx_queue_len,
		     txq->tx_ring.max_items - 1);
}

static uint hfi1_ipoib_ring_lwat(struct hfi1_ipoib_txq *txq)
{
	return min_t(uint, txq->priv->netdev->tx_queue_len,
		     txq->tx_ring.max_items) >> 1;
}

static void hfi1_ipoib_check_queue_depth(struct hfi1_ipoib_txq *txq)
{
	++txq->tx_ring.sent_txreqs;
	if (hfi1_ipoib_used(txq) >= hfi1_ipoib_ring_hwat(txq) &&
	    !atomic_xchg(&txq->tx_ring.ring_full, 1)) {
		trace_hfi1_txq_full(txq);
		hfi1_ipoib_stop_txq(txq);
	}
}

static void hfi1_ipoib_check_queue_stopped(struct hfi1_ipoib_txq *txq)
{
	struct net_device *dev = txq->priv->netdev;

	/* If shutting down, just return as queue state is irrelevant */
	if (unlikely(dev->reg_state != NETREG_REGISTERED))
		return;

	/*
	 * When the queue has been drained to less than half full it will be
	 * restarted.
	 * The size of the txreq ring is fixed at initialization.
	 * The tx queue len can be adjusted upward while the interface is
	 * running.
	 * The tx queue len can be large enough to overflow the txreq_ring.
	 * Use the minimum of the current tx_queue_len or the ring's max txreqs
	 * to protect against ring overflow.
	 */
	if (hfi1_ipoib_used(txq) < hfi1_ipoib_ring_lwat(txq) &&
	    atomic_xchg(&txq->tx_ring.ring_full, 0)) {
		trace_hfi1_txq_xmit_unstopped(txq);
		hfi1_ipoib_wake_txq(txq);
	}
}

static void hfi1_ipoib_free_tx(struct ipoib_txreq *tx, int budget)
{
	struct hfi1_ipoib_dev_priv *priv = tx->txq->priv;

	if (likely(!tx->sdma_status)) {
		dev_sw_netstats_tx_add(priv->netdev, 1, tx->skb->len);
	} else {
		++priv->netdev->stats.tx_errors;
		dd_dev_warn(priv->dd,
			    "%s: Status = 0x%x pbc 0x%llx txq = %d sde = %d\n",
			    __func__, tx->sdma_status,
			    le64_to_cpu(tx->sdma_hdr->pbc), tx->txq->q_idx,
			    tx->txq->sde->this_idx);
	}

	napi_consume_skb(tx->skb, budget);
	tx->skb = NULL;
	sdma_txclean(priv->dd, &tx->txreq);
}

static void hfi1_ipoib_drain_tx_ring(struct hfi1_ipoib_txq *txq)
{
	struct hfi1_ipoib_circ_buf *tx_ring = &txq->tx_ring;
	int i;
	struct ipoib_txreq *tx;

	for (i = 0; i < tx_ring->max_items; i++) {
		tx = hfi1_txreq_from_idx(tx_ring, i);
		tx->complete = 0;
		dev_kfree_skb_any(tx->skb);
		tx->skb = NULL;
		sdma_txclean(txq->priv->dd, &tx->txreq);
	}
	tx_ring->head = 0;
	tx_ring->tail = 0;
	tx_ring->complete_txreqs = 0;
	tx_ring->sent_txreqs = 0;
	tx_ring->avail = hfi1_ipoib_ring_hwat(txq);
}

static int hfi1_ipoib_poll_tx_ring(struct napi_struct *napi, int budget)
{
	struct hfi1_ipoib_txq *txq =
		container_of(napi, struct hfi1_ipoib_txq, napi);
	struct hfi1_ipoib_circ_buf *tx_ring = &txq->tx_ring;
	u32 head = tx_ring->head;
	u32 max_tx = tx_ring->max_items;
	int work_done;
	struct ipoib_txreq *tx = hfi1_txreq_from_idx(tx_ring, head);

	trace_hfi1_txq_poll(txq);
	for (work_done = 0; work_done < budget; work_done++) {
		/* See hfi1_ipoib_sdma_complete() */
		if (!smp_load_acquire(&tx->complete))
			break;
		tx->complete = 0;
		trace_hfi1_tx_produce(tx, head);
		hfi1_ipoib_free_tx(tx, budget);
		head = CIRC_NEXT(head, max_tx);
		tx = hfi1_txreq_from_idx(tx_ring, head);
	}
	tx_ring->complete_txreqs += work_done;

	/* Finished freeing tx items so store the head value. */
	smp_store_release(&tx_ring->head, head);

	hfi1_ipoib_check_queue_stopped(txq);

	if (work_done < budget)
		napi_complete_done(napi, work_done);

	return work_done;
}

static void hfi1_ipoib_sdma_complete(struct sdma_txreq *txreq, int status)
{
	struct ipoib_txreq *tx = container_of(txreq, struct ipoib_txreq, txreq);

	trace_hfi1_txq_complete(tx->txq);
	tx->sdma_status = status;
	/* see hfi1_ipoib_poll_tx_ring */
	smp_store_release(&tx->complete, 1);
	napi_schedule_irqoff(&tx->txq->napi);
}

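/*
 * Map the skb payload for SDMA: the linear (headlen) portion is added as a
 * kvaddr descriptor and each page fragment is added as a page descriptor.
 */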
static int hfi1_ipoib_build_ulp_payload(struct ipoib_txreq *tx,
					struct ipoib_txparms *txp)
{
	struct hfi1_devdata *dd = txp->dd;
	struct sdma_txreq *txreq = &tx->txreq;
	struct sk_buff *skb = tx->skb;
	int ret = 0;
	int i;

	if (skb_headlen(skb)) {
		ret = sdma_txadd_kvaddr(dd, txreq, skb->data, skb_headlen(skb));
		if (unlikely(ret))
			return ret;
	}

	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
		const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];

		ret = sdma_txadd_page(dd,
				      txreq,
				      skb_frag_page(frag),
				      skb_frag_off(frag),
				      skb_frag_size(frag),
				      NULL, NULL, NULL);
		if (unlikely(ret))
			break;
	}

	return ret;
}

static int hfi1_ipoib_build_tx_desc(struct ipoib_txreq *tx,
				    struct ipoib_txparms *txp)
{
	struct hfi1_devdata *dd = txp->dd;
	struct sdma_txreq *txreq = &tx->txreq;
	struct hfi1_sdma_header *sdma_hdr = tx->sdma_hdr;
	u16 pkt_bytes =
		sizeof(sdma_hdr->pbc) + (txp->hdr_dwords << 2) + tx->skb->len;
	int ret;

	ret = sdma_txinit(txreq, 0, pkt_bytes, hfi1_ipoib_sdma_complete);
	if (unlikely(ret))
		return ret;

	/* add pbc + headers */
	ret = sdma_txadd_kvaddr(dd,
				txreq,
				sdma_hdr,
				sizeof(sdma_hdr->pbc) + (txp->hdr_dwords << 2));
	if (unlikely(ret))
		return ret;

	/* add the ulp payload */
	return hfi1_ipoib_build_ulp_payload(tx, txp);
}

static void hfi1_ipoib_build_ib_tx_headers(struct ipoib_txreq *tx,
					   struct ipoib_txparms *txp)
{
	struct hfi1_ipoib_dev_priv *priv = tx->txq->priv;
	struct hfi1_sdma_header *sdma_hdr = tx->sdma_hdr;
	struct sk_buff *skb = tx->skb;
	struct hfi1_pportdata *ppd = ppd_from_ibp(txp->ibp);
	struct rdma_ah_attr *ah_attr = txp->ah_attr;
	struct ib_other_headers *ohdr;
	struct ib_grh *grh;
	u16 dwords;
	u16 slid;
	u16 dlid;
	u16 lrh0;
	u32 bth0;
	u32 sqpn = (u32)(priv->netdev->dev_addr[1] << 16 |
			 priv->netdev->dev_addr[2] << 8 |
			 priv->netdev->dev_addr[3]);
	u16 payload_dwords;
	u8 pad_cnt;

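	/*
	 * Bytes of padding needed to round the payload up to a 4-byte
	 * (dword) boundary: -len & 3 == (4 - (len & 3)) & 3.
	 */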
	pad_cnt = -skb->len & 3;

	/* Includes ICRC */
	payload_dwords = ((skb->len + pad_cnt) >> 2) + SIZE_OF_CRC;

	/* header size in dwords LRH+BTH+DETH = (8+12+8)/4. */
	txp->hdr_dwords = 7;

	if (rdma_ah_get_ah_flags(ah_attr) & IB_AH_GRH) {
		grh = &sdma_hdr->hdr.ibh.u.l.grh;
		txp->hdr_dwords +=
			hfi1_make_grh(txp->ibp,
				      grh,
				      rdma_ah_read_grh(ah_attr),
				      txp->hdr_dwords - LRH_9B_DWORDS,
				      payload_dwords);
		lrh0 = HFI1_LRH_GRH;
		ohdr = &sdma_hdr->hdr.ibh.u.l.oth;
	} else {
		lrh0 = HFI1_LRH_BTH;
		ohdr = &sdma_hdr->hdr.ibh.u.oth;
	}

	lrh0 |= (rdma_ah_get_sl(ah_attr) & 0xf) << 4;
	lrh0 |= (txp->flow.sc5 & 0xf) << 12;

	dlid = opa_get_lid(rdma_ah_get_dlid(ah_attr), 9B);
	if (dlid == be16_to_cpu(IB_LID_PERMISSIVE)) {
		slid = be16_to_cpu(IB_LID_PERMISSIVE);
	} else {
		u16 lid = (u16)ppd->lid;

		if (lid) {
			lid |= rdma_ah_get_path_bits(ah_attr) &
				((1 << ppd->lmc) - 1);
			slid = lid;
		} else {
			slid = be16_to_cpu(IB_LID_PERMISSIVE);
		}
	}

	/* Includes ICRC */
	dwords = txp->hdr_dwords + payload_dwords;

	/* Build the lrh */
	sdma_hdr->hdr.hdr_type = HFI1_PKT_TYPE_9B;
	hfi1_make_ib_hdr(&sdma_hdr->hdr.ibh, lrh0, dwords, dlid, slid);

	/* Build the bth */
	bth0 = (IB_OPCODE_UD_SEND_ONLY << 24) | (pad_cnt << 20) | priv->pkey;

	ohdr->bth[0] = cpu_to_be32(bth0);
	ohdr->bth[1] = cpu_to_be32(txp->dqpn);
	ohdr->bth[2] = cpu_to_be32(mask_psn((u32)txp->txq->tx_ring.sent_txreqs));

	/* Build the deth */
	ohdr->u.ud.deth[0] = cpu_to_be32(priv->qkey);
	ohdr->u.ud.deth[1] = cpu_to_be32((txp->entropy <<
					  HFI1_IPOIB_ENTROPY_SHIFT) | sqpn);

	/* Construct the pbc. */
	sdma_hdr->pbc =
		cpu_to_le64(create_pbc(ppd,
				       ib_is_sc5(txp->flow.sc5) <<
							  PBC_DC_INFO_SHIFT,
				       0,
				       sc_to_vlt(priv->dd, txp->flow.sc5),
				       dwords - SIZE_OF_CRC +
						(sizeof(sdma_hdr->pbc) >> 2)));
}

static struct ipoib_txreq *hfi1_ipoib_send_dma_common(struct net_device *dev,
						       struct sk_buff *skb,
						       struct ipoib_txparms *txp)
{
	struct hfi1_ipoib_dev_priv *priv = hfi1_ipoib_priv(dev);
	struct hfi1_ipoib_txq *txq = txp->txq;
	struct ipoib_txreq *tx;
	struct hfi1_ipoib_circ_buf *tx_ring = &txq->tx_ring;
	u32 tail = tx_ring->tail;
	int ret;

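	/*
	 * tx_ring->avail caches the number of known-free slots so the
	 * producer only has to read the consumer-updated head (with the
	 * matching acquire) when the cached count runs out.
	 */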
	if (unlikely(!tx_ring->avail)) {
		u32 head;

		if (hfi1_ipoib_used(txq) >= hfi1_ipoib_ring_hwat(txq))
			/* This shouldn't happen with a stopped queue */
			return ERR_PTR(-ENOMEM);
		/* See hfi1_ipoib_poll_tx_ring() */
		head = smp_load_acquire(&tx_ring->head);
		tx_ring->avail =
			min_t(u32, hfi1_ipoib_ring_hwat(txq),
			      CIRC_CNT(head, tail, tx_ring->max_items));
	} else {
		tx_ring->avail--;
	}
	tx = hfi1_txreq_from_idx(tx_ring, tail);
	trace_hfi1_txq_alloc_tx(txq);

	/* so that we can test if the sdma descriptors are there */
	tx->txreq.num_desc = 0;
	tx->txq = txq;
	tx->skb = skb;
	INIT_LIST_HEAD(&tx->txreq.list);

	hfi1_ipoib_build_ib_tx_headers(tx, txp);

	ret = hfi1_ipoib_build_tx_desc(tx, txp);
	if (likely(!ret)) {
		if (txq->flow.as_int != txp->flow.as_int) {
			txq->flow.tx_queue = txp->flow.tx_queue;
			txq->flow.sc5 = txp->flow.sc5;
			txq->sde =
				sdma_select_engine_sc(priv->dd,
						      txp->flow.tx_queue,
						      txp->flow.sc5);
			trace_hfi1_flow_switch(txq);
		}

		return tx;
	}

	sdma_txclean(priv->dd, &tx->txreq);

	return ERR_PTR(ret);
}

static int hfi1_ipoib_submit_tx_list(struct net_device *dev,
				     struct hfi1_ipoib_txq *txq)
{
	int ret;
	u16 count_out;

	ret = sdma_send_txlist(txq->sde,
			       iowait_get_ib_work(&txq->wait),
			       &txq->tx_list,
			       &count_out);
	if (likely(!ret) || ret == -EBUSY || ret == -ECOMM)
		return ret;

	dd_dev_warn(txq->priv->dd, "cannot send skb tx list, err %d.\n", ret);

	return ret;
}

static int hfi1_ipoib_flush_tx_list(struct net_device *dev,
				    struct hfi1_ipoib_txq *txq)
{
	int ret = 0;

	if (!list_empty(&txq->tx_list)) {
		/* Flush the current list */
		ret = hfi1_ipoib_submit_tx_list(dev, txq);

		if (unlikely(ret))
			if (ret != -EBUSY)
				++dev->stats.tx_carrier_errors;
	}

	return ret;
}

static int hfi1_ipoib_submit_tx(struct hfi1_ipoib_txq *txq,
				struct ipoib_txreq *tx)
{
	int ret;

	ret = sdma_send_txreq(txq->sde,
			      iowait_get_ib_work(&txq->wait),
			      &tx->txreq,
			      txq->pkts_sent);
	if (likely(!ret)) {
		txq->pkts_sent = true;
		iowait_starve_clear(txq->pkts_sent, &txq->wait);
	}

	return ret;
}

static int hfi1_ipoib_send_dma_single(struct net_device *dev,
				      struct sk_buff *skb,
				      struct ipoib_txparms *txp)
{
	struct hfi1_ipoib_txq *txq = txp->txq;
	struct hfi1_ipoib_circ_buf *tx_ring;
	struct ipoib_txreq *tx;
	int ret;

	tx = hfi1_ipoib_send_dma_common(dev, skb, txp);
	if (IS_ERR(tx)) {
		int ret = PTR_ERR(tx);

		dev_kfree_skb_any(skb);

		if (ret == -ENOMEM)
			++dev->stats.tx_errors;
		else
			++dev->stats.tx_carrier_errors;

		return NETDEV_TX_OK;
	}

	tx_ring = &txq->tx_ring;
	trace_hfi1_tx_consume(tx, tx_ring->tail);
	/* consume tx */
	smp_store_release(&tx_ring->tail, CIRC_NEXT(tx_ring->tail, tx_ring->max_items));
	ret = hfi1_ipoib_submit_tx(txq, tx);
	if (likely(!ret)) {
tx_ok:
		trace_sdma_output_ibhdr(txq->priv->dd,
					&tx->sdma_hdr->hdr,
					ib_is_sc5(txp->flow.sc5));
		hfi1_ipoib_check_queue_depth(txq);
		return NETDEV_TX_OK;
	}

	txq->pkts_sent = false;

	if (ret == -EBUSY || ret == -ECOMM)
		goto tx_ok;

	/* mark complete and kick napi tx */
	smp_store_release(&tx->complete, 1);
	napi_schedule(&tx->txq->napi);

	++dev->stats.tx_carrier_errors;

	return NETDEV_TX_OK;
}

static int hfi1_ipoib_send_dma_list(struct net_device *dev,
				    struct sk_buff *skb,
				    struct ipoib_txparms *txp)
{
	struct hfi1_ipoib_txq *txq = txp->txq;
	struct hfi1_ipoib_circ_buf *tx_ring;
	struct ipoib_txreq *tx;

	/* Has the flow changed? */
	if (txq->flow.as_int != txp->flow.as_int) {
		int ret;

		trace_hfi1_flow_flush(txq);
		ret = hfi1_ipoib_flush_tx_list(dev, txq);
		if (unlikely(ret)) {
			if (ret == -EBUSY)
				++dev->stats.tx_dropped;
			dev_kfree_skb_any(skb);
			return NETDEV_TX_OK;
		}
	}
	tx = hfi1_ipoib_send_dma_common(dev, skb, txp);
	if (IS_ERR(tx)) {
		int ret = PTR_ERR(tx);

		dev_kfree_skb_any(skb);

		if (ret == -ENOMEM)
			++dev->stats.tx_errors;
		else
			++dev->stats.tx_carrier_errors;

		return NETDEV_TX_OK;
	}

	tx_ring = &txq->tx_ring;
	trace_hfi1_tx_consume(tx, tx_ring->tail);
	/* consume tx */
	smp_store_release(&tx_ring->tail, CIRC_NEXT(tx_ring->tail, tx_ring->max_items));
	list_add_tail(&tx->txreq.list, &txq->tx_list);

	hfi1_ipoib_check_queue_depth(txq);

	trace_sdma_output_ibhdr(txq->priv->dd,
				&tx->sdma_hdr->hdr,
				ib_is_sc5(txp->flow.sc5));

	if (!netdev_xmit_more())
		(void)hfi1_ipoib_flush_tx_list(dev, txq);

	return NETDEV_TX_OK;
}

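/*
 * Derive a flow "entropy" byte by XORing the first four bytes of the
 * transport header (for TCP/UDP these are the source and destination
 * ports), so packets of the same flow keep using the same value.
 */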
static u8 hfi1_ipoib_calc_entropy(struct sk_buff *skb)
{
	if (skb_transport_header_was_set(skb)) {
		u8 *hdr = (u8 *)skb_transport_header(skb);

		return (hdr[0] ^ hdr[1] ^ hdr[2] ^ hdr[3]);
	}

	return (u8)skb_get_queue_mapping(skb);
}

int hfi1_ipoib_send(struct net_device *dev,
		    struct sk_buff *skb,
		    struct ib_ah *address,
		    u32 dqpn)
{
	struct hfi1_ipoib_dev_priv *priv = hfi1_ipoib_priv(dev);
	struct ipoib_txparms txp;
	struct rdma_netdev *rn = netdev_priv(dev);

	if (unlikely(skb->len > rn->mtu + HFI1_IPOIB_ENCAP_LEN)) {
		dd_dev_warn(priv->dd, "packet len %d (> %d) too long to send, dropping\n",
			    skb->len,
			    rn->mtu + HFI1_IPOIB_ENCAP_LEN);
		++dev->stats.tx_dropped;
		++dev->stats.tx_errors;
		dev_kfree_skb_any(skb);
		return NETDEV_TX_OK;
	}

	txp.dd = priv->dd;
	txp.ah_attr = &ibah_to_rvtah(address)->attr;
	txp.ibp = to_iport(priv->device, priv->port_num);
	txp.txq = &priv->txqs[skb_get_queue_mapping(skb)];
	txp.dqpn = dqpn;
	txp.flow.sc5 = txp.ibp->sl_to_sc[rdma_ah_get_sl(txp.ah_attr)];
	txp.flow.tx_queue = (u8)skb_get_queue_mapping(skb);
	txp.entropy = hfi1_ipoib_calc_entropy(skb);

	if (netdev_xmit_more() || !list_empty(&txp.txq->tx_list))
		return hfi1_ipoib_send_dma_list(dev, skb, &txp);

	return hfi1_ipoib_send_dma_single(dev, skb, &txp);
}

/*
 * hfi1_ipoib_sdma_sleep - ipoib sdma sleep function
 *
 * This function gets called from sdma_send_txreq() when there are not enough
 * sdma descriptors available to send the packet. It adds the Tx queue's wait
 * structure to the sdma engine's dmawait list to be woken up when descriptors
 * become available.
 */
static int hfi1_ipoib_sdma_sleep(struct sdma_engine *sde,
				 struct iowait_work *wait,
				 struct sdma_txreq *txreq,
				 uint seq,
				 bool pkts_sent)
{
	struct hfi1_ipoib_txq *txq =
		container_of(wait->iow, struct hfi1_ipoib_txq, wait);

	write_seqlock(&sde->waitlock);

	if (likely(txq->priv->netdev->reg_state == NETREG_REGISTERED)) {
		if (sdma_progress(sde, seq, txreq)) {
			write_sequnlock(&sde->waitlock);
			return -EAGAIN;
		}

		if (list_empty(&txreq->list))
			/* came from non-list submit */
			list_add_tail(&txreq->list, &txq->tx_list);
		if (list_empty(&txq->wait.list)) {
			struct hfi1_ibport *ibp = &sde->ppd->ibport_data;

			if (!atomic_xchg(&txq->tx_ring.no_desc, 1)) {
				trace_hfi1_txq_queued(txq);
				hfi1_ipoib_stop_txq(txq);
			}
			ibp->rvp.n_dmawait++;
			iowait_queue(pkts_sent, wait->iow, &sde->dmawait);
		}

		write_sequnlock(&sde->waitlock);
		return -EBUSY;
	}

	write_sequnlock(&sde->waitlock);
	return -EINVAL;
}

/*
 * hfi1_ipoib_sdma_wakeup - ipoib sdma wakeup function
 *
 * This function gets called when SDMA descriptors become available and the Tx
 * queue's wait structure was previously added to the sdma engine's dmawait
 * list.
 */
static void hfi1_ipoib_sdma_wakeup(struct iowait *wait, int reason)
{
	struct hfi1_ipoib_txq *txq =
		container_of(wait, struct hfi1_ipoib_txq, wait);

	trace_hfi1_txq_wakeup(txq);
	if (likely(txq->priv->netdev->reg_state == NETREG_REGISTERED))
		iowait_schedule(wait, system_highpri_wq, WORK_CPU_UNBOUND);
}

static void hfi1_ipoib_flush_txq(struct work_struct *work)
{
	struct iowait_work *ioww =
		container_of(work, struct iowait_work, iowork);
	struct iowait *wait = iowait_ioww_to_iow(ioww);
	struct hfi1_ipoib_txq *txq =
		container_of(wait, struct hfi1_ipoib_txq, wait);
	struct net_device *dev = txq->priv->netdev;

	if (likely(dev->reg_state == NETREG_REGISTERED) &&
	    likely(!hfi1_ipoib_flush_tx_list(dev, txq)))
		if (atomic_xchg(&txq->tx_ring.no_desc, 0))
			hfi1_ipoib_wake_txq(txq);
}

int hfi1_ipoib_txreq_init(struct hfi1_ipoib_dev_priv *priv)
{
	struct net_device *dev = priv->netdev;
	u32 tx_ring_size, tx_item_size;
	struct hfi1_ipoib_circ_buf *tx_ring;
	int i, j;

	/*
	 * Ring holds 1 less than tx_ring_size
	 * Round up to next power of 2 in order to hold at least tx_queue_len
	 */
	tx_ring_size = roundup_pow_of_two(dev->tx_queue_len + 1);
	tx_item_size = roundup_pow_of_two(sizeof(struct ipoib_txreq));
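	/*
	 * For example, a tx_queue_len of 1000 yields a 1024-entry ring.
	 * Rounding tx_item_size up to a power of two lets
	 * hfi1_txreq_from_idx() locate an entry with a shift (tx_ring.shift)
	 * instead of a multiply.
	 */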

	priv->txqs = kcalloc_node(dev->num_tx_queues,
				  sizeof(struct hfi1_ipoib_txq),
				  GFP_KERNEL,
				  priv->dd->node);
	if (!priv->txqs)
		return -ENOMEM;

	for (i = 0; i < dev->num_tx_queues; i++) {
		struct hfi1_ipoib_txq *txq = &priv->txqs[i];
		struct ipoib_txreq *tx;

		tx_ring = &txq->tx_ring;
		iowait_init(&txq->wait,
			    0,
			    hfi1_ipoib_flush_txq,
			    NULL,
			    hfi1_ipoib_sdma_sleep,
			    hfi1_ipoib_sdma_wakeup,
			    NULL,
			    NULL);
		txq->priv = priv;
		txq->sde = NULL;
		INIT_LIST_HEAD(&txq->tx_list);
		atomic_set(&txq->tx_ring.stops, 0);
		atomic_set(&txq->tx_ring.ring_full, 0);
		atomic_set(&txq->tx_ring.no_desc, 0);
		txq->q_idx = i;
		txq->flow.tx_queue = 0xff;
		txq->flow.sc5 = 0xff;
		txq->pkts_sent = false;

		netdev_queue_numa_node_write(netdev_get_tx_queue(dev, i),
					     priv->dd->node);

		txq->tx_ring.items =
			kvzalloc_node(array_size(tx_ring_size, tx_item_size),
				      GFP_KERNEL, priv->dd->node);
		if (!txq->tx_ring.items)
			goto free_txqs;

		txq->tx_ring.max_items = tx_ring_size;
		txq->tx_ring.shift = ilog2(tx_item_size);
		txq->tx_ring.avail = hfi1_ipoib_ring_hwat(txq);
		tx_ring = &txq->tx_ring;
		for (j = 0; j < tx_ring_size; j++) {
			hfi1_txreq_from_idx(tx_ring, j)->sdma_hdr =
				kzalloc_node(sizeof(*tx->sdma_hdr),
					     GFP_KERNEL, priv->dd->node);
			if (!hfi1_txreq_from_idx(tx_ring, j)->sdma_hdr)
				goto free_txqs;
		}

		netif_napi_add_tx(dev, &txq->napi, hfi1_ipoib_poll_tx_ring);
	}

	return 0;

free_txqs:
	for (i--; i >= 0; i--) {
		struct hfi1_ipoib_txq *txq = &priv->txqs[i];

		netif_napi_del(&txq->napi);
		tx_ring = &txq->tx_ring;
		for (j = 0; j < tx_ring_size; j++)
			kfree(hfi1_txreq_from_idx(tx_ring, j)->sdma_hdr);
		kvfree(tx_ring->items);
	}

	kfree(priv->txqs);
	priv->txqs = NULL;
	return -ENOMEM;
}

static void hfi1_ipoib_drain_tx_list(struct hfi1_ipoib_txq *txq)
{
	struct sdma_txreq *txreq;
	struct sdma_txreq *txreq_tmp;

	list_for_each_entry_safe(txreq, txreq_tmp, &txq->tx_list, list) {
		struct ipoib_txreq *tx =
			container_of(txreq, struct ipoib_txreq, txreq);

		list_del(&txreq->list);
		sdma_txclean(txq->priv->dd, &tx->txreq);
		dev_kfree_skb_any(tx->skb);
		tx->skb = NULL;
		txq->tx_ring.complete_txreqs++;
	}

	if (hfi1_ipoib_used(txq))
		dd_dev_warn(txq->priv->dd,
			    "txq %d not empty found %u requests\n",
			    txq->q_idx,
			    hfi1_ipoib_txreqs(txq->tx_ring.sent_txreqs,
					      txq->tx_ring.complete_txreqs));
}

void hfi1_ipoib_txreq_deinit(struct hfi1_ipoib_dev_priv *priv)
{
	int i, j;

	for (i = 0; i < priv->netdev->num_tx_queues; i++) {
		struct hfi1_ipoib_txq *txq = &priv->txqs[i];
		struct hfi1_ipoib_circ_buf *tx_ring = &txq->tx_ring;

		iowait_cancel_work(&txq->wait);
		iowait_sdma_drain(&txq->wait);
		hfi1_ipoib_drain_tx_list(txq);
		netif_napi_del(&txq->napi);
		hfi1_ipoib_drain_tx_ring(txq);
		for (j = 0; j < tx_ring->max_items; j++)
			kfree(hfi1_txreq_from_idx(tx_ring, j)->sdma_hdr);
		kvfree(tx_ring->items);
	}

	kfree(priv->txqs);
	priv->txqs = NULL;
}

void hfi1_ipoib_napi_tx_enable(struct net_device *dev)
{
	struct hfi1_ipoib_dev_priv *priv = hfi1_ipoib_priv(dev);
	int i;

	for (i = 0; i < dev->num_tx_queues; i++) {
		struct hfi1_ipoib_txq *txq = &priv->txqs[i];

		napi_enable(&txq->napi);
	}
}

void hfi1_ipoib_napi_tx_disable(struct net_device *dev)
{
	struct hfi1_ipoib_dev_priv *priv = hfi1_ipoib_priv(dev);
	int i;

	for (i = 0; i < dev->num_tx_queues; i++) {
		struct hfi1_ipoib_txq *txq = &priv->txqs[i];

		napi_disable(&txq->napi);
		hfi1_ipoib_drain_tx_ring(txq);
	}
}

void hfi1_ipoib_tx_timeout(struct net_device *dev, unsigned int q)
{
	struct hfi1_ipoib_dev_priv *priv = hfi1_ipoib_priv(dev);
	struct hfi1_ipoib_txq *txq = &priv->txqs[q];

	dd_dev_info(priv->dd, "timeout txq %p q %u stopped %u stops %d no_desc %d ring_full %d\n",
		    txq, q,
		    __netif_subqueue_stopped(dev, txq->q_idx),
		    atomic_read(&txq->tx_ring.stops),
		    atomic_read(&txq->tx_ring.no_desc),
		    atomic_read(&txq->tx_ring.ring_full));
	dd_dev_info(priv->dd, "sde %p engine %u\n",
		    txq->sde,
		    txq->sde ? txq->sde->this_idx : 0);
	dd_dev_info(priv->dd, "flow %x\n", txq->flow.as_int);
	dd_dev_info(priv->dd, "sent %llu completed %llu used %llu\n",
		    txq->tx_ring.sent_txreqs, txq->tx_ring.complete_txreqs,
		    hfi1_ipoib_used(txq));
	dd_dev_info(priv->dd, "tx_queue_len %u max_items %u\n",
		    dev->tx_queue_len, txq->tx_ring.max_items);
	dd_dev_info(priv->dd, "head %u tail %u\n",
		    txq->tx_ring.head, txq->tx_ring.tail);
	dd_dev_info(priv->dd, "wait queued %u\n",
		    !list_empty(&txq->wait.list));
	dd_dev_info(priv->dd, "tx_list empty %u\n",
		    list_empty(&txq->tx_list));
}
869 | |