1 | /* |
2 | * Copyright (c) 2006 Mellanox Technologies. All rights reserved |
3 | * |
4 | * This software is available to you under a choice of one of two |
5 | * licenses. You may choose to be licensed under the terms of the GNU |
6 | * General Public License (GPL) Version 2, available from the file |
7 | * COPYING in the main directory of this source tree, or the |
8 | * OpenIB.org BSD license below: |
9 | * |
10 | * Redistribution and use in source and binary forms, with or |
11 | * without modification, are permitted provided that the following |
12 | * conditions are met: |
13 | * |
14 | * - Redistributions of source code must retain the above |
15 | * copyright notice, this list of conditions and the following |
16 | * disclaimer. |
17 | * |
18 | * - Redistributions in binary form must reproduce the above |
19 | * copyright notice, this list of conditions and the following |
20 | * disclaimer in the documentation and/or other materials |
21 | * provided with the distribution. |
22 | * |
23 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
24 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
25 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND |
26 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS |
27 | * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN |
28 | * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN |
29 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
30 | * SOFTWARE. |
31 | */ |
32 | |
33 | #include <rdma/ib_cm.h> |
34 | #include <net/dst.h> |
35 | #include <net/icmp.h> |
36 | #include <linux/icmpv6.h> |
37 | #include <linux/delay.h> |
38 | #include <linux/slab.h> |
39 | #include <linux/vmalloc.h> |
40 | #include <linux/moduleparam.h> |
41 | #include <linux/sched/signal.h> |
42 | #include <linux/sched/mm.h> |
43 | |
44 | #include "ipoib.h" |
45 | |
46 | int ipoib_max_conn_qp = 128; |
47 | |
48 | module_param_named(max_nonsrq_conn_qp, ipoib_max_conn_qp, int, 0444); |
49 | MODULE_PARM_DESC(max_nonsrq_conn_qp, |
50 | "Max number of connected-mode QPs per interface " |
51 | "(applied only if shared receive queue is not available)" ); |
52 | |
53 | #ifdef CONFIG_INFINIBAND_IPOIB_DEBUG_DATA |
54 | static int data_debug_level; |
55 | |
56 | module_param_named(cm_data_debug_level, data_debug_level, int, 0644); |
57 | MODULE_PARM_DESC(cm_data_debug_level, |
58 | "Enable data path debug tracing for connected mode if > 0" ); |
59 | #endif |
60 | |
61 | #define IPOIB_CM_IETF_ID 0x1000000000000000ULL |
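
/*
 * The CM service ID an interface listens on is this IETF-assigned
 * prefix OR-ed with the local UD QP number (see ipoib_cm_dev_open()
 * and ipoib_cm_send_req() below):
 *
 *	cpu_to_be64(IPOIB_CM_IETF_ID | priv->qp->qp_num)
 *
 * This appears to follow the connected mode service ID layout
 * described in RFC 4755.
 */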
62 | |
63 | #define IPOIB_CM_RX_UPDATE_TIME (256 * HZ) |
64 | #define IPOIB_CM_RX_TIMEOUT (2 * 256 * HZ) |
65 | #define IPOIB_CM_RX_DELAY (3 * 256 * HZ) |
66 | #define IPOIB_CM_RX_UPDATE_MASK (0x3) |
67 | |
68 | #define IPOIB_CM_RX_RESERVE (ALIGN(IPOIB_HARD_LEN, 16) - IPOIB_ENCAP_LEN) |
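
/*
 * A sketch of the alignment math above, assuming the usual definition
 * IPOIB_HARD_LEN = IPOIB_ENCAP_LEN + IPOIB_PSEUDO_LEN from ipoib.h:
 * reserving ALIGN(IPOIB_HARD_LEN, 16) - IPOIB_ENCAP_LEN bytes means
 * that once the IPOIB_ENCAP_LEN byte encap header has been received
 * and the pseudo header pushed in ipoib_cm_handle_rx_wc(), the IP
 * header lands on a 16-byte boundary (see also the comment in
 * ipoib_cm_alloc_rx_skb()).
 */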
69 | |
70 | static struct ib_qp_attr ipoib_cm_err_attr = { |
71 | .qp_state = IB_QPS_ERR |
72 | }; |
73 | |
74 | #define IPOIB_CM_RX_DRAIN_WRID 0xffffffff |
75 | |
76 | static struct ib_send_wr ipoib_cm_rx_drain_wr = { |
77 | .opcode = IB_WR_SEND, |
78 | }; |
79 | |
80 | static int ipoib_cm_tx_handler(struct ib_cm_id *cm_id, |
81 | const struct ib_cm_event *event); |
82 | |
83 | static void ipoib_cm_dma_unmap_rx(struct ipoib_dev_priv *priv, int frags, |
84 | u64 mapping[IPOIB_CM_RX_SG]) |
85 | { |
86 | int i; |
87 | |
	ib_dma_unmap_single(priv->ca, mapping[0], IPOIB_CM_HEAD_SIZE, DMA_FROM_DEVICE);

	for (i = 0; i < frags; ++i)
		ib_dma_unmap_page(priv->ca, mapping[i + 1], PAGE_SIZE, DMA_FROM_DEVICE);
92 | } |
93 | |
94 | static int ipoib_cm_post_receive_srq(struct net_device *dev, int id) |
95 | { |
96 | struct ipoib_dev_priv *priv = ipoib_priv(dev); |
97 | int i, ret; |
98 | |
99 | priv->cm.rx_wr.wr_id = id | IPOIB_OP_CM | IPOIB_OP_RECV; |
100 | |
101 | for (i = 0; i < priv->cm.num_frags; ++i) |
102 | priv->cm.rx_sge[i].addr = priv->cm.srq_ring[id].mapping[i]; |
103 | |
	ret = ib_post_srq_recv(priv->cm.srq, &priv->cm.rx_wr, NULL);
	if (unlikely(ret)) {
		ipoib_warn(priv, "post srq failed for buf %d (%d)\n", id, ret);
		ipoib_cm_dma_unmap_rx(priv, priv->cm.num_frags - 1,
				      priv->cm.srq_ring[id].mapping);
		dev_kfree_skb_any(priv->cm.srq_ring[id].skb);
		priv->cm.srq_ring[id].skb = NULL;
111 | } |
112 | |
113 | return ret; |
114 | } |
115 | |
116 | static int ipoib_cm_post_receive_nonsrq(struct net_device *dev, |
117 | struct ipoib_cm_rx *rx, |
118 | struct ib_recv_wr *wr, |
119 | struct ib_sge *sge, int id) |
120 | { |
121 | struct ipoib_dev_priv *priv = ipoib_priv(dev); |
122 | int i, ret; |
123 | |
124 | wr->wr_id = id | IPOIB_OP_CM | IPOIB_OP_RECV; |
125 | |
126 | for (i = 0; i < IPOIB_CM_RX_SG; ++i) |
127 | sge[i].addr = rx->rx_ring[id].mapping[i]; |
128 | |
	ret = ib_post_recv(rx->qp, wr, NULL);
	if (unlikely(ret)) {
		ipoib_warn(priv, "post recv failed for buf %d (%d)\n", id, ret);
		ipoib_cm_dma_unmap_rx(priv, IPOIB_CM_RX_SG - 1,
				      rx->rx_ring[id].mapping);
		dev_kfree_skb_any(rx->rx_ring[id].skb);
		rx->rx_ring[id].skb = NULL;
136 | } |
137 | |
138 | return ret; |
139 | } |
140 | |
141 | static struct sk_buff *ipoib_cm_alloc_rx_skb(struct net_device *dev, |
142 | struct ipoib_cm_rx_buf *rx_ring, |
143 | int id, int frags, |
144 | u64 mapping[IPOIB_CM_RX_SG], |
145 | gfp_t gfp) |
146 | { |
147 | struct ipoib_dev_priv *priv = ipoib_priv(dev); |
148 | struct sk_buff *skb; |
149 | int i; |
150 | |
151 | skb = dev_alloc_skb(ALIGN(IPOIB_CM_HEAD_SIZE + IPOIB_PSEUDO_LEN, 16)); |
152 | if (unlikely(!skb)) |
153 | return NULL; |
154 | |
155 | /* |
	 * IPoIB adds an IPOIB_ENCAP_LEN byte header; this will align the
157 | * IP header to a multiple of 16. |
158 | */ |
159 | skb_reserve(skb, IPOIB_CM_RX_RESERVE); |
160 | |
	mapping[0] = ib_dma_map_single(priv->ca, skb->data, IPOIB_CM_HEAD_SIZE,
				       DMA_FROM_DEVICE);
163 | if (unlikely(ib_dma_mapping_error(priv->ca, mapping[0]))) { |
164 | dev_kfree_skb_any(skb); |
165 | return NULL; |
166 | } |
167 | |
168 | for (i = 0; i < frags; i++) { |
169 | struct page *page = alloc_page(gfp); |
170 | |
171 | if (!page) |
172 | goto partial_error; |
		skb_fill_page_desc(skb, i, page, 0, PAGE_SIZE);

		mapping[i + 1] = ib_dma_map_page(priv->ca, page,
						 0, PAGE_SIZE, DMA_FROM_DEVICE);
177 | if (unlikely(ib_dma_mapping_error(priv->ca, mapping[i + 1]))) |
178 | goto partial_error; |
179 | } |
180 | |
181 | rx_ring[id].skb = skb; |
182 | return skb; |
183 | |
184 | partial_error: |
185 | |
	ib_dma_unmap_single(priv->ca, mapping[0], IPOIB_CM_HEAD_SIZE, DMA_FROM_DEVICE);

	for (; i > 0; --i)
		ib_dma_unmap_page(priv->ca, mapping[i], PAGE_SIZE, DMA_FROM_DEVICE);
190 | |
191 | dev_kfree_skb_any(skb); |
192 | return NULL; |
193 | } |
194 | |
195 | static void ipoib_cm_free_rx_ring(struct net_device *dev, |
196 | struct ipoib_cm_rx_buf *rx_ring) |
197 | { |
198 | struct ipoib_dev_priv *priv = ipoib_priv(dev); |
199 | int i; |
200 | |
201 | for (i = 0; i < ipoib_recvq_size; ++i) |
202 | if (rx_ring[i].skb) { |
			ipoib_cm_dma_unmap_rx(priv, IPOIB_CM_RX_SG - 1,
					      rx_ring[i].mapping);
			dev_kfree_skb_any(rx_ring[i].skb);
		}

	vfree(rx_ring);
209 | } |
210 | |
211 | static void ipoib_cm_start_rx_drain(struct ipoib_dev_priv *priv) |
212 | { |
213 | struct ipoib_cm_rx *p; |
214 | |
	/* We only reserved 1 extra slot in CQ for drain WRs, so
	 * make sure we have at most 1 outstanding WR. */
	if (list_empty(&priv->cm.rx_flush_list) ||
	    !list_empty(&priv->cm.rx_drain_list))
		return;

	/*
	 * QPs on flush list are in error state. This way, a "flush
	 * error" WC will be immediately generated for each WR we post.
	 */
	p = list_entry(priv->cm.rx_flush_list.next, typeof(*p), list);
	ipoib_cm_rx_drain_wr.wr_id = IPOIB_CM_RX_DRAIN_WRID;
	if (ib_post_send(p->qp, &ipoib_cm_rx_drain_wr, NULL))
		ipoib_warn(priv, "failed to post drain wr\n");

	list_splice_init(&priv->cm.rx_flush_list, &priv->cm.rx_drain_list);
231 | } |
232 | |
233 | static void ipoib_cm_rx_event_handler(struct ib_event *event, void *ctx) |
234 | { |
235 | struct ipoib_cm_rx *p = ctx; |
	struct ipoib_dev_priv *priv = ipoib_priv(p->dev);
	unsigned long flags;

	if (event->event != IB_EVENT_QP_LAST_WQE_REACHED)
		return;

	spin_lock_irqsave(&priv->lock, flags);
	list_move(&p->list, &priv->cm.rx_flush_list);
	p->state = IPOIB_CM_RX_FLUSH;
	ipoib_cm_start_rx_drain(priv);
	spin_unlock_irqrestore(&priv->lock, flags);
247 | } |
248 | |
249 | static struct ib_qp *ipoib_cm_create_rx_qp(struct net_device *dev, |
250 | struct ipoib_cm_rx *p) |
251 | { |
252 | struct ipoib_dev_priv *priv = ipoib_priv(dev); |
253 | struct ib_qp_init_attr attr = { |
254 | .event_handler = ipoib_cm_rx_event_handler, |
255 | .send_cq = priv->recv_cq, /* For drain WR */ |
256 | .recv_cq = priv->recv_cq, |
257 | .srq = priv->cm.srq, |
258 | .cap.max_send_wr = 1, /* For drain WR */ |
		.cap.max_send_sge = 1, /* FIXME: 0 seems not to work */
260 | .sq_sig_type = IB_SIGNAL_ALL_WR, |
261 | .qp_type = IB_QPT_RC, |
262 | .qp_context = p, |
263 | }; |
264 | |
265 | if (!ipoib_cm_has_srq(dev)) { |
266 | attr.cap.max_recv_wr = ipoib_recvq_size; |
267 | attr.cap.max_recv_sge = IPOIB_CM_RX_SG; |
268 | } |
269 | |
	return ib_create_qp(priv->pd, &attr);
271 | } |
272 | |
273 | static int ipoib_cm_modify_rx_qp(struct net_device *dev, |
274 | struct ib_cm_id *cm_id, struct ib_qp *qp, |
275 | unsigned int psn) |
276 | { |
277 | struct ipoib_dev_priv *priv = ipoib_priv(dev); |
278 | struct ib_qp_attr qp_attr; |
279 | int qp_attr_mask, ret; |
280 | |
281 | qp_attr.qp_state = IB_QPS_INIT; |
	ret = ib_cm_init_qp_attr(cm_id, &qp_attr, &qp_attr_mask);
	if (ret) {
		ipoib_warn(priv, "failed to init QP attr for INIT: %d\n", ret);
		return ret;
	}
	ret = ib_modify_qp(qp, &qp_attr, qp_attr_mask);
	if (ret) {
		ipoib_warn(priv, "failed to modify QP to INIT: %d\n", ret);
		return ret;
	}
	qp_attr.qp_state = IB_QPS_RTR;
	ret = ib_cm_init_qp_attr(cm_id, &qp_attr, &qp_attr_mask);
	if (ret) {
		ipoib_warn(priv, "failed to init QP attr for RTR: %d\n", ret);
		return ret;
	}
	qp_attr.rq_psn = psn;
	ret = ib_modify_qp(qp, &qp_attr, qp_attr_mask);
	if (ret) {
		ipoib_warn(priv, "failed to modify QP to RTR: %d\n", ret);
302 | return ret; |
303 | } |
304 | |
305 | /* |
306 | * Current Mellanox HCA firmware won't generate completions |
307 | * with error for drain WRs unless the QP has been moved to |
308 | * RTS first. This work-around leaves a window where a QP has |
309 | * moved to error asynchronously, but this will eventually get |
310 | * fixed in firmware, so let's not error out if modify QP |
311 | * fails. |
312 | */ |
313 | qp_attr.qp_state = IB_QPS_RTS; |
	ret = ib_cm_init_qp_attr(cm_id, &qp_attr, &qp_attr_mask);
	if (ret) {
		ipoib_warn(priv, "failed to init QP attr for RTS: %d\n", ret);
		return 0;
	}
	ret = ib_modify_qp(qp, &qp_attr, qp_attr_mask);
	if (ret) {
		ipoib_warn(priv, "failed to modify QP to RTS: %d\n", ret);
322 | return 0; |
323 | } |
324 | |
325 | return 0; |
326 | } |
327 | |
328 | static void ipoib_cm_init_rx_wr(struct net_device *dev, |
329 | struct ib_recv_wr *wr, |
330 | struct ib_sge *sge) |
331 | { |
332 | struct ipoib_dev_priv *priv = ipoib_priv(dev); |
333 | int i; |
334 | |
335 | for (i = 0; i < priv->cm.num_frags; ++i) |
336 | sge[i].lkey = priv->pd->local_dma_lkey; |
337 | |
338 | sge[0].length = IPOIB_CM_HEAD_SIZE; |
339 | for (i = 1; i < priv->cm.num_frags; ++i) |
340 | sge[i].length = PAGE_SIZE; |
341 | |
342 | wr->next = NULL; |
343 | wr->sg_list = sge; |
344 | wr->num_sge = priv->cm.num_frags; |
345 | } |
346 | |
347 | static int ipoib_cm_nonsrq_init_rx(struct net_device *dev, struct ib_cm_id *cm_id, |
348 | struct ipoib_cm_rx *rx) |
349 | { |
350 | struct ipoib_dev_priv *priv = ipoib_priv(dev); |
351 | struct { |
352 | struct ib_recv_wr wr; |
353 | struct ib_sge sge[IPOIB_CM_RX_SG]; |
354 | } *t; |
355 | int ret; |
356 | int i; |
357 | |
358 | rx->rx_ring = vzalloc(array_size(ipoib_recvq_size, |
359 | sizeof(*rx->rx_ring))); |
360 | if (!rx->rx_ring) |
361 | return -ENOMEM; |
362 | |
	t = kmalloc(sizeof(*t), GFP_KERNEL);
	if (!t) {
		ret = -ENOMEM;
		goto err_free_1;
	}

	ipoib_cm_init_rx_wr(dev, &t->wr, t->sge);

	spin_lock_irq(&priv->lock);

	if (priv->cm.nonsrq_conn_qp >= ipoib_max_conn_qp) {
		spin_unlock_irq(&priv->lock);
		ib_send_cm_rej(cm_id, IB_CM_REJ_NO_QP, NULL, 0, NULL, 0);
		ret = -EINVAL;
		goto err_free;
	} else
		++priv->cm.nonsrq_conn_qp;

	spin_unlock_irq(&priv->lock);

	for (i = 0; i < ipoib_recvq_size; ++i) {
		if (!ipoib_cm_alloc_rx_skb(dev, rx->rx_ring, i, IPOIB_CM_RX_SG - 1,
					   rx->rx_ring[i].mapping,
					   GFP_KERNEL)) {
			ipoib_warn(priv, "failed to allocate receive buffer %d\n", i);
			ret = -ENOMEM;
			goto err_count;
		}
		ret = ipoib_cm_post_receive_nonsrq(dev, rx, &t->wr, t->sge, i);
		if (ret) {
			ipoib_warn(priv, "ipoib_cm_post_receive_nonsrq "
				   "failed for buf %d\n", i);
			ret = -EIO;
			goto err_count;
		}
	}

	rx->recv_count = ipoib_recvq_size;

	kfree(t);

	return 0;

err_count:
	spin_lock_irq(&priv->lock);
	--priv->cm.nonsrq_conn_qp;
	spin_unlock_irq(&priv->lock);

err_free:
	kfree(t);

err_free_1:
	ipoib_cm_free_rx_ring(dev, rx->rx_ring);
416 | |
417 | return ret; |
418 | } |
419 | |
420 | static int ipoib_cm_send_rep(struct net_device *dev, struct ib_cm_id *cm_id, |
421 | struct ib_qp *qp, |
422 | const struct ib_cm_req_event_param *req, |
423 | unsigned int psn) |
424 | { |
425 | struct ipoib_dev_priv *priv = ipoib_priv(dev); |
426 | struct ipoib_cm_data data = {}; |
427 | struct ib_cm_rep_param rep = {}; |
428 | |
429 | data.qpn = cpu_to_be32(priv->qp->qp_num); |
430 | data.mtu = cpu_to_be32(IPOIB_CM_BUF_SIZE); |
431 | |
432 | rep.private_data = &data; |
433 | rep.private_data_len = sizeof(data); |
434 | rep.flow_control = 0; |
435 | rep.rnr_retry_count = req->rnr_retry_count; |
436 | rep.srq = ipoib_cm_has_srq(dev); |
437 | rep.qp_num = qp->qp_num; |
438 | rep.starting_psn = psn; |
	return ib_send_cm_rep(cm_id, &rep);
440 | } |
441 | |
442 | static int ipoib_cm_req_handler(struct ib_cm_id *cm_id, |
443 | const struct ib_cm_event *event) |
444 | { |
445 | struct net_device *dev = cm_id->context; |
446 | struct ipoib_dev_priv *priv = ipoib_priv(dev); |
447 | struct ipoib_cm_rx *p; |
448 | unsigned int psn; |
449 | int ret; |
450 | |
451 | ipoib_dbg(priv, "REQ arrived\n" ); |
452 | p = kzalloc(size: sizeof(*p), GFP_KERNEL); |
453 | if (!p) |
454 | return -ENOMEM; |
455 | p->dev = dev; |
456 | p->id = cm_id; |
457 | cm_id->context = p; |
458 | p->state = IPOIB_CM_RX_LIVE; |
459 | p->jiffies = jiffies; |
460 | INIT_LIST_HEAD(list: &p->list); |
461 | |
462 | p->qp = ipoib_cm_create_rx_qp(dev, p); |
463 | if (IS_ERR(ptr: p->qp)) { |
464 | ret = PTR_ERR(ptr: p->qp); |
465 | goto err_qp; |
466 | } |
467 | |
468 | psn = get_random_u32() & 0xffffff; |
469 | ret = ipoib_cm_modify_rx_qp(dev, cm_id, qp: p->qp, psn); |
470 | if (ret) |
471 | goto err_modify; |
472 | |
473 | if (!ipoib_cm_has_srq(dev)) { |
474 | ret = ipoib_cm_nonsrq_init_rx(dev, cm_id, rx: p); |
475 | if (ret) |
476 | goto err_modify; |
477 | } |
478 | |
479 | spin_lock_irq(lock: &priv->lock); |
480 | queue_delayed_work(wq: priv->wq, |
481 | dwork: &priv->cm.stale_task, IPOIB_CM_RX_DELAY); |
482 | /* Add this entry to passive ids list head, but do not re-add it |
483 | * if IB_EVENT_QP_LAST_WQE_REACHED has moved it to flush list. */ |
484 | p->jiffies = jiffies; |
485 | if (p->state == IPOIB_CM_RX_LIVE) |
486 | list_move(list: &p->list, head: &priv->cm.passive_ids); |
487 | spin_unlock_irq(lock: &priv->lock); |
488 | |
489 | ret = ipoib_cm_send_rep(dev, cm_id, qp: p->qp, req: &event->param.req_rcvd, psn); |
490 | if (ret) { |
491 | ipoib_warn(priv, "failed to send REP: %d\n" , ret); |
492 | if (ib_modify_qp(qp: p->qp, qp_attr: &ipoib_cm_err_attr, qp_attr_mask: IB_QP_STATE)) |
493 | ipoib_warn(priv, "unable to move qp to error state\n" ); |
494 | } |
495 | return 0; |
496 | |
497 | err_modify: |
498 | ib_destroy_qp(qp: p->qp); |
499 | err_qp: |
500 | kfree(objp: p); |
501 | return ret; |
502 | } |
503 | |
504 | static int ipoib_cm_rx_handler(struct ib_cm_id *cm_id, |
505 | const struct ib_cm_event *event) |
506 | { |
507 | struct ipoib_cm_rx *p; |
508 | struct ipoib_dev_priv *priv; |
509 | |
510 | switch (event->event) { |
511 | case IB_CM_REQ_RECEIVED: |
512 | return ipoib_cm_req_handler(cm_id, event); |
513 | case IB_CM_DREQ_RECEIVED: |
		ib_send_cm_drep(cm_id, NULL, 0);
		fallthrough;
	case IB_CM_REJ_RECEIVED:
		p = cm_id->context;
		priv = ipoib_priv(p->dev);
		if (ib_modify_qp(p->qp, &ipoib_cm_err_attr, IB_QP_STATE))
			ipoib_warn(priv, "unable to move qp to error state\n");
521 | fallthrough; |
522 | default: |
523 | return 0; |
524 | } |
525 | } |

/* Adjust length of skb with fragments to match received data */
527 | static void skb_put_frags(struct sk_buff *skb, unsigned int hdr_space, |
528 | unsigned int length, struct sk_buff *toskb) |
529 | { |
530 | int i, num_frags; |
531 | unsigned int size; |
532 | |
533 | /* put header into skb */ |
534 | size = min(length, hdr_space); |
535 | skb->tail += size; |
536 | skb->len += size; |
537 | length -= size; |
538 | |
539 | num_frags = skb_shinfo(skb)->nr_frags; |
540 | for (i = 0; i < num_frags; i++) { |
541 | skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; |
542 | |
543 | if (length == 0) { |
544 | /* don't need this page */ |
			skb_fill_page_desc(toskb, i, skb_frag_page(frag),
					   0, PAGE_SIZE);
547 | --skb_shinfo(skb)->nr_frags; |
548 | } else { |
549 | size = min_t(unsigned int, length, PAGE_SIZE); |
550 | |
551 | skb_frag_size_set(frag, size); |
552 | skb->data_len += size; |
553 | skb->truesize += size; |
554 | skb->len += size; |
555 | length -= size; |
556 | } |
557 | } |
558 | } |
559 | |
560 | void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc) |
561 | { |
562 | struct ipoib_dev_priv *priv = ipoib_priv(dev); |
563 | struct ipoib_cm_rx_buf *rx_ring; |
564 | unsigned int wr_id = wc->wr_id & ~(IPOIB_OP_CM | IPOIB_OP_RECV); |
565 | struct sk_buff *skb, *newskb; |
566 | struct ipoib_cm_rx *p; |
567 | unsigned long flags; |
568 | u64 mapping[IPOIB_CM_RX_SG]; |
569 | int frags; |
570 | int has_srq; |
571 | struct sk_buff *small_skb; |
572 | |
573 | ipoib_dbg_data(priv, "cm recv completion: id %d, status: %d\n" , |
574 | wr_id, wc->status); |
575 | |
576 | if (unlikely(wr_id >= ipoib_recvq_size)) { |
577 | if (wr_id == (IPOIB_CM_RX_DRAIN_WRID & ~(IPOIB_OP_CM | IPOIB_OP_RECV))) { |
578 | spin_lock_irqsave(&priv->lock, flags); |
579 | list_splice_init(list: &priv->cm.rx_drain_list, head: &priv->cm.rx_reap_list); |
580 | ipoib_cm_start_rx_drain(priv); |
581 | queue_work(wq: priv->wq, work: &priv->cm.rx_reap_task); |
582 | spin_unlock_irqrestore(lock: &priv->lock, flags); |
583 | } else |
584 | ipoib_warn(priv, "cm recv completion event with wrid %d (> %d)\n" , |
585 | wr_id, ipoib_recvq_size); |
586 | return; |
587 | } |
588 | |
589 | p = wc->qp->qp_context; |
590 | |
591 | has_srq = ipoib_cm_has_srq(dev); |
592 | rx_ring = has_srq ? priv->cm.srq_ring : p->rx_ring; |
593 | |
594 | skb = rx_ring[wr_id].skb; |
595 | |
596 | if (unlikely(wc->status != IB_WC_SUCCESS)) { |
597 | ipoib_dbg(priv, |
598 | "cm recv error (status=%d, wrid=%d vend_err %#x)\n" , |
599 | wc->status, wr_id, wc->vendor_err); |
600 | ++dev->stats.rx_dropped; |
601 | if (has_srq) |
602 | goto repost; |
603 | else { |
604 | if (!--p->recv_count) { |
605 | spin_lock_irqsave(&priv->lock, flags); |
606 | list_move(list: &p->list, head: &priv->cm.rx_reap_list); |
607 | spin_unlock_irqrestore(lock: &priv->lock, flags); |
608 | queue_work(wq: priv->wq, work: &priv->cm.rx_reap_task); |
609 | } |
610 | return; |
611 | } |
612 | } |
613 | |
614 | if (unlikely(!(wr_id & IPOIB_CM_RX_UPDATE_MASK))) { |
615 | if (p && time_after_eq(jiffies, p->jiffies + IPOIB_CM_RX_UPDATE_TIME)) { |
616 | spin_lock_irqsave(&priv->lock, flags); |
617 | p->jiffies = jiffies; |
618 | /* Move this entry to list head, but do not re-add it |
619 | * if it has been moved out of list. */ |
620 | if (p->state == IPOIB_CM_RX_LIVE) |
				list_move(&p->list, &priv->cm.passive_ids);
			spin_unlock_irqrestore(&priv->lock, flags);
623 | } |
624 | } |
625 | |
626 | if (wc->byte_len < IPOIB_CM_COPYBREAK) { |
627 | int dlen = wc->byte_len; |
628 | |
		small_skb = dev_alloc_skb(dlen + IPOIB_CM_RX_RESERVE);
		if (small_skb) {
			skb_reserve(small_skb, IPOIB_CM_RX_RESERVE);
			ib_dma_sync_single_for_cpu(priv->ca, rx_ring[wr_id].mapping[0],
						   dlen, DMA_FROM_DEVICE);
			skb_copy_from_linear_data(skb, small_skb->data, dlen);
			ib_dma_sync_single_for_device(priv->ca, rx_ring[wr_id].mapping[0],
						      dlen, DMA_FROM_DEVICE);
			skb_put(small_skb, dlen);
638 | skb = small_skb; |
639 | goto copied; |
640 | } |
641 | } |
642 | |
643 | frags = PAGE_ALIGN(wc->byte_len - |
644 | min_t(u32, wc->byte_len, IPOIB_CM_HEAD_SIZE)) / |
645 | PAGE_SIZE; |
646 | |
	newskb = ipoib_cm_alloc_rx_skb(dev, rx_ring, wr_id, frags,
				       mapping, GFP_ATOMIC);
649 | if (unlikely(!newskb)) { |
650 | /* |
651 | * If we can't allocate a new RX buffer, dump |
652 | * this packet and reuse the old buffer. |
653 | */ |
654 | ipoib_dbg(priv, "failed to allocate receive buffer %d\n" , wr_id); |
655 | ++dev->stats.rx_dropped; |
656 | goto repost; |
657 | } |
658 | |
	ipoib_cm_dma_unmap_rx(priv, frags, rx_ring[wr_id].mapping);
	memcpy(rx_ring[wr_id].mapping, mapping, (frags + 1) * sizeof(*mapping));

	ipoib_dbg_data(priv, "received %d bytes, SLID 0x%04x\n",
		       wc->byte_len, wc->slid);

	skb_put_frags(skb, IPOIB_CM_HEAD_SIZE, wc->byte_len, newskb);
666 | |
667 | copied: |
668 | skb->protocol = ((struct ipoib_header *) skb->data)->proto; |
669 | skb_add_pseudo_hdr(skb); |
670 | |
671 | ++dev->stats.rx_packets; |
672 | dev->stats.rx_bytes += skb->len; |
673 | |
674 | skb->dev = dev; |
675 | /* XXX get correct PACKET_ type here */ |
676 | skb->pkt_type = PACKET_HOST; |
677 | netif_receive_skb(skb); |
678 | |
679 | repost: |
680 | if (has_srq) { |
681 | if (unlikely(ipoib_cm_post_receive_srq(dev, wr_id))) |
682 | ipoib_warn(priv, "ipoib_cm_post_receive_srq failed " |
683 | "for buf %d\n" , wr_id); |
684 | } else { |
685 | if (unlikely(ipoib_cm_post_receive_nonsrq(dev, p, |
686 | &priv->cm.rx_wr, |
687 | priv->cm.rx_sge, |
688 | wr_id))) { |
689 | --p->recv_count; |
690 | ipoib_warn(priv, "ipoib_cm_post_receive_nonsrq failed " |
691 | "for buf %d\n" , wr_id); |
692 | } |
693 | } |
694 | } |
695 | |
696 | static inline int post_send(struct ipoib_dev_priv *priv, |
697 | struct ipoib_cm_tx *tx, |
698 | unsigned int wr_id, |
699 | struct ipoib_tx_buf *tx_req) |
700 | { |
701 | ipoib_build_sge(priv, tx_req); |
702 | |
703 | priv->tx_wr.wr.wr_id = wr_id | IPOIB_OP_CM; |
704 | |
	return ib_post_send(tx->qp, &priv->tx_wr.wr, NULL);
706 | } |
707 | |
708 | void ipoib_cm_send(struct net_device *dev, struct sk_buff *skb, struct ipoib_cm_tx *tx) |
709 | { |
710 | struct ipoib_dev_priv *priv = ipoib_priv(dev); |
711 | struct ipoib_tx_buf *tx_req; |
712 | int rc; |
713 | unsigned int usable_sge = tx->max_send_sge - !!skb_headlen(skb); |
714 | |
715 | if (unlikely(skb->len > tx->mtu)) { |
716 | ipoib_warn(priv, "packet len %d (> %d) too long to send, dropping\n" , |
717 | skb->len, tx->mtu); |
718 | ++dev->stats.tx_dropped; |
719 | ++dev->stats.tx_errors; |
720 | ipoib_cm_skb_too_long(dev, skb, mtu: tx->mtu - IPOIB_ENCAP_LEN); |
721 | return; |
722 | } |
723 | if (skb_shinfo(skb)->nr_frags > usable_sge) { |
724 | if (skb_linearize(skb) < 0) { |
725 | ipoib_warn(priv, "skb could not be linearized\n" ); |
726 | ++dev->stats.tx_dropped; |
727 | ++dev->stats.tx_errors; |
728 | dev_kfree_skb_any(skb); |
729 | return; |
730 | } |
731 | /* Does skb_linearize return ok without reducing nr_frags? */ |
732 | if (skb_shinfo(skb)->nr_frags > usable_sge) { |
733 | ipoib_warn(priv, "too many frags after skb linearize\n" ); |
734 | ++dev->stats.tx_dropped; |
735 | ++dev->stats.tx_errors; |
736 | dev_kfree_skb_any(skb); |
737 | return; |
738 | } |
739 | } |
740 | ipoib_dbg_data(priv, "sending packet: head 0x%x length %d connection 0x%x\n" , |
741 | tx->tx_head, skb->len, tx->qp->qp_num); |
742 | |
743 | /* |
744 | * We put the skb into the tx_ring _before_ we call post_send() |
745 | * because it's entirely possible that the completion handler will |
746 | * run before we execute anything after the post_send(). That |
747 | * means we have to make sure everything is properly recorded and |
748 | * our state is consistent before we call post_send(). |
749 | */ |
750 | tx_req = &tx->tx_ring[tx->tx_head & (ipoib_sendq_size - 1)]; |
751 | tx_req->skb = skb; |
752 | |
753 | if (unlikely(ipoib_dma_map_tx(priv->ca, tx_req))) { |
754 | ++dev->stats.tx_errors; |
755 | dev_kfree_skb_any(skb); |
756 | return; |
757 | } |
758 | |
759 | if ((priv->global_tx_head - priv->global_tx_tail) == |
760 | ipoib_sendq_size - 1) { |
761 | ipoib_dbg(priv, "TX ring 0x%x full, stopping kernel net queue\n" , |
762 | tx->qp->qp_num); |
763 | netif_stop_queue(dev); |
764 | } |
765 | |
766 | skb_orphan(skb); |
767 | skb_dst_drop(skb); |
768 | |
769 | if (netif_queue_stopped(dev)) { |
		rc = ib_req_notify_cq(priv->send_cq, IB_CQ_NEXT_COMP |
				      IB_CQ_REPORT_MISSED_EVENTS);
		if (unlikely(rc < 0))
			ipoib_warn(priv, "IPoIB/CM:request notify on send CQ failed\n");
		else if (rc)
			napi_schedule(&priv->send_napi);
	}

	rc = post_send(priv, tx, tx->tx_head & (ipoib_sendq_size - 1), tx_req);
	if (unlikely(rc)) {
		ipoib_warn(priv, "IPoIB/CM:post_send failed, error %d\n", rc);
781 | ++dev->stats.tx_errors; |
782 | ipoib_dma_unmap_tx(priv, tx_req); |
783 | dev_kfree_skb_any(skb); |
784 | |
785 | if (netif_queue_stopped(dev)) |
786 | netif_wake_queue(dev); |
787 | } else { |
788 | netif_trans_update(dev); |
789 | ++tx->tx_head; |
790 | ++priv->global_tx_head; |
791 | } |
792 | } |
793 | |
794 | void ipoib_cm_handle_tx_wc(struct net_device *dev, struct ib_wc *wc) |
795 | { |
796 | struct ipoib_dev_priv *priv = ipoib_priv(dev); |
797 | struct ipoib_cm_tx *tx = wc->qp->qp_context; |
798 | unsigned int wr_id = wc->wr_id & ~IPOIB_OP_CM; |
799 | struct ipoib_tx_buf *tx_req; |
800 | unsigned long flags; |
801 | |
802 | ipoib_dbg_data(priv, "cm send completion: id %d, status: %d\n" , |
803 | wr_id, wc->status); |
804 | |
805 | if (unlikely(wr_id >= ipoib_sendq_size)) { |
806 | ipoib_warn(priv, "cm send completion event with wrid %d (> %d)\n" , |
807 | wr_id, ipoib_sendq_size); |
808 | return; |
809 | } |
810 | |
811 | tx_req = &tx->tx_ring[wr_id]; |
812 | |
813 | ipoib_dma_unmap_tx(priv, tx_req); |
814 | |
815 | /* FIXME: is this right? Shouldn't we only increment on success? */ |
816 | ++dev->stats.tx_packets; |
817 | dev->stats.tx_bytes += tx_req->skb->len; |
818 | |
	dev_kfree_skb_any(tx_req->skb);
820 | |
821 | netif_tx_lock(dev); |
822 | |
823 | ++tx->tx_tail; |
824 | ++priv->global_tx_tail; |
825 | |
826 | if (unlikely(netif_queue_stopped(dev) && |
827 | ((priv->global_tx_head - priv->global_tx_tail) <= |
828 | ipoib_sendq_size >> 1) && |
829 | test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags))) |
830 | netif_wake_queue(dev); |
831 | |
832 | if (wc->status != IB_WC_SUCCESS && |
833 | wc->status != IB_WC_WR_FLUSH_ERR) { |
834 | struct ipoib_neigh *neigh; |
835 | |
836 | /* IB_WC[_RNR]_RETRY_EXC_ERR error is part of the life cycle, |
837 | * so don't make waves. |
838 | */ |
839 | if (wc->status == IB_WC_RNR_RETRY_EXC_ERR || |
840 | wc->status == IB_WC_RETRY_EXC_ERR) |
841 | ipoib_dbg(priv, |
842 | "%s: failed cm send event (status=%d, wrid=%d vend_err %#x)\n" , |
843 | __func__, wc->status, wr_id, wc->vendor_err); |
844 | else |
845 | ipoib_warn(priv, |
846 | "%s: failed cm send event (status=%d, wrid=%d vend_err %#x)\n" , |
847 | __func__, wc->status, wr_id, wc->vendor_err); |
848 | |
849 | spin_lock_irqsave(&priv->lock, flags); |
850 | neigh = tx->neigh; |
851 | |
852 | if (neigh) { |
853 | neigh->cm = NULL; |
854 | ipoib_neigh_free(neigh); |
855 | |
856 | tx->neigh = NULL; |
857 | } |
858 | |
		if (test_and_clear_bit(IPOIB_FLAG_INITIALIZED, &tx->flags)) {
			list_move(&tx->list, &priv->cm.reap_list);
			queue_work(priv->wq, &priv->cm.reap_task);
		}

		clear_bit(IPOIB_FLAG_OPER_UP, &tx->flags);

		spin_unlock_irqrestore(&priv->lock, flags);
867 | } |
868 | |
869 | netif_tx_unlock(dev); |
870 | } |
871 | |
872 | int ipoib_cm_dev_open(struct net_device *dev) |
873 | { |
874 | struct ipoib_dev_priv *priv = ipoib_priv(dev); |
875 | int ret; |
876 | |
877 | if (!IPOIB_CM_SUPPORTED(dev->dev_addr)) |
878 | return 0; |
879 | |
	priv->cm.id = ib_create_cm_id(priv->ca, ipoib_cm_rx_handler, dev);
	if (IS_ERR(priv->cm.id)) {
		pr_warn("%s: failed to create CM ID\n", priv->ca->name);
		ret = PTR_ERR(priv->cm.id);
		goto err_cm;
	}

	ret = ib_cm_listen(priv->cm.id,
			   cpu_to_be64(IPOIB_CM_IETF_ID | priv->qp->qp_num));
	if (ret) {
		pr_warn("%s: failed to listen on ID 0x%llx\n", priv->ca->name,
			IPOIB_CM_IETF_ID | priv->qp->qp_num);
		goto err_listen;
	}

	return 0;

err_listen:
	ib_destroy_cm_id(priv->cm.id);
err_cm:
	priv->cm.id = NULL;
901 | return ret; |
902 | } |
903 | |
904 | static void ipoib_cm_free_rx_reap_list(struct net_device *dev) |
905 | { |
906 | struct ipoib_dev_priv *priv = ipoib_priv(dev); |
907 | struct ipoib_cm_rx *rx, *n; |
908 | LIST_HEAD(list); |
909 | |
	spin_lock_irq(&priv->lock);
	list_splice_init(&priv->cm.rx_reap_list, &list);
	spin_unlock_irq(&priv->lock);

	list_for_each_entry_safe(rx, n, &list, list) {
		ib_destroy_cm_id(rx->id);
		ib_destroy_qp(rx->qp);
		if (!ipoib_cm_has_srq(dev)) {
			ipoib_cm_free_rx_ring(priv->dev, rx->rx_ring);
			spin_lock_irq(&priv->lock);
			--priv->cm.nonsrq_conn_qp;
			spin_unlock_irq(&priv->lock);
		}
		kfree(rx);
924 | } |
925 | } |
926 | |
927 | void ipoib_cm_dev_stop(struct net_device *dev) |
928 | { |
929 | struct ipoib_dev_priv *priv = ipoib_priv(dev); |
930 | struct ipoib_cm_rx *p; |
931 | unsigned long begin; |
932 | int ret; |
933 | |
934 | if (!IPOIB_CM_SUPPORTED(dev->dev_addr) || !priv->cm.id) |
935 | return; |
936 | |
	ib_destroy_cm_id(priv->cm.id);
	priv->cm.id = NULL;

	spin_lock_irq(&priv->lock);
	while (!list_empty(&priv->cm.passive_ids)) {
		p = list_entry(priv->cm.passive_ids.next, typeof(*p), list);
		list_move(&p->list, &priv->cm.rx_error_list);
		p->state = IPOIB_CM_RX_ERROR;
		spin_unlock_irq(&priv->lock);
		ret = ib_modify_qp(p->qp, &ipoib_cm_err_attr, IB_QP_STATE);
		if (ret)
			ipoib_warn(priv, "unable to move qp to error state: %d\n", ret);
		spin_lock_irq(&priv->lock);
	}

	/* Wait for all RX to be drained */
	begin = jiffies;

	while (!list_empty(&priv->cm.rx_error_list) ||
	       !list_empty(&priv->cm.rx_flush_list) ||
	       !list_empty(&priv->cm.rx_drain_list)) {
		if (time_after(jiffies, begin + 5 * HZ)) {
			ipoib_warn(priv, "RX drain timing out\n");

			/*
			 * assume the HW is wedged and just free up everything.
			 */
			list_splice_init(&priv->cm.rx_flush_list,
					 &priv->cm.rx_reap_list);
			list_splice_init(&priv->cm.rx_error_list,
					 &priv->cm.rx_reap_list);
			list_splice_init(&priv->cm.rx_drain_list,
					 &priv->cm.rx_reap_list);
			break;
		}
		spin_unlock_irq(&priv->lock);
		usleep_range(1000, 2000);
		ipoib_drain_cq(dev);
		spin_lock_irq(&priv->lock);
	}

	spin_unlock_irq(&priv->lock);

	ipoib_cm_free_rx_reap_list(dev);

	cancel_delayed_work(&priv->cm.stale_task);
983 | } |
984 | |
985 | static int ipoib_cm_rep_handler(struct ib_cm_id *cm_id, |
986 | const struct ib_cm_event *event) |
987 | { |
988 | struct ipoib_cm_tx *p = cm_id->context; |
	struct ipoib_dev_priv *priv = ipoib_priv(p->dev);
990 | struct ipoib_cm_data *data = event->private_data; |
991 | struct sk_buff_head skqueue; |
992 | struct ib_qp_attr qp_attr; |
993 | int qp_attr_mask, ret; |
994 | struct sk_buff *skb; |
995 | |
996 | p->mtu = be32_to_cpu(data->mtu); |
997 | |
998 | if (p->mtu <= IPOIB_ENCAP_LEN) { |
999 | ipoib_warn(priv, "Rejecting connection: mtu %d <= %d\n" , |
1000 | p->mtu, IPOIB_ENCAP_LEN); |
1001 | return -EINVAL; |
1002 | } |
1003 | |
1004 | qp_attr.qp_state = IB_QPS_RTR; |
	ret = ib_cm_init_qp_attr(cm_id, &qp_attr, &qp_attr_mask);
	if (ret) {
		ipoib_warn(priv, "failed to init QP attr for RTR: %d\n", ret);
		return ret;
	}

	qp_attr.rq_psn = 0 /* FIXME */;
	ret = ib_modify_qp(p->qp, &qp_attr, qp_attr_mask);
	if (ret) {
		ipoib_warn(priv, "failed to modify QP to RTR: %d\n", ret);
		return ret;
	}

	qp_attr.qp_state = IB_QPS_RTS;
	ret = ib_cm_init_qp_attr(cm_id, &qp_attr, &qp_attr_mask);
	if (ret) {
		ipoib_warn(priv, "failed to init QP attr for RTS: %d\n", ret);
		return ret;
	}
	ret = ib_modify_qp(p->qp, &qp_attr, qp_attr_mask);
	if (ret) {
		ipoib_warn(priv, "failed to modify QP to RTS: %d\n", ret);
		return ret;
	}
1029 | |
	skb_queue_head_init(&skqueue);

	netif_tx_lock_bh(p->dev);
	spin_lock_irq(&priv->lock);
	set_bit(IPOIB_FLAG_OPER_UP, &p->flags);
	if (p->neigh)
		while ((skb = __skb_dequeue(&p->neigh->queue)))
			__skb_queue_tail(&skqueue, skb);
	spin_unlock_irq(&priv->lock);
	netif_tx_unlock_bh(p->dev);

	while ((skb = __skb_dequeue(&skqueue))) {
		skb->dev = p->dev;
		ret = dev_queue_xmit(skb);
		if (ret)
			ipoib_warn(priv, "%s:dev_queue_xmit failed to re-queue packet, ret:%d\n",
				   __func__, ret);
	}

	ret = ib_send_cm_rtu(cm_id, NULL, 0);
	if (ret) {
		ipoib_warn(priv, "failed to send RTU: %d\n", ret);
1052 | return ret; |
1053 | } |
1054 | return 0; |
1055 | } |
1056 | |
1057 | static struct ib_qp *ipoib_cm_create_tx_qp(struct net_device *dev, struct ipoib_cm_tx *tx) |
1058 | { |
1059 | struct ipoib_dev_priv *priv = ipoib_priv(dev); |
1060 | struct ib_qp_init_attr attr = { |
1061 | .send_cq = priv->send_cq, |
1062 | .recv_cq = priv->recv_cq, |
1063 | .srq = priv->cm.srq, |
1064 | .cap.max_send_wr = ipoib_sendq_size, |
1065 | .cap.max_send_sge = 1, |
1066 | .sq_sig_type = IB_SIGNAL_ALL_WR, |
1067 | .qp_type = IB_QPT_RC, |
1068 | .qp_context = tx, |
1069 | .create_flags = 0 |
1070 | }; |
1071 | struct ib_qp *tx_qp; |
1072 | |
1073 | if (dev->features & NETIF_F_SG) |
1074 | attr.cap.max_send_sge = min_t(u32, priv->ca->attrs.max_send_sge, |
1075 | MAX_SKB_FRAGS + 1); |
1076 | |
	tx_qp = ib_create_qp(priv->pd, &attr);
1078 | tx->max_send_sge = attr.cap.max_send_sge; |
1079 | return tx_qp; |
1080 | } |
1081 | |
1082 | static int ipoib_cm_send_req(struct net_device *dev, |
1083 | struct ib_cm_id *id, struct ib_qp *qp, |
1084 | u32 qpn, |
1085 | struct sa_path_rec *pathrec) |
1086 | { |
1087 | struct ipoib_dev_priv *priv = ipoib_priv(dev); |
1088 | struct ipoib_cm_data data = {}; |
1089 | struct ib_cm_req_param req = {}; |
1090 | |
1091 | data.qpn = cpu_to_be32(priv->qp->qp_num); |
1092 | data.mtu = cpu_to_be32(IPOIB_CM_BUF_SIZE); |
1093 | |
1094 | req.primary_path = pathrec; |
1095 | req.alternate_path = NULL; |
1096 | req.service_id = cpu_to_be64(IPOIB_CM_IETF_ID | qpn); |
1097 | req.qp_num = qp->qp_num; |
1098 | req.qp_type = qp->qp_type; |
1099 | req.private_data = &data; |
1100 | req.private_data_len = sizeof(data); |
1101 | req.flow_control = 0; |
1102 | |
1103 | req.starting_psn = 0; /* FIXME */ |
1104 | |
1105 | /* |
1106 | * Pick some arbitrary defaults here; we could make these |
1107 | * module parameters if anyone cared about setting them. |
1108 | */ |
1109 | req.responder_resources = 4; |
1110 | req.remote_cm_response_timeout = 20; |
1111 | req.local_cm_response_timeout = 20; |
1112 | req.retry_count = 0; /* RFC draft warns against retries */ |
1113 | req.rnr_retry_count = 0; /* RFC draft warns against retries */ |
1114 | req.max_cm_retries = 15; |
1115 | req.srq = ipoib_cm_has_srq(dev); |
	return ib_send_cm_req(id, &req);
1117 | } |
1118 | |
1119 | static int ipoib_cm_modify_tx_init(struct net_device *dev, |
1120 | struct ib_cm_id *cm_id, struct ib_qp *qp) |
1121 | { |
1122 | struct ipoib_dev_priv *priv = ipoib_priv(dev); |
1123 | struct ib_qp_attr qp_attr; |
1124 | int qp_attr_mask, ret; |
1125 | |
1126 | qp_attr.pkey_index = priv->pkey_index; |
1127 | qp_attr.qp_state = IB_QPS_INIT; |
1128 | qp_attr.qp_access_flags = IB_ACCESS_LOCAL_WRITE; |
1129 | qp_attr.port_num = priv->port; |
1130 | qp_attr_mask = IB_QP_STATE | IB_QP_ACCESS_FLAGS | IB_QP_PKEY_INDEX | IB_QP_PORT; |
1131 | |
	ret = ib_modify_qp(qp, &qp_attr, qp_attr_mask);
	if (ret) {
		ipoib_warn(priv, "failed to modify tx QP to INIT: %d\n", ret);
1135 | return ret; |
1136 | } |
1137 | return 0; |
1138 | } |
1139 | |
1140 | static int ipoib_cm_tx_init(struct ipoib_cm_tx *p, u32 qpn, |
1141 | struct sa_path_rec *pathrec) |
1142 | { |
	struct ipoib_dev_priv *priv = ipoib_priv(p->dev);
1144 | unsigned int noio_flag; |
1145 | int ret; |
1146 | |
1147 | noio_flag = memalloc_noio_save(); |
1148 | p->tx_ring = vzalloc(array_size(ipoib_sendq_size, sizeof(*p->tx_ring))); |
1149 | if (!p->tx_ring) { |
		memalloc_noio_restore(noio_flag);
1151 | ret = -ENOMEM; |
1152 | goto err_tx; |
1153 | } |
1154 | |
	p->qp = ipoib_cm_create_tx_qp(p->dev, p);
	memalloc_noio_restore(noio_flag);
	if (IS_ERR(p->qp)) {
		ret = PTR_ERR(p->qp);
		ipoib_warn(priv, "failed to create tx qp: %d\n", ret);
		goto err_qp;
	}

	p->id = ib_create_cm_id(priv->ca, ipoib_cm_tx_handler, p);
	if (IS_ERR(p->id)) {
		ret = PTR_ERR(p->id);
		ipoib_warn(priv, "failed to create tx cm id: %d\n", ret);
		goto err_id;
	}

	ret = ipoib_cm_modify_tx_init(p->dev, p->id, p->qp);
	if (ret) {
		ipoib_warn(priv, "failed to modify tx qp to rtr: %d\n", ret);
		goto err_modify_send;
	}

	ret = ipoib_cm_send_req(p->dev, p->id, p->qp, qpn, pathrec);
	if (ret) {
		ipoib_warn(priv, "failed to send cm req: %d\n", ret);
		goto err_modify_send;
	}

	ipoib_dbg(priv, "Request connection 0x%x for gid %pI6 qpn 0x%x\n",
		  p->qp->qp_num, pathrec->dgid.raw, qpn);

	return 0;

err_modify_send:
	ib_destroy_cm_id(p->id);
err_id:
	p->id = NULL;
	ib_destroy_qp(p->qp);
err_qp:
	p->qp = NULL;
	vfree(p->tx_ring);
1195 | err_tx: |
1196 | return ret; |
1197 | } |
1198 | |
1199 | static void ipoib_cm_tx_destroy(struct ipoib_cm_tx *p) |
1200 | { |
	struct ipoib_dev_priv *priv = ipoib_priv(p->dev);
1202 | struct ipoib_tx_buf *tx_req; |
1203 | unsigned long begin; |
1204 | |
1205 | ipoib_dbg(priv, "Destroy active connection 0x%x head 0x%x tail 0x%x\n" , |
1206 | p->qp ? p->qp->qp_num : 0, p->tx_head, p->tx_tail); |
1207 | |
1208 | if (p->id) |
		ib_destroy_cm_id(p->id);
1210 | |
1211 | if (p->tx_ring) { |
1212 | /* Wait for all sends to complete */ |
1213 | begin = jiffies; |
1214 | while ((int) p->tx_tail - (int) p->tx_head < 0) { |
1215 | if (time_after(jiffies, begin + 5 * HZ)) { |
1216 | ipoib_warn(priv, "timing out; %d sends not completed\n" , |
1217 | p->tx_head - p->tx_tail); |
1218 | goto timeout; |
1219 | } |
1220 | |
			usleep_range(1000, 2000);
1222 | } |
1223 | } |
1224 | |
1225 | timeout: |
1226 | |
	while ((int) p->tx_tail - (int) p->tx_head < 0) {
		tx_req = &p->tx_ring[p->tx_tail & (ipoib_sendq_size - 1)];
		ipoib_dma_unmap_tx(priv, tx_req);
		dev_kfree_skb_any(tx_req->skb);
		netif_tx_lock_bh(p->dev);
		++p->tx_tail;
		++priv->global_tx_tail;
		if (unlikely((priv->global_tx_head - priv->global_tx_tail) <=
			     ipoib_sendq_size >> 1) &&
		    netif_queue_stopped(p->dev) &&
		    test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags))
			netif_wake_queue(p->dev);
		netif_tx_unlock_bh(p->dev);
	}

	if (p->qp)
		ib_destroy_qp(p->qp);

	vfree(p->tx_ring);
	kfree(p);
1247 | } |
1248 | |
1249 | static int ipoib_cm_tx_handler(struct ib_cm_id *cm_id, |
1250 | const struct ib_cm_event *event) |
1251 | { |
1252 | struct ipoib_cm_tx *tx = cm_id->context; |
	struct ipoib_dev_priv *priv = ipoib_priv(tx->dev);
1254 | struct net_device *dev = priv->dev; |
1255 | struct ipoib_neigh *neigh; |
1256 | unsigned long flags; |
1257 | int ret; |
1258 | |
1259 | switch (event->event) { |
1260 | case IB_CM_DREQ_RECEIVED: |
1261 | ipoib_dbg(priv, "DREQ received.\n" ); |
1262 | ib_send_cm_drep(cm_id, NULL, private_data_len: 0); |
1263 | break; |
1264 | case IB_CM_REP_RECEIVED: |
1265 | ipoib_dbg(priv, "REP received.\n" ); |
1266 | ret = ipoib_cm_rep_handler(cm_id, event); |
1267 | if (ret) |
1268 | ib_send_cm_rej(cm_id, reason: IB_CM_REJ_CONSUMER_DEFINED, |
1269 | NULL, ari_length: 0, NULL, private_data_len: 0); |
1270 | break; |
1271 | case IB_CM_REQ_ERROR: |
1272 | case IB_CM_REJ_RECEIVED: |
1273 | case IB_CM_TIMEWAIT_EXIT: |
1274 | ipoib_dbg(priv, "CM error %d.\n" , event->event); |
1275 | netif_tx_lock_bh(dev); |
1276 | spin_lock_irqsave(&priv->lock, flags); |
1277 | neigh = tx->neigh; |
1278 | |
1279 | if (neigh) { |
1280 | neigh->cm = NULL; |
1281 | ipoib_neigh_free(neigh); |
1282 | |
1283 | tx->neigh = NULL; |
1284 | } |
1285 | |
		if (test_and_clear_bit(IPOIB_FLAG_INITIALIZED, &tx->flags)) {
			list_move(&tx->list, &priv->cm.reap_list);
			queue_work(priv->wq, &priv->cm.reap_task);
		}

		spin_unlock_irqrestore(&priv->lock, flags);
1292 | netif_tx_unlock_bh(dev); |
1293 | break; |
1294 | default: |
1295 | break; |
1296 | } |
1297 | |
1298 | return 0; |
1299 | } |
1300 | |
1301 | struct ipoib_cm_tx *ipoib_cm_create_tx(struct net_device *dev, struct ipoib_path *path, |
1302 | struct ipoib_neigh *neigh) |
1303 | { |
1304 | struct ipoib_dev_priv *priv = ipoib_priv(dev); |
1305 | struct ipoib_cm_tx *tx; |
1306 | |
	tx = kzalloc(sizeof(*tx), GFP_ATOMIC);
1308 | if (!tx) |
1309 | return NULL; |
1310 | |
1311 | neigh->cm = tx; |
1312 | tx->neigh = neigh; |
1313 | tx->dev = dev; |
	list_add(&tx->list, &priv->cm.start_list);
	set_bit(IPOIB_FLAG_INITIALIZED, &tx->flags);
	queue_work(priv->wq, &priv->cm.start_task);
1317 | return tx; |
1318 | } |
1319 | |
1320 | void ipoib_cm_destroy_tx(struct ipoib_cm_tx *tx) |
1321 | { |
	struct ipoib_dev_priv *priv = ipoib_priv(tx->dev);
	unsigned long flags;

	if (test_and_clear_bit(IPOIB_FLAG_INITIALIZED, &tx->flags)) {
		spin_lock_irqsave(&priv->lock, flags);
		list_move(&tx->list, &priv->cm.reap_list);
		queue_work(priv->wq, &priv->cm.reap_task);
		ipoib_dbg(priv, "Reap connection for gid %pI6\n",
			  tx->neigh->daddr + 4);
		tx->neigh = NULL;
		spin_unlock_irqrestore(&priv->lock, flags);
1332 | } |
1333 | } |
1334 | |
1335 | #define QPN_AND_OPTIONS_OFFSET 4 |
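
/*
 * The offset reflects the IPoIB hardware address layout as used here:
 * bytes 0-3 carry the QPN plus option flags (byte 0 holds flags such as
 * IPOIB_FLAGS_RC, see ipoib_cm_dev_init()), and the port GID starts at
 * byte 4, which is why the path lookup below uses
 * neigh->daddr + QPN_AND_OPTIONS_OFFSET.
 */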
1336 | |
1337 | static void ipoib_cm_tx_start(struct work_struct *work) |
1338 | { |
1339 | struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv, |
1340 | cm.start_task); |
1341 | struct net_device *dev = priv->dev; |
1342 | struct ipoib_neigh *neigh; |
1343 | struct ipoib_cm_tx *p; |
1344 | unsigned long flags; |
1345 | struct ipoib_path *path; |
1346 | int ret; |
1347 | |
1348 | struct sa_path_rec pathrec; |
1349 | u32 qpn; |
1350 | |
1351 | netif_tx_lock_bh(dev); |
1352 | spin_lock_irqsave(&priv->lock, flags); |
1353 | |
	while (!list_empty(&priv->cm.start_list)) {
		p = list_entry(priv->cm.start_list.next, typeof(*p), list);
		list_del_init(&p->list);
1357 | neigh = p->neigh; |
1358 | |
1359 | qpn = IPOIB_QPN(neigh->daddr); |
1360 | /* |
1361 | * As long as the search is with these 2 locks, |
1362 | * path existence indicates its validity. |
1363 | */ |
		path = __path_find(dev, neigh->daddr + QPN_AND_OPTIONS_OFFSET);
		if (!path) {
			pr_info("%s ignore not valid path %pI6\n",
				__func__,
				neigh->daddr + QPN_AND_OPTIONS_OFFSET);
1369 | goto free_neigh; |
1370 | } |
1371 | memcpy(&pathrec, &path->pathrec, sizeof(pathrec)); |
1372 | |
		spin_unlock_irqrestore(&priv->lock, flags);
1374 | netif_tx_unlock_bh(dev); |
1375 | |
		ret = ipoib_cm_tx_init(p, qpn, &pathrec);
1377 | |
1378 | netif_tx_lock_bh(dev); |
1379 | spin_lock_irqsave(&priv->lock, flags); |
1380 | |
1381 | if (ret) { |
1382 | free_neigh: |
1383 | neigh = p->neigh; |
1384 | if (neigh) { |
1385 | neigh->cm = NULL; |
1386 | ipoib_neigh_free(neigh); |
1387 | } |
			list_del(&p->list);
			kfree(p);
1390 | } |
1391 | } |
1392 | |
	spin_unlock_irqrestore(&priv->lock, flags);
1394 | netif_tx_unlock_bh(dev); |
1395 | } |
1396 | |
1397 | static void ipoib_cm_tx_reap(struct work_struct *work) |
1398 | { |
1399 | struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv, |
1400 | cm.reap_task); |
1401 | struct net_device *dev = priv->dev; |
1402 | struct ipoib_cm_tx *p; |
1403 | unsigned long flags; |
1404 | |
1405 | netif_tx_lock_bh(dev); |
1406 | spin_lock_irqsave(&priv->lock, flags); |
1407 | |
	while (!list_empty(&priv->cm.reap_list)) {
		p = list_entry(priv->cm.reap_list.next, typeof(*p), list);
		list_del_init(&p->list);
		spin_unlock_irqrestore(&priv->lock, flags);
1412 | netif_tx_unlock_bh(dev); |
1413 | ipoib_cm_tx_destroy(p); |
1414 | netif_tx_lock_bh(dev); |
1415 | spin_lock_irqsave(&priv->lock, flags); |
1416 | } |
1417 | |
	spin_unlock_irqrestore(&priv->lock, flags);
1419 | netif_tx_unlock_bh(dev); |
1420 | } |
1421 | |
1422 | static void ipoib_cm_skb_reap(struct work_struct *work) |
1423 | { |
1424 | struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv, |
1425 | cm.skb_task); |
1426 | struct net_device *dev = priv->dev; |
1427 | struct sk_buff *skb; |
1428 | unsigned long flags; |
1429 | unsigned int mtu = priv->mcast_mtu; |
1430 | |
1431 | netif_tx_lock_bh(dev); |
1432 | spin_lock_irqsave(&priv->lock, flags); |
1433 | |
	while ((skb = skb_dequeue(&priv->cm.skb_queue))) {
		spin_unlock_irqrestore(&priv->lock, flags);
1436 | netif_tx_unlock_bh(dev); |
1437 | |
1438 | if (skb->protocol == htons(ETH_P_IP)) { |
1439 | memset(IPCB(skb), 0, sizeof(*IPCB(skb))); |
			icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
1441 | } |
1442 | #if IS_ENABLED(CONFIG_IPV6) |
1443 | else if (skb->protocol == htons(ETH_P_IPV6)) { |
1444 | memset(IP6CB(skb), 0, sizeof(*IP6CB(skb))); |
			icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
1446 | } |
1447 | #endif |
1448 | dev_kfree_skb_any(skb); |
1449 | |
1450 | netif_tx_lock_bh(dev); |
1451 | spin_lock_irqsave(&priv->lock, flags); |
1452 | } |
1453 | |
	spin_unlock_irqrestore(&priv->lock, flags);
1455 | netif_tx_unlock_bh(dev); |
1456 | } |
1457 | |
1458 | void ipoib_cm_skb_too_long(struct net_device *dev, struct sk_buff *skb, |
1459 | unsigned int mtu) |
1460 | { |
1461 | struct ipoib_dev_priv *priv = ipoib_priv(dev); |
	int e = skb_queue_empty(&priv->cm.skb_queue);

	skb_dst_update_pmtu(skb, mtu);

	skb_queue_tail(&priv->cm.skb_queue, skb);
	if (e)
		queue_work(priv->wq, &priv->cm.skb_task);
1469 | } |
1470 | |
1471 | static void ipoib_cm_rx_reap(struct work_struct *work) |
1472 | { |
1473 | ipoib_cm_free_rx_reap_list(container_of(work, struct ipoib_dev_priv, |
1474 | cm.rx_reap_task)->dev); |
1475 | } |
1476 | |
1477 | static void ipoib_cm_stale_task(struct work_struct *work) |
1478 | { |
1479 | struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv, |
1480 | cm.stale_task.work); |
1481 | struct ipoib_cm_rx *p; |
1482 | int ret; |
1483 | |
	spin_lock_irq(&priv->lock);
	while (!list_empty(&priv->cm.passive_ids)) {
		/* List is sorted by LRU, start from tail,
		 * stop when we see a recently used entry */
		p = list_entry(priv->cm.passive_ids.prev, typeof(*p), list);
		if (time_before_eq(jiffies, p->jiffies + IPOIB_CM_RX_TIMEOUT))
			break;
		list_move(&p->list, &priv->cm.rx_error_list);
		p->state = IPOIB_CM_RX_ERROR;
		spin_unlock_irq(&priv->lock);
		ret = ib_modify_qp(p->qp, &ipoib_cm_err_attr, IB_QP_STATE);
		if (ret)
			ipoib_warn(priv, "unable to move qp to error state: %d\n", ret);
		spin_lock_irq(&priv->lock);
	}

	if (!list_empty(&priv->cm.passive_ids))
		queue_delayed_work(priv->wq,
				   &priv->cm.stale_task, IPOIB_CM_RX_DELAY);
	spin_unlock_irq(&priv->lock);
1504 | } |
1505 | |
1506 | static ssize_t mode_show(struct device *d, struct device_attribute *attr, |
1507 | char *buf) |
1508 | { |
1509 | struct net_device *dev = to_net_dev(d); |
1510 | struct ipoib_dev_priv *priv = ipoib_priv(dev); |
1511 | |
	if (test_bit(IPOIB_FLAG_ADMIN_CM, &priv->flags))
		return sysfs_emit(buf, "connected\n");
	else
		return sysfs_emit(buf, "datagram\n");
1516 | } |
1517 | |
1518 | static ssize_t mode_store(struct device *d, struct device_attribute *attr, |
1519 | const char *buf, size_t count) |
1520 | { |
1521 | struct net_device *dev = to_net_dev(d); |
1522 | int ret; |
1523 | |
	if (!rtnl_trylock())
		return restart_syscall();
1527 | |
1528 | if (dev->reg_state != NETREG_REGISTERED) { |
1529 | rtnl_unlock(); |
1530 | return -EPERM; |
1531 | } |
1532 | |
1533 | ret = ipoib_set_mode(dev, buf); |
1534 | |
	/* ipoib_set_mode() is assumed to return with the rtnl lock still
	 * held unless it returned -EBUSY, in which case it has already
	 * dropped the lock and there is no need to rtnl_unlock().
	 */
1539 | if (ret != -EBUSY) |
1540 | rtnl_unlock(); |
1541 | |
1542 | return (!ret || ret == -EBUSY) ? count : ret; |
1543 | } |
1544 | |
1545 | static DEVICE_ATTR_RW(mode); |
1546 | |
1547 | int ipoib_cm_add_mode_attr(struct net_device *dev) |
1548 | { |
	return device_create_file(&dev->dev, &dev_attr_mode);
1550 | } |
1551 | |
1552 | static void ipoib_cm_create_srq(struct net_device *dev, int max_sge) |
1553 | { |
1554 | struct ipoib_dev_priv *priv = ipoib_priv(dev); |
1555 | struct ib_srq_init_attr srq_init_attr = { |
1556 | .srq_type = IB_SRQT_BASIC, |
1557 | .attr = { |
1558 | .max_wr = ipoib_recvq_size, |
1559 | .max_sge = max_sge |
1560 | } |
1561 | }; |
1562 | |
	priv->cm.srq = ib_create_srq(priv->pd, &srq_init_attr);
	if (IS_ERR(priv->cm.srq)) {
		if (PTR_ERR(priv->cm.srq) != -EOPNOTSUPP)
			pr_warn("%s: failed to allocate SRQ, error %ld\n",
				priv->ca->name, PTR_ERR(priv->cm.srq));
1568 | priv->cm.srq = NULL; |
1569 | return; |
1570 | } |
1571 | |
1572 | priv->cm.srq_ring = vzalloc(array_size(ipoib_recvq_size, |
1573 | sizeof(*priv->cm.srq_ring))); |
1574 | if (!priv->cm.srq_ring) { |
		ib_destroy_srq(priv->cm.srq);
1576 | priv->cm.srq = NULL; |
1577 | return; |
1578 | } |
1579 | |
1580 | } |
1581 | |
1582 | int ipoib_cm_dev_init(struct net_device *dev) |
1583 | { |
1584 | struct ipoib_dev_priv *priv = ipoib_priv(dev); |
1585 | int max_srq_sge, i; |
1586 | u8 addr; |
1587 | |
	INIT_LIST_HEAD(&priv->cm.passive_ids);
	INIT_LIST_HEAD(&priv->cm.reap_list);
	INIT_LIST_HEAD(&priv->cm.start_list);
	INIT_LIST_HEAD(&priv->cm.rx_error_list);
	INIT_LIST_HEAD(&priv->cm.rx_flush_list);
	INIT_LIST_HEAD(&priv->cm.rx_drain_list);
	INIT_LIST_HEAD(&priv->cm.rx_reap_list);
1595 | INIT_WORK(&priv->cm.start_task, ipoib_cm_tx_start); |
1596 | INIT_WORK(&priv->cm.reap_task, ipoib_cm_tx_reap); |
1597 | INIT_WORK(&priv->cm.skb_task, ipoib_cm_skb_reap); |
1598 | INIT_WORK(&priv->cm.rx_reap_task, ipoib_cm_rx_reap); |
1599 | INIT_DELAYED_WORK(&priv->cm.stale_task, ipoib_cm_stale_task); |
1600 | |
	skb_queue_head_init(&priv->cm.skb_queue);

	ipoib_dbg(priv, "max_srq_sge=%d\n", priv->ca->attrs.max_srq_sge);

	max_srq_sge = min_t(int, IPOIB_CM_RX_SG, priv->ca->attrs.max_srq_sge);
	ipoib_cm_create_srq(dev, max_srq_sge);
1607 | if (ipoib_cm_has_srq(dev)) { |
1608 | priv->cm.max_cm_mtu = max_srq_sge * PAGE_SIZE - 0x10; |
1609 | priv->cm.num_frags = max_srq_sge; |
1610 | ipoib_dbg(priv, "max_cm_mtu = 0x%x, num_frags=%d\n" , |
1611 | priv->cm.max_cm_mtu, priv->cm.num_frags); |
1612 | } else { |
1613 | priv->cm.max_cm_mtu = IPOIB_CM_MTU; |
1614 | priv->cm.num_frags = IPOIB_CM_RX_SG; |
1615 | } |
1616 | |
	ipoib_cm_init_rx_wr(dev, &priv->cm.rx_wr, priv->cm.rx_sge);
1618 | |
1619 | if (ipoib_cm_has_srq(dev)) { |
1620 | for (i = 0; i < ipoib_recvq_size; ++i) { |
			if (!ipoib_cm_alloc_rx_skb(dev, priv->cm.srq_ring, i,
						   priv->cm.num_frags - 1,
						   priv->cm.srq_ring[i].mapping,
						   GFP_KERNEL)) {
				ipoib_warn(priv, "failed to allocate "
					   "receive buffer %d\n", i);
1627 | ipoib_cm_dev_cleanup(dev); |
1628 | return -ENOMEM; |
1629 | } |
1630 | |
			if (ipoib_cm_post_receive_srq(dev, i)) {
				ipoib_warn(priv, "ipoib_cm_post_receive_srq "
					   "failed for buf %d\n", i);
1634 | ipoib_cm_dev_cleanup(dev); |
1635 | return -EIO; |
1636 | } |
1637 | } |
1638 | } |
1639 | |
1640 | addr = IPOIB_FLAGS_RC; |
	dev_addr_mod(dev, 0, &addr, 1);
1642 | return 0; |
1643 | } |
1644 | |
1645 | void ipoib_cm_dev_cleanup(struct net_device *dev) |
1646 | { |
1647 | struct ipoib_dev_priv *priv = ipoib_priv(dev); |
1648 | |
1649 | if (!priv->cm.srq) |
1650 | return; |
1651 | |
1652 | ipoib_dbg(priv, "Cleanup ipoib connected mode.\n" ); |
1653 | |
	ib_destroy_srq(priv->cm.srq);
1655 | priv->cm.srq = NULL; |
1656 | if (!priv->cm.srq_ring) |
1657 | return; |
1658 | |
	ipoib_cm_free_rx_ring(dev, priv->cm.srq_ring);
1660 | priv->cm.srq_ring = NULL; |
1661 | } |
1662 | |