1 | // SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause |
2 | |
3 | /* Authors: Bernard Metzler <bmt@zurich.ibm.com> */ |
4 | /* Fredy Neeser */ |
5 | /* Greg Joyce <greg@opengridcomputing.com> */ |
6 | /* Copyright (c) 2008-2019, IBM Corporation */ |
7 | /* Copyright (c) 2017, Open Grid Computing, Inc. */ |
8 | |
9 | #include <linux/errno.h> |
10 | #include <linux/types.h> |
11 | #include <linux/net.h> |
12 | #include <linux/inetdevice.h> |
13 | #include <net/addrconf.h> |
14 | #include <linux/workqueue.h> |
15 | #include <net/sock.h> |
16 | #include <net/tcp.h> |
17 | #include <linux/inet.h> |
18 | #include <linux/tcp.h> |
19 | #include <trace/events/sock.h> |
20 | |
21 | #include <rdma/iw_cm.h> |
22 | #include <rdma/ib_verbs.h> |
23 | #include <rdma/ib_user_verbs.h> |
24 | |
25 | #include "siw.h" |
26 | #include "siw_cm.h" |
27 | |
28 | /* |
29 | * Set to any combination of |
30 | * MPA_V2_RDMA_NO_RTR, MPA_V2_RDMA_READ_RTR, MPA_V2_RDMA_WRITE_RTR |
31 | */ |
32 | static __be16 rtr_type = MPA_V2_RDMA_READ_RTR | MPA_V2_RDMA_WRITE_RTR; |
33 | static const bool relaxed_ird_negotiation = true; |
34 | |
35 | static void siw_cm_llp_state_change(struct sock *s); |
36 | static void siw_cm_llp_data_ready(struct sock *s); |
37 | static void siw_cm_llp_write_space(struct sock *s); |
38 | static void siw_cm_llp_error_report(struct sock *s); |
39 | static int siw_cm_upcall(struct siw_cep *cep, enum iw_cm_event_type reason, |
40 | int status); |
41 | |
42 | static void siw_sk_assign_cm_upcalls(struct sock *sk) |
43 | { |
44 | struct siw_cep *cep = sk_to_cep(sk); |
45 | |
46 | write_lock_bh(&sk->sk_callback_lock); |
47 | cep->sk_state_change = sk->sk_state_change; |
48 | cep->sk_data_ready = sk->sk_data_ready; |
49 | cep->sk_write_space = sk->sk_write_space; |
50 | cep->sk_error_report = sk->sk_error_report; |
51 | |
52 | sk->sk_state_change = siw_cm_llp_state_change; |
53 | sk->sk_data_ready = siw_cm_llp_data_ready; |
54 | sk->sk_write_space = siw_cm_llp_write_space; |
55 | sk->sk_error_report = siw_cm_llp_error_report; |
56 | write_unlock_bh(&sk->sk_callback_lock); |
57 | } |
58 | |
59 | static void siw_sk_restore_upcalls(struct sock *sk, struct siw_cep *cep) |
60 | { |
61 | sk->sk_state_change = cep->sk_state_change; |
62 | sk->sk_data_ready = cep->sk_data_ready; |
63 | sk->sk_write_space = cep->sk_write_space; |
64 | sk->sk_error_report = cep->sk_error_report; |
65 | sk->sk_user_data = NULL; |
66 | } |
67 | |
68 | static void siw_qp_socket_assoc(struct siw_cep *cep, struct siw_qp *qp) |
69 | { |
70 | struct socket *s = cep->sock; |
71 | struct sock *sk = s->sk; |
72 | |
73 | write_lock_bh(&sk->sk_callback_lock); |
74 | |
75 | qp->attrs.sk = s; |
76 | sk->sk_data_ready = siw_qp_llp_data_ready; |
77 | sk->sk_write_space = siw_qp_llp_write_space; |
78 | |
79 | write_unlock_bh(&sk->sk_callback_lock); |
80 | } |
81 | |
82 | static void siw_socket_disassoc(struct socket *s) |
83 | { |
84 | struct sock *sk = s->sk; |
85 | struct siw_cep *cep; |
86 | |
87 | if (sk) { |
88 | write_lock_bh(&sk->sk_callback_lock); |
89 | cep = sk_to_cep(sk); |
90 | if (cep) { |
91 | siw_sk_restore_upcalls(sk, cep); |
92 | siw_cep_put(cep); |
93 | } else { |
94 | pr_warn("siw: cannot restore sk callbacks: no ep\n" ); |
95 | } |
96 | write_unlock_bh(&sk->sk_callback_lock); |
97 | } else { |
98 | pr_warn("siw: cannot restore sk callbacks: no sk\n" ); |
99 | } |
100 | } |
101 | |
/*
 * Data-ready upcall installed while waiting for the peer's first
 * (RTR) RDMA frame: feeds pending socket data into the RX path and
 * signals full connection establishment if that frame was consumed
 * cleanly.
 */
static void siw_rtr_data_ready(struct sock *sk)
{
	struct siw_cep *cep;
	struct siw_qp *qp = NULL;
	read_descriptor_t rd_desc;

	trace_sk_data_ready(sk);

	read_lock(&sk->sk_callback_lock);

	cep = sk_to_cep(sk);
	if (!cep) {
		WARN(1, "No connection endpoint\n" );
		goto out;
	}
	qp = sk_to_qp(sk);

	memset(&rd_desc, 0, sizeof(rd_desc));
	rd_desc.arg.data = qp;
	rd_desc.count = 1;

	tcp_read_sock(sk, desc: &rd_desc, recv_actor: siw_tcp_rx_data);
	/*
	 * Check if first frame was successfully processed.
	 * Signal connection full establishment if yes.
	 * Failed data processing would have already scheduled
	 * connection drop.
	 */
	if (!qp->rx_stream.rx_suspend)
		siw_cm_upcall(cep, reason: IW_CM_EVENT_ESTABLISHED, status: 0);
out:
	read_unlock(&sk->sk_callback_lock);
	/* Move socket RX/TX handling under regular QP control */
	if (qp)
		siw_qp_socket_assoc(cep, qp);
}
137 | |
138 | static void siw_sk_assign_rtr_upcalls(struct siw_cep *cep) |
139 | { |
140 | struct sock *sk = cep->sock->sk; |
141 | |
142 | write_lock_bh(&sk->sk_callback_lock); |
143 | sk->sk_data_ready = siw_rtr_data_ready; |
144 | sk->sk_write_space = siw_qp_llp_write_space; |
145 | write_unlock_bh(&sk->sk_callback_lock); |
146 | } |
147 | |
148 | static void siw_cep_socket_assoc(struct siw_cep *cep, struct socket *s) |
149 | { |
150 | cep->sock = s; |
151 | siw_cep_get(cep); |
152 | s->sk->sk_user_data = cep; |
153 | |
154 | siw_sk_assign_cm_upcalls(sk: s->sk); |
155 | } |
156 | |
157 | static struct siw_cep *siw_cep_alloc(struct siw_device *sdev) |
158 | { |
159 | struct siw_cep *cep = kzalloc(size: sizeof(*cep), GFP_KERNEL); |
160 | unsigned long flags; |
161 | |
162 | if (!cep) |
163 | return NULL; |
164 | |
165 | INIT_LIST_HEAD(list: &cep->listenq); |
166 | INIT_LIST_HEAD(list: &cep->devq); |
167 | INIT_LIST_HEAD(list: &cep->work_freelist); |
168 | |
169 | kref_init(kref: &cep->ref); |
170 | cep->state = SIW_EPSTATE_IDLE; |
171 | init_waitqueue_head(&cep->waitq); |
172 | spin_lock_init(&cep->lock); |
173 | cep->sdev = sdev; |
174 | cep->enhanced_rdma_conn_est = false; |
175 | |
176 | spin_lock_irqsave(&sdev->lock, flags); |
177 | list_add_tail(new: &cep->devq, head: &sdev->cep_list); |
178 | spin_unlock_irqrestore(lock: &sdev->lock, flags); |
179 | |
180 | siw_dbg_cep(cep, "new endpoint\n" ); |
181 | return cep; |
182 | } |
183 | |
184 | static void siw_cm_free_work(struct siw_cep *cep) |
185 | { |
186 | struct list_head *w, *tmp; |
187 | struct siw_cm_work *work; |
188 | |
189 | list_for_each_safe(w, tmp, &cep->work_freelist) { |
190 | work = list_entry(w, struct siw_cm_work, list); |
191 | list_del(entry: &work->list); |
192 | kfree(objp: work); |
193 | } |
194 | } |
195 | |
/*
 * Cancel a pending MPA timeout, if armed. If the delayed work was
 * still queued (i.e. had not started executing), drop the CEP
 * reference the timer held and free its work element. Runs under
 * cep->lock to serialize with the timer firing.
 */
static void siw_cancel_mpatimer(struct siw_cep *cep)
{
	spin_lock_bh(lock: &cep->lock);
	if (cep->mpa_timer) {
		if (cancel_delayed_work(dwork: &cep->mpa_timer->work)) {
			/* Timer did not fire: release its CEP reference */
			siw_cep_put(cep);
			kfree(objp: cep->mpa_timer); /* not needed again */
		}
		cep->mpa_timer = NULL;
	}
	spin_unlock_bh(lock: &cep->lock);
}
208 | |
209 | static void siw_put_work(struct siw_cm_work *work) |
210 | { |
211 | INIT_LIST_HEAD(list: &work->list); |
212 | spin_lock_bh(lock: &work->cep->lock); |
213 | list_add(new: &work->list, head: &work->cep->work_freelist); |
214 | spin_unlock_bh(lock: &work->cep->lock); |
215 | } |
216 | |
/*
 * Acquire exclusive ownership of the CEP. If another context holds
 * it, sleep (interruptibly) until released and retry; pending
 * signals are flushed so the wait can be repeated. Never fails.
 */
static void siw_cep_set_inuse(struct siw_cep *cep)
{
	unsigned long flags;
retry:
	spin_lock_irqsave(&cep->lock, flags);

	if (cep->in_use) {
		spin_unlock_irqrestore(lock: &cep->lock, flags);
		wait_event_interruptible(cep->waitq, !cep->in_use);
		if (signal_pending(current))
			flush_signals(current);
		goto retry;
	} else {
		cep->in_use = 1;
		spin_unlock_irqrestore(lock: &cep->lock, flags);
	}
}
234 | |
235 | static void siw_cep_set_free(struct siw_cep *cep) |
236 | { |
237 | unsigned long flags; |
238 | |
239 | spin_lock_irqsave(&cep->lock, flags); |
240 | cep->in_use = 0; |
241 | spin_unlock_irqrestore(lock: &cep->lock, flags); |
242 | |
243 | wake_up(&cep->waitq); |
244 | } |
245 | |
/*
 * kref release function: final teardown of a CEP. Frees any MPA
 * private data, drains the work free list, unlinks the endpoint
 * from its device and frees the memory.
 */
static void __siw_cep_dealloc(struct kref *ref)
{
	struct siw_cep *cep = container_of(ref, struct siw_cep, ref);
	struct siw_device *sdev = cep->sdev;
	unsigned long flags;

	/* A CEP must not die while still attached to a listener */
	WARN_ON(cep->listen_cep);

	/* kfree(NULL) is safe */
	kfree(objp: cep->mpa.pdata);
	spin_lock_bh(lock: &cep->lock);
	if (!list_empty(head: &cep->work_freelist))
		siw_cm_free_work(cep);
	spin_unlock_bh(lock: &cep->lock);

	/* Unlink from the device's endpoint list */
	spin_lock_irqsave(&sdev->lock, flags);
	list_del(entry: &cep->devq);
	spin_unlock_irqrestore(lock: &sdev->lock, flags);

	siw_dbg_cep(cep, "free endpoint\n" );
	kfree(objp: cep);
}
268 | |
269 | static struct siw_cm_work *siw_get_work(struct siw_cep *cep) |
270 | { |
271 | struct siw_cm_work *work = NULL; |
272 | |
273 | spin_lock_bh(lock: &cep->lock); |
274 | if (!list_empty(head: &cep->work_freelist)) { |
275 | work = list_entry(cep->work_freelist.next, struct siw_cm_work, |
276 | list); |
277 | list_del_init(entry: &work->list); |
278 | } |
279 | spin_unlock_bh(lock: &cep->lock); |
280 | return work; |
281 | } |
282 | |
283 | static int siw_cm_alloc_work(struct siw_cep *cep, int num) |
284 | { |
285 | struct siw_cm_work *work; |
286 | |
287 | while (num--) { |
288 | work = kmalloc(size: sizeof(*work), GFP_KERNEL); |
289 | if (!work) { |
290 | if (!(list_empty(head: &cep->work_freelist))) |
291 | siw_cm_free_work(cep); |
292 | return -ENOMEM; |
293 | } |
294 | work->cep = cep; |
295 | INIT_LIST_HEAD(list: &work->list); |
296 | list_add(new: &work->list, head: &cep->work_freelist); |
297 | } |
298 | return 0; |
299 | } |
300 | |
301 | /* |
302 | * siw_cm_upcall() |
303 | * |
304 | * Upcall to IWCM to inform about async connection events |
305 | */ |
static int siw_cm_upcall(struct siw_cep *cep, enum iw_cm_event_type reason,
			 int status)
{
	struct iw_cm_event event;
	struct iw_cm_id *id;

	memset(&event, 0, sizeof(event));
	event.status = status;
	event.event = reason;

	if (reason == IW_CM_EVENT_CONNECT_REQUEST) {
		/* New connection: report against the listening cm_id */
		event.provider_data = cep;
		id = cep->listen_cep->cm_id;
	} else {
		id = cep->cm_id;
	}
	/* Signal IRD and ORD */
	if (reason == IW_CM_EVENT_ESTABLISHED ||
	    reason == IW_CM_EVENT_CONNECT_REPLY) {
		/* Signal negotiated IRD/ORD values we will use */
		event.ird = cep->ird;
		event.ord = cep->ord;
	} else if (reason == IW_CM_EVENT_CONNECT_REQUEST) {
		/* Report the peer's view: our ORD is its IRD, and vice versa */
		event.ird = cep->ord;
		event.ord = cep->ird;
	}
	/* Signal private data and address information */
	if (reason == IW_CM_EVENT_CONNECT_REQUEST ||
	    reason == IW_CM_EVENT_CONNECT_REPLY) {
		u16 pd_len = be16_to_cpu(cep->mpa.hdr.params.pd_len);

		if (pd_len) {
			/*
			 * hand over MPA private data
			 */
			event.private_data_len = pd_len;
			event.private_data = cep->mpa.pdata;

			/* Hide MPA V2 IRD/ORD control */
			if (cep->enhanced_rdma_conn_est) {
				event.private_data_len -=
					sizeof(struct mpa_v2_data);
				event.private_data +=
					sizeof(struct mpa_v2_data);
			}
		}
		getname_local(s: cep->sock, a: &event.local_addr);
		getname_peer(s: cep->sock, a: &event.remote_addr);
	}
	siw_dbg_cep(cep, "[QP %u]: reason=%d, status=%d\n" ,
		    cep->qp ? qp_id(cep->qp) : UINT_MAX, reason, status);

	return id->event_handler(id, &event);
}
360 | |
361 | static void siw_free_cm_id(struct siw_cep *cep) |
362 | { |
363 | if (!cep->cm_id) |
364 | return; |
365 | |
366 | cep->cm_id->rem_ref(cep->cm_id); |
367 | cep->cm_id = NULL; |
368 | } |
369 | |
370 | static void siw_destroy_cep_sock(struct siw_cep *cep) |
371 | { |
372 | if (cep->sock) { |
373 | siw_socket_disassoc(s: cep->sock); |
374 | sock_release(sock: cep->sock); |
375 | cep->sock = NULL; |
376 | } |
377 | } |
378 | |
379 | /* |
380 | * siw_qp_cm_drop() |
381 | * |
382 | * Drops established LLP connection if present and not already |
383 | * scheduled for dropping. Called from user context, SQ workqueue |
384 | * or receive IRQ. Caller signals if socket can be immediately |
385 | * closed (basically, if not in IRQ). |
386 | */ |
void siw_qp_cm_drop(struct siw_qp *qp, int schedule)
{
	struct siw_cep *cep = qp->cep;

	/* Stop any further RX/TX activity on this QP first */
	qp->rx_stream.rx_suspend = 1;
	qp->tx_ctx.tx_suspend = 1;

	if (!qp->cep)
		return;

	if (schedule) {
		/* Cannot close here (e.g. IRQ): defer to workqueue context */
		siw_cm_queue_work(cep, type: SIW_CM_WORK_CLOSE_LLP);
	} else {
		siw_cep_set_inuse(cep);

		if (cep->state == SIW_EPSTATE_CLOSED) {
			siw_dbg_cep(cep, "already closed\n" );
			goto out;
		}
		siw_dbg_cep(cep, "immediate close, state %d\n" , cep->state);

		siw_send_terminate(qp);

		if (cep->cm_id) {
			/* Inform IWCM, depending on connection stage */
			switch (cep->state) {
			case SIW_EPSTATE_AWAIT_MPAREP:
				siw_cm_upcall(cep, reason: IW_CM_EVENT_CONNECT_REPLY,
					      status: -EINVAL);
				break;

			case SIW_EPSTATE_RDMA_MODE:
				siw_cm_upcall(cep, reason: IW_CM_EVENT_CLOSE, status: 0);
				break;

			case SIW_EPSTATE_IDLE:
			case SIW_EPSTATE_LISTENING:
			case SIW_EPSTATE_CONNECTING:
			case SIW_EPSTATE_AWAIT_MPAREQ:
			case SIW_EPSTATE_RECVD_MPAREQ:
			case SIW_EPSTATE_CLOSED:
			default:
				/* No event due for these states */
				break;
			}
			/* Drop IWCM id and the reference it implied */
			siw_free_cm_id(cep);
			siw_cep_put(cep);
		}
		cep->state = SIW_EPSTATE_CLOSED;

		siw_destroy_cep_sock(cep);
		if (cep->qp) {
			/* Break the QP <-> CEP association */
			cep->qp = NULL;
			siw_qp_put(qp);
		}
out:
		siw_cep_set_free(cep);
	}
}
444 | |
445 | void siw_cep_put(struct siw_cep *cep) |
446 | { |
447 | WARN_ON(kref_read(&cep->ref) < 1); |
448 | kref_put(kref: &cep->ref, release: __siw_cep_dealloc); |
449 | } |
450 | |
/* Release exclusive CEP ownership, then drop one reference. */
static void siw_cep_set_free_and_put(struct siw_cep *cep)
{
	siw_cep_set_free(cep);
	siw_cep_put(cep);
}
456 | |
457 | void siw_cep_get(struct siw_cep *cep) |
458 | { |
459 | kref_get(kref: &cep->ref); |
460 | } |
461 | |
462 | /* |
463 | * Expects params->pd_len in host byte order |
464 | */ |
465 | static int siw_send_mpareqrep(struct siw_cep *cep, const void *pdata, u8 pd_len) |
466 | { |
467 | struct socket *s = cep->sock; |
468 | struct mpa_rr *rr = &cep->mpa.hdr; |
469 | struct kvec iov[3]; |
470 | struct msghdr msg; |
471 | int rv; |
472 | int iovec_num = 0; |
473 | int mpa_len; |
474 | |
475 | memset(&msg, 0, sizeof(msg)); |
476 | |
477 | iov[iovec_num].iov_base = rr; |
478 | iov[iovec_num].iov_len = sizeof(*rr); |
479 | mpa_len = sizeof(*rr); |
480 | |
481 | if (cep->enhanced_rdma_conn_est) { |
482 | iovec_num++; |
483 | iov[iovec_num].iov_base = &cep->mpa.v2_ctrl; |
484 | iov[iovec_num].iov_len = sizeof(cep->mpa.v2_ctrl); |
485 | mpa_len += sizeof(cep->mpa.v2_ctrl); |
486 | } |
487 | if (pd_len) { |
488 | iovec_num++; |
489 | iov[iovec_num].iov_base = (char *)pdata; |
490 | iov[iovec_num].iov_len = pd_len; |
491 | mpa_len += pd_len; |
492 | } |
493 | if (cep->enhanced_rdma_conn_est) |
494 | pd_len += sizeof(cep->mpa.v2_ctrl); |
495 | |
496 | rr->params.pd_len = cpu_to_be16(pd_len); |
497 | |
498 | rv = kernel_sendmsg(sock: s, msg: &msg, vec: iov, num: iovec_num + 1, len: mpa_len); |
499 | |
500 | return rv < 0 ? rv : 0; |
501 | } |
502 | |
503 | /* |
504 | * Receive MPA Request/Reply header. |
505 | * |
506 | * Returns 0 if complete MPA Request/Reply header including |
507 | * eventual private data was received. Returns -EAGAIN if |
508 | * header was partially received or negative error code otherwise. |
509 | * |
510 | * Context: May be called in process context only |
511 | */ |
static int siw_recv_mpa_rr(struct siw_cep *cep)
{
	struct mpa_rr *hdr = &cep->mpa.hdr;
	struct socket *s = cep->sock;
	u16 pd_len;
	int rcvd, to_rcv;

	if (cep->mpa.bytes_rcvd < sizeof(struct mpa_rr)) {
		/* Continue assembling the fixed-size header */
		rcvd = ksock_recv(sock: s, buf: (char *)hdr + cep->mpa.bytes_rcvd,
				  size: sizeof(struct mpa_rr) - cep->mpa.bytes_rcvd,
				  flags: 0);
		if (rcvd <= 0)
			return -ECONNABORTED;

		cep->mpa.bytes_rcvd += rcvd;

		if (cep->mpa.bytes_rcvd < sizeof(struct mpa_rr))
			return -EAGAIN;

		/* Reject oversized private data early */
		if (be16_to_cpu(hdr->params.pd_len) > MPA_MAX_PRIVDATA)
			return -EPROTO;
	}
	pd_len = be16_to_cpu(hdr->params.pd_len);

	/*
	 * At least the MPA Request/Reply header (frame not including
	 * private data) has been received.
	 * Receive (or continue receiving) any private data.
	 */
	to_rcv = pd_len - (cep->mpa.bytes_rcvd - sizeof(struct mpa_rr));

	if (!to_rcv) {
		/*
		 * We must have hdr->params.pd_len == 0 and thus received a
		 * complete MPA Request/Reply frame.
		 * Check against peer protocol violation.
		 */
		u32 word;

		rcvd = ksock_recv(sock: s, buf: (char *)&word, size: sizeof(word), MSG_DONTWAIT);
		if (rcvd == -EAGAIN)
			return 0;

		if (rcvd == 0) {
			siw_dbg_cep(cep, "peer EOF\n" );
			return -EPIPE;
		}
		if (rcvd < 0) {
			siw_dbg_cep(cep, "error: %d\n" , rcvd);
			return rcvd;
		}
		siw_dbg_cep(cep, "peer sent extra data: %d\n" , rcvd);

		return -EPROTO;
	}

	/*
	 * At this point, we must have hdr->params.pd_len != 0.
	 * A private data buffer gets allocated if hdr->params.pd_len != 0.
	 */
	if (!cep->mpa.pdata) {
		/* +4 spare bytes allow detecting a peer sending too much */
		cep->mpa.pdata = kmalloc(size: pd_len + 4, GFP_KERNEL);
		if (!cep->mpa.pdata)
			return -ENOMEM;
	}
	rcvd = ksock_recv(
		sock: s, buf: cep->mpa.pdata + cep->mpa.bytes_rcvd - sizeof(struct mpa_rr),
		size: to_rcv + 4, MSG_DONTWAIT);

	if (rcvd < 0)
		return rcvd;

	/* More data than announced is a protocol violation */
	if (rcvd > to_rcv)
		return -EPROTO;

	cep->mpa.bytes_rcvd += rcvd;

	if (to_rcv == rcvd) {
		siw_dbg_cep(cep, "%d bytes private data received\n" , pd_len);
		return 0;
	}
	return -EAGAIN;
}
595 | |
596 | /* |
597 | * siw_proc_mpareq() |
598 | * |
599 | * Read MPA Request from socket and signal new connection to IWCM |
600 | * if success. Caller must hold lock on corresponding listening CEP. |
601 | */ |
static int siw_proc_mpareq(struct siw_cep *cep)
{
	struct mpa_rr *req;
	int version, rv;
	u16 pd_len;

	rv = siw_recv_mpa_rr(cep);
	if (rv)
		return rv;

	req = &cep->mpa.hdr;

	version = __mpa_rr_revision(mpa_rr_bits: req->params.bits);
	pd_len = be16_to_cpu(req->params.pd_len);

	if (version > MPA_REVISION_2)
		/* allow for 0, 1, and 2 only */
		return -EPROTO;

	if (memcmp(p: req->key, MPA_KEY_REQ, size: 16))
		return -EPROTO;

	/* Prepare for sending MPA reply */
	memcpy(req->key, MPA_KEY_REP, 16);

	if (version == MPA_REVISION_2 &&
	    (req->params.bits & MPA_RR_FLAG_ENHANCED)) {
		/*
		 * MPA version 2 must signal IRD/ORD values and P2P mode
		 * in private data if header flag MPA_RR_FLAG_ENHANCED
		 * is set.
		 */
		if (pd_len < sizeof(struct mpa_v2_data))
			goto reject_conn;

		cep->enhanced_rdma_conn_est = true;
	}

	/* MPA Markers: currently not supported. Marker TX to be added. */
	if (req->params.bits & MPA_RR_FLAG_MARKERS)
		goto reject_conn;

	if (req->params.bits & MPA_RR_FLAG_CRC) {
		/*
		 * RFC 5044, page 27: CRC MUST be used if peer requests it.
		 * siw specific: 'mpa_crc_strict' parameter to reject
		 * connection with CRC if local CRC off enforced by
		 * 'mpa_crc_strict' module parameter.
		 */
		if (!mpa_crc_required && mpa_crc_strict)
			goto reject_conn;

		/* Enable CRC if requested by module parameter */
		if (mpa_crc_required)
			req->params.bits |= MPA_RR_FLAG_CRC;
	}
	if (cep->enhanced_rdma_conn_est) {
		struct mpa_v2_data *v2 = (struct mpa_v2_data *)cep->mpa.pdata;

		/*
		 * Peer requested ORD becomes requested local IRD,
		 * peer requested IRD becomes requested local ORD.
		 * IRD and ORD get limited by global maximum values.
		 */
		cep->ord = ntohs(v2->ird) & MPA_IRD_ORD_MASK;
		cep->ord = min(cep->ord, SIW_MAX_ORD_QP);
		cep->ird = ntohs(v2->ord) & MPA_IRD_ORD_MASK;
		cep->ird = min(cep->ird, SIW_MAX_IRD_QP);

		/* May get overwritten by locally negotiated values */
		cep->mpa.v2_ctrl.ird = htons(cep->ird);
		cep->mpa.v2_ctrl.ord = htons(cep->ord);

		/*
		 * Support for peer sent zero length Write or Read to
		 * let local side enter RTS. Writes are preferred.
		 * Sends would require pre-posting a Receive and are
		 * not supported.
		 * Propose zero length Write if none of Read and Write
		 * is indicated.
		 */
		if (v2->ird & MPA_V2_PEER_TO_PEER) {
			cep->mpa.v2_ctrl.ird |= MPA_V2_PEER_TO_PEER;

			if (v2->ord & MPA_V2_RDMA_WRITE_RTR)
				cep->mpa.v2_ctrl.ord |= MPA_V2_RDMA_WRITE_RTR;
			else if (v2->ord & MPA_V2_RDMA_READ_RTR)
				cep->mpa.v2_ctrl.ord |= MPA_V2_RDMA_READ_RTR;
			else
				cep->mpa.v2_ctrl.ord |= MPA_V2_RDMA_WRITE_RTR;
		}
	}

	cep->state = SIW_EPSTATE_RECVD_MPAREQ;

	/* Keep reference until IWCM accepts/rejects */
	siw_cep_get(cep);
	rv = siw_cm_upcall(cep, reason: IW_CM_EVENT_CONNECT_REQUEST, status: 0);
	if (rv)
		siw_cep_put(cep);

	return rv;

reject_conn:
	siw_dbg_cep(cep, "reject: crc %d:%d:%d, m %d:%d\n" ,
		    req->params.bits & MPA_RR_FLAG_CRC ? 1 : 0,
		    mpa_crc_required, mpa_crc_strict,
		    req->params.bits & MPA_RR_FLAG_MARKERS ? 1 : 0, 0);

	req->params.bits &= ~MPA_RR_FLAG_MARKERS;
	req->params.bits |= MPA_RR_FLAG_REJECT;

	if (!mpa_crc_required && mpa_crc_strict)
		req->params.bits &= ~MPA_RR_FLAG_CRC;

	/* Drop any received private data; the reject reply carries none */
	if (pd_len)
		kfree(objp: cep->mpa.pdata);

	cep->mpa.pdata = NULL;

	siw_send_mpareqrep(cep, NULL, pd_len: 0);

	return -EOPNOTSUPP;
}
726 | |
/*
 * siw_proc_mpareply()
 *
 * Process an MPA Reply received on an actively connecting endpoint:
 * validates the reply, negotiates IRD/ORD and P2P (RTR) mode, moves
 * the QP to RTS and signals CONNECT_REPLY to the IWCM.
 */
static int siw_proc_mpareply(struct siw_cep *cep)
{
	struct siw_qp_attrs qp_attrs;
	enum siw_qp_attr_mask qp_attr_mask;
	struct siw_qp *qp = cep->qp;
	struct mpa_rr *rep;
	int rv;
	u16 rep_ord;
	u16 rep_ird;
	bool ird_insufficient = false;
	enum mpa_v2_ctrl mpa_p2p_mode = MPA_V2_RDMA_NO_RTR;

	rv = siw_recv_mpa_rr(cep);
	if (rv)
		goto out_err;

	/* Complete reply received in time: disarm the MPA timeout */
	siw_cancel_mpatimer(cep);

	rep = &cep->mpa.hdr;

	if (__mpa_rr_revision(mpa_rr_bits: rep->params.bits) > MPA_REVISION_2) {
		/* allow for 0, 1, and 2 only */
		rv = -EPROTO;
		goto out_err;
	}
	if (memcmp(p: rep->key, MPA_KEY_REP, size: 16)) {
		siw_init_terminate(qp, layer: TERM_ERROR_LAYER_LLP, etype: LLP_ETYPE_MPA,
				   ecode: LLP_ECODE_INVALID_REQ_RESP, in_tx: 0);
		siw_send_terminate(qp);
		rv = -EPROTO;
		goto out_err;
	}
	if (rep->params.bits & MPA_RR_FLAG_REJECT) {
		siw_dbg_cep(cep, "got mpa reject\n" );
		siw_cm_upcall(cep, reason: IW_CM_EVENT_CONNECT_REPLY, status: -ECONNRESET);

		return -ECONNRESET;
	}
	if (try_gso && rep->params.bits & MPA_RR_FLAG_GSO_EXP) {
		siw_dbg_cep(cep, "peer allows GSO on TX\n" );
		qp->tx_ctx.gso_seg_limit = 0;
	}
	/* Marker use and CRC mismatch are not negotiable: refuse */
	if ((rep->params.bits & MPA_RR_FLAG_MARKERS) ||
	    (mpa_crc_required && !(rep->params.bits & MPA_RR_FLAG_CRC)) ||
	    (mpa_crc_strict && !mpa_crc_required &&
	     (rep->params.bits & MPA_RR_FLAG_CRC))) {
		siw_dbg_cep(cep, "reply unsupp: crc %d:%d:%d, m %d:%d\n" ,
			    rep->params.bits & MPA_RR_FLAG_CRC ? 1 : 0,
			    mpa_crc_required, mpa_crc_strict,
			    rep->params.bits & MPA_RR_FLAG_MARKERS ? 1 : 0, 0);

		siw_cm_upcall(cep, reason: IW_CM_EVENT_CONNECT_REPLY, status: -ECONNREFUSED);

		return -EINVAL;
	}
	if (cep->enhanced_rdma_conn_est) {
		struct mpa_v2_data *v2;

		if (__mpa_rr_revision(mpa_rr_bits: rep->params.bits) < MPA_REVISION_2 ||
		    !(rep->params.bits & MPA_RR_FLAG_ENHANCED)) {
			/*
			 * Protocol failure: The responder MUST reply with
			 * MPA version 2 and MUST set MPA_RR_FLAG_ENHANCED.
			 */
			siw_dbg_cep(cep, "mpa reply error: vers %d, enhcd %d\n" ,
				    __mpa_rr_revision(rep->params.bits),
				    rep->params.bits & MPA_RR_FLAG_ENHANCED ?
					    1 :
					    0);

			siw_cm_upcall(cep, reason: IW_CM_EVENT_CONNECT_REPLY,
				      status: -ECONNRESET);
			return -EINVAL;
		}
		v2 = (struct mpa_v2_data *)cep->mpa.pdata;
		rep_ird = ntohs(v2->ird) & MPA_IRD_ORD_MASK;
		rep_ord = ntohs(v2->ord) & MPA_IRD_ORD_MASK;

		if (cep->ird < rep_ord &&
		    (relaxed_ird_negotiation == false ||
		     rep_ord > cep->sdev->attrs.max_ird)) {
			siw_dbg_cep(cep, "ird %d, rep_ord %d, max_ord %d\n" ,
				    cep->ird, rep_ord,
				    cep->sdev->attrs.max_ord);
			ird_insufficient = true;
		}
		if (cep->ord > rep_ird && relaxed_ird_negotiation == false) {
			siw_dbg_cep(cep, "ord %d, rep_ird %d\n" , cep->ord,
				    rep_ird);
			ird_insufficient = true;
		}
		/*
		 * Always report negotiated peer values to user,
		 * even if IRD/ORD negotiation failed
		 */
		cep->ird = rep_ord;
		cep->ord = rep_ird;

		if (ird_insufficient) {
			/*
			 * If the initiator IRD is insuffient for the
			 * responder ORD, send a TERM.
			 */
			siw_init_terminate(qp, layer: TERM_ERROR_LAYER_LLP,
					   etype: LLP_ETYPE_MPA,
					   ecode: LLP_ECODE_INSUFFICIENT_IRD, in_tx: 0);
			siw_send_terminate(qp);
			rv = -ENOMEM;
			goto out_err;
		}
		if (cep->mpa.v2_ctrl_req.ird & MPA_V2_PEER_TO_PEER)
			mpa_p2p_mode =
				cep->mpa.v2_ctrl_req.ord &
				(MPA_V2_RDMA_WRITE_RTR | MPA_V2_RDMA_READ_RTR);

		/*
		 * Check if we requested P2P mode, and if peer agrees
		 */
		if (mpa_p2p_mode != MPA_V2_RDMA_NO_RTR) {
			if ((mpa_p2p_mode & v2->ord) == 0) {
				/*
				 * We requested RTR mode(s), but the peer
				 * did not pick any mode we support.
				 */
				siw_dbg_cep(cep,
					    "rtr mode: req %2x, got %2x\n" ,
					    mpa_p2p_mode,
					    v2->ord & (MPA_V2_RDMA_WRITE_RTR |
						       MPA_V2_RDMA_READ_RTR));

				siw_init_terminate(qp, layer: TERM_ERROR_LAYER_LLP,
						   etype: LLP_ETYPE_MPA,
						   ecode: LLP_ECODE_NO_MATCHING_RTR,
						   in_tx: 0);
				siw_send_terminate(qp);
				rv = -EPROTO;
				goto out_err;
			}
			mpa_p2p_mode = v2->ord & (MPA_V2_RDMA_WRITE_RTR |
						  MPA_V2_RDMA_READ_RTR);
		}
	}
	memset(&qp_attrs, 0, sizeof(qp_attrs));

	if (rep->params.bits & MPA_RR_FLAG_CRC)
		qp_attrs.flags = SIW_MPA_CRC;

	/* Negotiated values become the QP's IRQ/ORQ sizes */
	qp_attrs.irq_size = cep->ird;
	qp_attrs.orq_size = cep->ord;
	qp_attrs.sk = cep->sock;
	qp_attrs.state = SIW_QP_STATE_RTS;

	qp_attr_mask = SIW_QP_ATTR_STATE | SIW_QP_ATTR_LLP_HANDLE |
		       SIW_QP_ATTR_ORD | SIW_QP_ATTR_IRD | SIW_QP_ATTR_MPA;

	/* Move socket RX/TX under QP control */
	down_write(sem: &qp->state_lock);
	if (qp->attrs.state > SIW_QP_STATE_RTR) {
		rv = -EINVAL;
		up_write(sem: &qp->state_lock);
		goto out_err;
	}
	rv = siw_qp_modify(qp, attr: &qp_attrs, mask: qp_attr_mask);

	siw_qp_socket_assoc(cep, qp);

	up_write(sem: &qp->state_lock);

	/* Send extra RDMA frame to trigger peer RTS if negotiated */
	if (mpa_p2p_mode != MPA_V2_RDMA_NO_RTR) {
		rv = siw_qp_mpa_rts(qp, ctrl: mpa_p2p_mode);
		if (rv)
			goto out_err;
	}
	if (!rv) {
		rv = siw_cm_upcall(cep, reason: IW_CM_EVENT_CONNECT_REPLY, status: 0);
		if (!rv)
			cep->state = SIW_EPSTATE_RDMA_MODE;

		return 0;
	}

out_err:
	/* -EAGAIN means an incomplete frame: keep waiting, no upcall yet */
	if (rv != -EAGAIN)
		siw_cm_upcall(cep, reason: IW_CM_EVENT_CONNECT_REPLY, status: -EINVAL);

	return rv;
}
915 | |
916 | /* |
917 | * siw_accept_newconn - accept an incoming pending connection |
918 | * |
919 | */ |
920 | static void siw_accept_newconn(struct siw_cep *cep) |
921 | { |
922 | struct socket *s = cep->sock; |
923 | struct socket *new_s = NULL; |
924 | struct siw_cep *new_cep = NULL; |
925 | int rv = 0; /* debug only. should disappear */ |
926 | |
927 | if (cep->state != SIW_EPSTATE_LISTENING) |
928 | goto error; |
929 | |
930 | new_cep = siw_cep_alloc(sdev: cep->sdev); |
931 | if (!new_cep) |
932 | goto error; |
933 | |
934 | /* |
935 | * 4: Allocate a sufficient number of work elements |
936 | * to allow concurrent handling of local + peer close |
937 | * events, MPA header processing + MPA timeout. |
938 | */ |
939 | if (siw_cm_alloc_work(cep: new_cep, num: 4) != 0) |
940 | goto error; |
941 | |
942 | /* |
943 | * Copy saved socket callbacks from listening CEP |
944 | * and assign new socket with new CEP |
945 | */ |
946 | new_cep->sk_state_change = cep->sk_state_change; |
947 | new_cep->sk_data_ready = cep->sk_data_ready; |
948 | new_cep->sk_write_space = cep->sk_write_space; |
949 | new_cep->sk_error_report = cep->sk_error_report; |
950 | |
951 | rv = kernel_accept(sock: s, newsock: &new_s, O_NONBLOCK); |
952 | if (rv != 0) { |
953 | /* |
954 | * Connection already aborted by peer..? |
955 | */ |
956 | siw_dbg_cep(cep, "kernel_accept() error: %d\n" , rv); |
957 | goto error; |
958 | } |
959 | new_cep->sock = new_s; |
960 | siw_cep_get(cep: new_cep); |
961 | new_s->sk->sk_user_data = new_cep; |
962 | |
963 | if (siw_tcp_nagle == false) |
964 | tcp_sock_set_nodelay(sk: new_s->sk); |
965 | new_cep->state = SIW_EPSTATE_AWAIT_MPAREQ; |
966 | |
967 | rv = siw_cm_queue_work(cep: new_cep, type: SIW_CM_WORK_MPATIMEOUT); |
968 | if (rv) |
969 | goto error; |
970 | /* |
971 | * See siw_proc_mpareq() etc. for the use of new_cep->listen_cep. |
972 | */ |
973 | new_cep->listen_cep = cep; |
974 | siw_cep_get(cep); |
975 | |
976 | if (atomic_read(v: &new_s->sk->sk_rmem_alloc)) { |
977 | /* |
978 | * MPA REQ already queued |
979 | */ |
980 | siw_dbg_cep(cep, "immediate mpa request\n" ); |
981 | |
982 | siw_cep_set_inuse(cep: new_cep); |
983 | rv = siw_proc_mpareq(cep: new_cep); |
984 | if (rv != -EAGAIN) { |
985 | siw_cep_put(cep); |
986 | new_cep->listen_cep = NULL; |
987 | if (rv) { |
988 | siw_cancel_mpatimer(cep: new_cep); |
989 | siw_cep_set_free(cep: new_cep); |
990 | goto error; |
991 | } |
992 | } |
993 | siw_cep_set_free(cep: new_cep); |
994 | } |
995 | return; |
996 | |
997 | error: |
998 | if (new_cep) |
999 | siw_cep_put(cep: new_cep); |
1000 | |
1001 | if (new_s) { |
1002 | siw_socket_disassoc(s: new_s); |
1003 | sock_release(sock: new_s); |
1004 | new_cep->sock = NULL; |
1005 | } |
1006 | siw_dbg_cep(cep, "error %d\n" , rv); |
1007 | } |
1008 | |
1009 | static void siw_cm_work_handler(struct work_struct *w) |
1010 | { |
1011 | struct siw_cm_work *work; |
1012 | struct siw_cep *cep; |
1013 | int release_cep = 0, rv = 0; |
1014 | |
1015 | work = container_of(w, struct siw_cm_work, work.work); |
1016 | cep = work->cep; |
1017 | |
1018 | siw_dbg_cep(cep, "[QP %u]: work type: %d, state %d\n" , |
1019 | cep->qp ? qp_id(cep->qp) : UINT_MAX, |
1020 | work->type, cep->state); |
1021 | |
1022 | siw_cep_set_inuse(cep); |
1023 | |
1024 | switch (work->type) { |
1025 | case SIW_CM_WORK_ACCEPT: |
1026 | siw_accept_newconn(cep); |
1027 | break; |
1028 | |
1029 | case SIW_CM_WORK_READ_MPAHDR: |
1030 | if (cep->state == SIW_EPSTATE_AWAIT_MPAREQ) { |
1031 | if (cep->listen_cep) { |
1032 | siw_cep_set_inuse(cep: cep->listen_cep); |
1033 | |
1034 | if (cep->listen_cep->state == |
1035 | SIW_EPSTATE_LISTENING) |
1036 | rv = siw_proc_mpareq(cep); |
1037 | else |
1038 | rv = -EFAULT; |
1039 | |
1040 | siw_cep_set_free(cep: cep->listen_cep); |
1041 | |
1042 | if (rv != -EAGAIN) { |
1043 | siw_cep_put(cep: cep->listen_cep); |
1044 | cep->listen_cep = NULL; |
1045 | if (rv) |
1046 | siw_cep_put(cep); |
1047 | } |
1048 | } |
1049 | } else if (cep->state == SIW_EPSTATE_AWAIT_MPAREP) { |
1050 | rv = siw_proc_mpareply(cep); |
1051 | } else { |
1052 | /* |
1053 | * CEP already moved out of MPA handshake. |
1054 | * any connection management already done. |
1055 | * silently ignore the mpa packet. |
1056 | */ |
1057 | if (cep->state == SIW_EPSTATE_RDMA_MODE) { |
1058 | cep->sock->sk->sk_data_ready(cep->sock->sk); |
1059 | siw_dbg_cep(cep, "already in RDMA mode" ); |
1060 | } else { |
1061 | siw_dbg_cep(cep, "out of state: %d\n" , |
1062 | cep->state); |
1063 | } |
1064 | } |
1065 | if (rv && rv != -EAGAIN) |
1066 | release_cep = 1; |
1067 | break; |
1068 | |
1069 | case SIW_CM_WORK_CLOSE_LLP: |
1070 | /* |
1071 | * QP scheduled LLP close |
1072 | */ |
1073 | if (cep->qp) |
1074 | siw_send_terminate(qp: cep->qp); |
1075 | |
1076 | if (cep->cm_id) |
1077 | siw_cm_upcall(cep, reason: IW_CM_EVENT_CLOSE, status: 0); |
1078 | |
1079 | release_cep = 1; |
1080 | break; |
1081 | |
1082 | case SIW_CM_WORK_PEER_CLOSE: |
1083 | if (cep->cm_id) { |
1084 | if (cep->state == SIW_EPSTATE_AWAIT_MPAREP) { |
1085 | /* |
1086 | * MPA reply not received, but connection drop |
1087 | */ |
1088 | siw_cm_upcall(cep, reason: IW_CM_EVENT_CONNECT_REPLY, |
1089 | status: -ECONNRESET); |
1090 | } else if (cep->state == SIW_EPSTATE_RDMA_MODE) { |
1091 | /* |
1092 | * NOTE: IW_CM_EVENT_DISCONNECT is given just |
1093 | * to transition IWCM into CLOSING. |
1094 | */ |
1095 | siw_cm_upcall(cep, reason: IW_CM_EVENT_DISCONNECT, status: 0); |
1096 | siw_cm_upcall(cep, reason: IW_CM_EVENT_CLOSE, status: 0); |
1097 | } |
1098 | /* |
1099 | * for other states there is no connection |
1100 | * known to the IWCM. |
1101 | */ |
1102 | } else { |
1103 | if (cep->state == SIW_EPSTATE_RECVD_MPAREQ) { |
1104 | /* |
1105 | * Wait for the ulp/CM to call accept/reject |
1106 | */ |
1107 | siw_dbg_cep(cep, |
1108 | "mpa req recvd, wait for ULP\n" ); |
1109 | } else if (cep->state == SIW_EPSTATE_AWAIT_MPAREQ) { |
1110 | /* |
1111 | * Socket close before MPA request received. |
1112 | */ |
1113 | if (cep->listen_cep) { |
1114 | siw_dbg_cep(cep, |
1115 | "no mpareq: drop listener\n" ); |
1116 | siw_cep_put(cep: cep->listen_cep); |
1117 | cep->listen_cep = NULL; |
1118 | } |
1119 | } |
1120 | } |
1121 | release_cep = 1; |
1122 | break; |
1123 | |
1124 | case SIW_CM_WORK_MPATIMEOUT: |
1125 | cep->mpa_timer = NULL; |
1126 | |
1127 | if (cep->state == SIW_EPSTATE_AWAIT_MPAREP) { |
1128 | /* |
1129 | * MPA request timed out: |
1130 | * Hide any partially received private data and signal |
1131 | * timeout |
1132 | */ |
1133 | cep->mpa.hdr.params.pd_len = 0; |
1134 | |
1135 | if (cep->cm_id) |
1136 | siw_cm_upcall(cep, reason: IW_CM_EVENT_CONNECT_REPLY, |
1137 | status: -ETIMEDOUT); |
1138 | release_cep = 1; |
1139 | |
1140 | } else if (cep->state == SIW_EPSTATE_AWAIT_MPAREQ) { |
1141 | /* |
1142 | * No MPA request received after peer TCP stream setup. |
1143 | */ |
1144 | if (cep->listen_cep) { |
1145 | siw_cep_put(cep: cep->listen_cep); |
1146 | cep->listen_cep = NULL; |
1147 | } |
1148 | release_cep = 1; |
1149 | } |
1150 | break; |
1151 | |
1152 | default: |
1153 | WARN(1, "Undefined CM work type: %d\n" , work->type); |
1154 | } |
1155 | if (release_cep) { |
1156 | siw_dbg_cep(cep, |
1157 | "release: timer=%s, QP[%u]\n" , |
1158 | cep->mpa_timer ? "y" : "n" , |
1159 | cep->qp ? qp_id(cep->qp) : UINT_MAX); |
1160 | |
1161 | siw_cancel_mpatimer(cep); |
1162 | |
1163 | cep->state = SIW_EPSTATE_CLOSED; |
1164 | |
1165 | if (cep->qp) { |
1166 | struct siw_qp *qp = cep->qp; |
1167 | /* |
1168 | * Serialize a potential race with application |
1169 | * closing the QP and calling siw_qp_cm_drop() |
1170 | */ |
1171 | siw_qp_get(qp); |
1172 | siw_cep_set_free(cep); |
1173 | |
1174 | siw_qp_llp_close(qp); |
1175 | siw_qp_put(qp); |
1176 | |
1177 | siw_cep_set_inuse(cep); |
1178 | cep->qp = NULL; |
1179 | siw_qp_put(qp); |
1180 | } |
1181 | if (cep->sock) { |
1182 | siw_socket_disassoc(s: cep->sock); |
1183 | sock_release(sock: cep->sock); |
1184 | cep->sock = NULL; |
1185 | } |
1186 | if (cep->cm_id) { |
1187 | siw_free_cm_id(cep); |
1188 | siw_cep_put(cep); |
1189 | } |
1190 | } |
1191 | siw_cep_set_free(cep); |
1192 | siw_put_work(work); |
1193 | siw_cep_put(cep); |
1194 | } |
1195 | |
1196 | static struct workqueue_struct *siw_cm_wq; |
1197 | |
1198 | int siw_cm_queue_work(struct siw_cep *cep, enum siw_work_type type) |
1199 | { |
1200 | struct siw_cm_work *work = siw_get_work(cep); |
1201 | unsigned long delay = 0; |
1202 | |
1203 | if (!work) { |
1204 | siw_dbg_cep(cep, "failed with no work available\n" ); |
1205 | return -ENOMEM; |
1206 | } |
1207 | work->type = type; |
1208 | work->cep = cep; |
1209 | |
1210 | siw_cep_get(cep); |
1211 | |
1212 | INIT_DELAYED_WORK(&work->work, siw_cm_work_handler); |
1213 | |
1214 | if (type == SIW_CM_WORK_MPATIMEOUT) { |
1215 | cep->mpa_timer = work; |
1216 | |
1217 | if (cep->state == SIW_EPSTATE_AWAIT_MPAREP) |
1218 | delay = MPAREQ_TIMEOUT; |
1219 | else |
1220 | delay = MPAREP_TIMEOUT; |
1221 | } |
1222 | siw_dbg_cep(cep, "[QP %u]: work type: %d, timeout %lu\n" , |
1223 | cep->qp ? qp_id(cep->qp) : -1, type, delay); |
1224 | |
1225 | queue_delayed_work(wq: siw_cm_wq, dwork: &work->work, delay); |
1226 | |
1227 | return 0; |
1228 | } |
1229 | |
1230 | static void siw_cm_llp_data_ready(struct sock *sk) |
1231 | { |
1232 | struct siw_cep *cep; |
1233 | |
1234 | trace_sk_data_ready(sk); |
1235 | |
1236 | read_lock(&sk->sk_callback_lock); |
1237 | |
1238 | cep = sk_to_cep(sk); |
1239 | if (!cep) |
1240 | goto out; |
1241 | |
1242 | siw_dbg_cep(cep, "cep state: %d, socket state %d\n" , |
1243 | cep->state, sk->sk_state); |
1244 | |
1245 | if (sk->sk_state != TCP_ESTABLISHED) |
1246 | goto out; |
1247 | |
1248 | switch (cep->state) { |
1249 | case SIW_EPSTATE_RDMA_MODE: |
1250 | case SIW_EPSTATE_LISTENING: |
1251 | break; |
1252 | |
1253 | case SIW_EPSTATE_AWAIT_MPAREQ: |
1254 | case SIW_EPSTATE_AWAIT_MPAREP: |
1255 | siw_cm_queue_work(cep, type: SIW_CM_WORK_READ_MPAHDR); |
1256 | break; |
1257 | |
1258 | default: |
1259 | siw_dbg_cep(cep, "unexpected data, state %d\n" , cep->state); |
1260 | break; |
1261 | } |
1262 | out: |
1263 | read_unlock(&sk->sk_callback_lock); |
1264 | } |
1265 | |
1266 | static void siw_cm_llp_write_space(struct sock *sk) |
1267 | { |
1268 | struct siw_cep *cep = sk_to_cep(sk); |
1269 | |
1270 | if (cep) |
1271 | siw_dbg_cep(cep, "state: %d\n" , cep->state); |
1272 | } |
1273 | |
1274 | static void siw_cm_llp_error_report(struct sock *sk) |
1275 | { |
1276 | struct siw_cep *cep = sk_to_cep(sk); |
1277 | |
1278 | if (cep) { |
1279 | siw_dbg_cep(cep, "error %d, socket state: %d, cep state: %d\n" , |
1280 | sk->sk_err, sk->sk_state, cep->state); |
1281 | cep->sk_error_report(sk); |
1282 | } |
1283 | } |
1284 | |
1285 | static void siw_cm_llp_state_change(struct sock *sk) |
1286 | { |
1287 | struct siw_cep *cep; |
1288 | void (*orig_state_change)(struct sock *s); |
1289 | |
1290 | read_lock(&sk->sk_callback_lock); |
1291 | |
1292 | cep = sk_to_cep(sk); |
1293 | if (!cep) { |
1294 | /* endpoint already disassociated */ |
1295 | read_unlock(&sk->sk_callback_lock); |
1296 | return; |
1297 | } |
1298 | orig_state_change = cep->sk_state_change; |
1299 | |
1300 | siw_dbg_cep(cep, "state: %d\n" , cep->state); |
1301 | |
1302 | switch (sk->sk_state) { |
1303 | case TCP_ESTABLISHED: |
1304 | /* |
1305 | * handle accepting socket as special case where only |
1306 | * new connection is possible |
1307 | */ |
1308 | siw_cm_queue_work(cep, type: SIW_CM_WORK_ACCEPT); |
1309 | break; |
1310 | |
1311 | case TCP_CLOSE: |
1312 | case TCP_CLOSE_WAIT: |
1313 | if (cep->qp) |
1314 | cep->qp->tx_ctx.tx_suspend = 1; |
1315 | siw_cm_queue_work(cep, type: SIW_CM_WORK_PEER_CLOSE); |
1316 | break; |
1317 | |
1318 | default: |
1319 | siw_dbg_cep(cep, "unexpected socket state %d\n" , sk->sk_state); |
1320 | } |
1321 | read_unlock(&sk->sk_callback_lock); |
1322 | orig_state_change(sk); |
1323 | } |
1324 | |
1325 | static int kernel_bindconnect(struct socket *s, struct sockaddr *laddr, |
1326 | struct sockaddr *raddr, bool afonly) |
1327 | { |
1328 | int rv, flags = 0; |
1329 | size_t size = laddr->sa_family == AF_INET ? |
1330 | sizeof(struct sockaddr_in) : sizeof(struct sockaddr_in6); |
1331 | |
1332 | /* |
1333 | * Make address available again asap. |
1334 | */ |
1335 | sock_set_reuseaddr(sk: s->sk); |
1336 | |
1337 | if (afonly) { |
1338 | rv = ip6_sock_set_v6only(sk: s->sk); |
1339 | if (rv) |
1340 | return rv; |
1341 | } |
1342 | |
1343 | rv = s->ops->bind(s, laddr, size); |
1344 | if (rv < 0) |
1345 | return rv; |
1346 | |
1347 | rv = s->ops->connect(s, raddr, size, flags); |
1348 | |
1349 | return rv < 0 ? rv : 0; |
1350 | } |
1351 | |
1352 | int siw_connect(struct iw_cm_id *id, struct iw_cm_conn_param *params) |
1353 | { |
1354 | struct siw_device *sdev = to_siw_dev(base_dev: id->device); |
1355 | struct siw_qp *qp; |
1356 | struct siw_cep *cep = NULL; |
1357 | struct socket *s = NULL; |
1358 | struct sockaddr *laddr = (struct sockaddr *)&id->local_addr, |
1359 | *raddr = (struct sockaddr *)&id->remote_addr; |
1360 | bool p2p_mode = peer_to_peer, v4 = true; |
1361 | u16 pd_len = params->private_data_len; |
1362 | int version = mpa_version, rv; |
1363 | |
1364 | if (pd_len > MPA_MAX_PRIVDATA) |
1365 | return -EINVAL; |
1366 | |
1367 | if (params->ird > sdev->attrs.max_ird || |
1368 | params->ord > sdev->attrs.max_ord) |
1369 | return -ENOMEM; |
1370 | |
1371 | if (laddr->sa_family == AF_INET6) |
1372 | v4 = false; |
1373 | else if (laddr->sa_family != AF_INET) |
1374 | return -EAFNOSUPPORT; |
1375 | |
1376 | /* |
1377 | * Respect any iwarp port mapping: Use mapped remote address |
1378 | * if valid. Local address must not be mapped, since siw |
1379 | * uses kernel TCP stack. |
1380 | */ |
1381 | if ((v4 && to_sockaddr_in(id->remote_addr).sin_port != 0) || |
1382 | to_sockaddr_in6(id->remote_addr).sin6_port != 0) |
1383 | raddr = (struct sockaddr *)&id->m_remote_addr; |
1384 | |
1385 | qp = siw_qp_id2obj(sdev, id: params->qpn); |
1386 | if (!qp) { |
1387 | WARN(1, "[QP %u] does not exist\n" , params->qpn); |
1388 | rv = -EINVAL; |
1389 | goto error; |
1390 | } |
1391 | siw_dbg_qp(qp, "pd_len %d, laddr %pISp, raddr %pISp\n" , pd_len, laddr, |
1392 | raddr); |
1393 | |
1394 | rv = sock_create(family: v4 ? AF_INET : AF_INET6, type: SOCK_STREAM, IPPROTO_TCP, res: &s); |
1395 | if (rv < 0) |
1396 | goto error; |
1397 | |
1398 | /* |
1399 | * NOTE: For simplification, connect() is called in blocking |
1400 | * mode. Might be reconsidered for async connection setup at |
1401 | * TCP level. |
1402 | */ |
1403 | rv = kernel_bindconnect(s, laddr, raddr, afonly: id->afonly); |
1404 | if (rv != 0) { |
1405 | siw_dbg_qp(qp, "kernel_bindconnect: error %d\n" , rv); |
1406 | goto error; |
1407 | } |
1408 | if (siw_tcp_nagle == false) |
1409 | tcp_sock_set_nodelay(sk: s->sk); |
1410 | cep = siw_cep_alloc(sdev); |
1411 | if (!cep) { |
1412 | rv = -ENOMEM; |
1413 | goto error; |
1414 | } |
1415 | siw_cep_set_inuse(cep); |
1416 | |
1417 | /* Associate QP with CEP */ |
1418 | siw_cep_get(cep); |
1419 | qp->cep = cep; |
1420 | |
1421 | /* siw_qp_get(qp) already done by QP lookup */ |
1422 | cep->qp = qp; |
1423 | |
1424 | id->add_ref(id); |
1425 | cep->cm_id = id; |
1426 | |
1427 | /* |
1428 | * 4: Allocate a sufficient number of work elements |
1429 | * to allow concurrent handling of local + peer close |
1430 | * events, MPA header processing + MPA timeout. |
1431 | */ |
1432 | rv = siw_cm_alloc_work(cep, num: 4); |
1433 | if (rv != 0) { |
1434 | rv = -ENOMEM; |
1435 | goto error; |
1436 | } |
1437 | cep->ird = params->ird; |
1438 | cep->ord = params->ord; |
1439 | |
1440 | if (p2p_mode && cep->ord == 0) |
1441 | cep->ord = 1; |
1442 | |
1443 | cep->state = SIW_EPSTATE_CONNECTING; |
1444 | |
1445 | /* |
1446 | * Associate CEP with socket |
1447 | */ |
1448 | siw_cep_socket_assoc(cep, s); |
1449 | |
1450 | cep->state = SIW_EPSTATE_AWAIT_MPAREP; |
1451 | |
1452 | /* |
1453 | * Set MPA Request bits: CRC if required, no MPA Markers, |
1454 | * MPA Rev. according to module parameter 'mpa_version', Key 'Request'. |
1455 | */ |
1456 | cep->mpa.hdr.params.bits = 0; |
1457 | if (version > MPA_REVISION_2) { |
1458 | pr_warn("Setting MPA version to %u\n" , MPA_REVISION_2); |
1459 | version = MPA_REVISION_2; |
1460 | /* Adjust also module parameter */ |
1461 | mpa_version = MPA_REVISION_2; |
1462 | } |
1463 | __mpa_rr_set_revision(bits: &cep->mpa.hdr.params.bits, rev: version); |
1464 | |
1465 | if (try_gso) |
1466 | cep->mpa.hdr.params.bits |= MPA_RR_FLAG_GSO_EXP; |
1467 | |
1468 | if (mpa_crc_required) |
1469 | cep->mpa.hdr.params.bits |= MPA_RR_FLAG_CRC; |
1470 | |
1471 | /* |
1472 | * If MPA version == 2: |
1473 | * o Include ORD and IRD. |
1474 | * o Indicate peer-to-peer mode, if required by module |
1475 | * parameter 'peer_to_peer'. |
1476 | */ |
1477 | if (version == MPA_REVISION_2) { |
1478 | cep->enhanced_rdma_conn_est = true; |
1479 | cep->mpa.hdr.params.bits |= MPA_RR_FLAG_ENHANCED; |
1480 | |
1481 | cep->mpa.v2_ctrl.ird = htons(cep->ird); |
1482 | cep->mpa.v2_ctrl.ord = htons(cep->ord); |
1483 | |
1484 | if (p2p_mode) { |
1485 | cep->mpa.v2_ctrl.ird |= MPA_V2_PEER_TO_PEER; |
1486 | cep->mpa.v2_ctrl.ord |= rtr_type; |
1487 | } |
1488 | /* Remember own P2P mode requested */ |
1489 | cep->mpa.v2_ctrl_req.ird = cep->mpa.v2_ctrl.ird; |
1490 | cep->mpa.v2_ctrl_req.ord = cep->mpa.v2_ctrl.ord; |
1491 | } |
1492 | memcpy(cep->mpa.hdr.key, MPA_KEY_REQ, 16); |
1493 | |
1494 | rv = siw_send_mpareqrep(cep, pdata: params->private_data, pd_len); |
1495 | /* |
1496 | * Reset private data. |
1497 | */ |
1498 | cep->mpa.hdr.params.pd_len = 0; |
1499 | |
1500 | if (rv >= 0) { |
1501 | rv = siw_cm_queue_work(cep, type: SIW_CM_WORK_MPATIMEOUT); |
1502 | if (!rv) { |
1503 | siw_dbg_cep(cep, "[QP %u]: exit\n" , qp_id(qp)); |
1504 | siw_cep_set_free(cep); |
1505 | return 0; |
1506 | } |
1507 | } |
1508 | error: |
1509 | siw_dbg(id->device, "failed: %d\n" , rv); |
1510 | |
1511 | if (cep) { |
1512 | siw_socket_disassoc(s); |
1513 | sock_release(sock: s); |
1514 | cep->sock = NULL; |
1515 | |
1516 | cep->qp = NULL; |
1517 | |
1518 | cep->cm_id = NULL; |
1519 | id->rem_ref(id); |
1520 | |
1521 | qp->cep = NULL; |
1522 | siw_cep_put(cep); |
1523 | |
1524 | cep->state = SIW_EPSTATE_CLOSED; |
1525 | |
1526 | siw_cep_set_free_and_put(cep); |
1527 | |
1528 | } else if (s) { |
1529 | sock_release(sock: s); |
1530 | } |
1531 | if (qp) |
1532 | siw_qp_put(qp); |
1533 | |
1534 | return rv; |
1535 | } |
1536 | |
1537 | /* |
1538 | * siw_accept - Let SoftiWARP accept an RDMA connection request |
1539 | * |
1540 | * @id: New connection management id to be used for accepted |
1541 | * connection request |
1542 | * @params: Connection parameters provided by ULP for accepting connection |
1543 | * |
1544 | * Transition QP to RTS state, associate new CM id @id with accepted CEP |
1545 | * and get prepared for TCP input by installing socket callbacks. |
1546 | * Then send MPA Reply and generate the "connection established" event. |
1547 | * Socket callbacks must be installed before sending MPA Reply, because |
1548 | * the latter may cause a first RDMA message to arrive from the RDMA Initiator |
1549 | * side very quickly, at which time the socket callbacks must be ready. |
1550 | */ |
1551 | int siw_accept(struct iw_cm_id *id, struct iw_cm_conn_param *params) |
1552 | { |
1553 | struct siw_device *sdev = to_siw_dev(base_dev: id->device); |
1554 | struct siw_cep *cep = (struct siw_cep *)id->provider_data; |
1555 | struct siw_qp *qp; |
1556 | struct siw_qp_attrs qp_attrs; |
1557 | int rv = -EINVAL, max_priv_data = MPA_MAX_PRIVDATA; |
1558 | bool wait_for_peer_rts = false; |
1559 | |
1560 | siw_cep_set_inuse(cep); |
1561 | siw_cep_put(cep); |
1562 | |
1563 | /* Free lingering inbound private data */ |
1564 | if (cep->mpa.hdr.params.pd_len) { |
1565 | cep->mpa.hdr.params.pd_len = 0; |
1566 | kfree(objp: cep->mpa.pdata); |
1567 | cep->mpa.pdata = NULL; |
1568 | } |
1569 | siw_cancel_mpatimer(cep); |
1570 | |
1571 | if (cep->state != SIW_EPSTATE_RECVD_MPAREQ) { |
1572 | siw_dbg_cep(cep, "out of state\n" ); |
1573 | rv = -ECONNRESET; |
1574 | goto free_cep; |
1575 | } |
1576 | qp = siw_qp_id2obj(sdev, id: params->qpn); |
1577 | if (!qp) { |
1578 | WARN(1, "[QP %d] does not exist\n" , params->qpn); |
1579 | goto free_cep; |
1580 | } |
1581 | down_write(sem: &qp->state_lock); |
1582 | if (qp->attrs.state > SIW_QP_STATE_RTR) |
1583 | goto error_unlock; |
1584 | siw_dbg_cep(cep, "[QP %d]\n" , params->qpn); |
1585 | |
1586 | if (try_gso && cep->mpa.hdr.params.bits & MPA_RR_FLAG_GSO_EXP) { |
1587 | siw_dbg_cep(cep, "peer allows GSO on TX\n" ); |
1588 | qp->tx_ctx.gso_seg_limit = 0; |
1589 | } |
1590 | if (params->ord > sdev->attrs.max_ord || |
1591 | params->ird > sdev->attrs.max_ird) { |
1592 | siw_dbg_cep( |
1593 | cep, |
1594 | "[QP %u]: ord %d (max %d), ird %d (max %d)\n" , |
1595 | qp_id(qp), params->ord, sdev->attrs.max_ord, |
1596 | params->ird, sdev->attrs.max_ird); |
1597 | goto error_unlock; |
1598 | } |
1599 | if (cep->enhanced_rdma_conn_est) |
1600 | max_priv_data -= sizeof(struct mpa_v2_data); |
1601 | |
1602 | if (params->private_data_len > max_priv_data) { |
1603 | siw_dbg_cep( |
1604 | cep, |
1605 | "[QP %u]: private data length: %d (max %d)\n" , |
1606 | qp_id(qp), params->private_data_len, max_priv_data); |
1607 | goto error_unlock; |
1608 | } |
1609 | if (cep->enhanced_rdma_conn_est) { |
1610 | if (params->ord > cep->ord) { |
1611 | if (relaxed_ird_negotiation) { |
1612 | params->ord = cep->ord; |
1613 | } else { |
1614 | cep->ird = params->ird; |
1615 | cep->ord = params->ord; |
1616 | goto error_unlock; |
1617 | } |
1618 | } |
1619 | if (params->ird < cep->ird) { |
1620 | if (relaxed_ird_negotiation && |
1621 | cep->ird <= sdev->attrs.max_ird) |
1622 | params->ird = cep->ird; |
1623 | else { |
1624 | rv = -ENOMEM; |
1625 | goto error_unlock; |
1626 | } |
1627 | } |
1628 | if (cep->mpa.v2_ctrl.ord & |
1629 | (MPA_V2_RDMA_WRITE_RTR | MPA_V2_RDMA_READ_RTR)) |
1630 | wait_for_peer_rts = true; |
1631 | /* |
1632 | * Signal back negotiated IRD and ORD values |
1633 | */ |
1634 | cep->mpa.v2_ctrl.ord = |
1635 | htons(params->ord & MPA_IRD_ORD_MASK) | |
1636 | (cep->mpa.v2_ctrl.ord & ~MPA_V2_MASK_IRD_ORD); |
1637 | cep->mpa.v2_ctrl.ird = |
1638 | htons(params->ird & MPA_IRD_ORD_MASK) | |
1639 | (cep->mpa.v2_ctrl.ird & ~MPA_V2_MASK_IRD_ORD); |
1640 | } |
1641 | cep->ird = params->ird; |
1642 | cep->ord = params->ord; |
1643 | |
1644 | cep->cm_id = id; |
1645 | id->add_ref(id); |
1646 | |
1647 | memset(&qp_attrs, 0, sizeof(qp_attrs)); |
1648 | qp_attrs.orq_size = cep->ord; |
1649 | qp_attrs.irq_size = cep->ird; |
1650 | qp_attrs.sk = cep->sock; |
1651 | if (cep->mpa.hdr.params.bits & MPA_RR_FLAG_CRC) |
1652 | qp_attrs.flags = SIW_MPA_CRC; |
1653 | qp_attrs.state = SIW_QP_STATE_RTS; |
1654 | |
1655 | siw_dbg_cep(cep, "[QP%u]: moving to rts\n" , qp_id(qp)); |
1656 | |
1657 | /* Associate QP with CEP */ |
1658 | siw_cep_get(cep); |
1659 | qp->cep = cep; |
1660 | |
1661 | /* siw_qp_get(qp) already done by QP lookup */ |
1662 | cep->qp = qp; |
1663 | |
1664 | cep->state = SIW_EPSTATE_RDMA_MODE; |
1665 | |
1666 | /* Move socket RX/TX under QP control */ |
1667 | rv = siw_qp_modify(qp, attr: &qp_attrs, |
1668 | mask: SIW_QP_ATTR_STATE | SIW_QP_ATTR_LLP_HANDLE | |
1669 | SIW_QP_ATTR_ORD | SIW_QP_ATTR_IRD | |
1670 | SIW_QP_ATTR_MPA); |
1671 | up_write(sem: &qp->state_lock); |
1672 | if (rv) |
1673 | goto error; |
1674 | |
1675 | siw_dbg_cep(cep, "[QP %u]: send mpa reply, %d byte pdata\n" , |
1676 | qp_id(qp), params->private_data_len); |
1677 | |
1678 | rv = siw_send_mpareqrep(cep, pdata: params->private_data, |
1679 | pd_len: params->private_data_len); |
1680 | if (rv != 0) |
1681 | goto error; |
1682 | |
1683 | if (wait_for_peer_rts) { |
1684 | siw_sk_assign_rtr_upcalls(cep); |
1685 | } else { |
1686 | siw_qp_socket_assoc(cep, qp); |
1687 | rv = siw_cm_upcall(cep, reason: IW_CM_EVENT_ESTABLISHED, status: 0); |
1688 | if (rv) |
1689 | goto error; |
1690 | } |
1691 | siw_cep_set_free(cep); |
1692 | |
1693 | return 0; |
1694 | |
1695 | error_unlock: |
1696 | up_write(sem: &qp->state_lock); |
1697 | error: |
1698 | siw_destroy_cep_sock(cep); |
1699 | |
1700 | cep->state = SIW_EPSTATE_CLOSED; |
1701 | |
1702 | siw_free_cm_id(cep); |
1703 | if (qp->cep) { |
1704 | siw_cep_put(cep); |
1705 | qp->cep = NULL; |
1706 | } |
1707 | cep->qp = NULL; |
1708 | siw_qp_put(qp); |
1709 | free_cep: |
1710 | siw_cep_set_free_and_put(cep); |
1711 | return rv; |
1712 | } |
1713 | |
1714 | /* |
1715 | * siw_reject() |
1716 | * |
1717 | * Local connection reject case. Send private data back to peer, |
1718 | * close connection and dereference connection id. |
1719 | */ |
1720 | int siw_reject(struct iw_cm_id *id, const void *pdata, u8 pd_len) |
1721 | { |
1722 | struct siw_cep *cep = (struct siw_cep *)id->provider_data; |
1723 | |
1724 | siw_cep_set_inuse(cep); |
1725 | siw_cep_put(cep); |
1726 | |
1727 | siw_cancel_mpatimer(cep); |
1728 | |
1729 | if (cep->state != SIW_EPSTATE_RECVD_MPAREQ) { |
1730 | siw_dbg_cep(cep, "out of state\n" ); |
1731 | |
1732 | siw_cep_set_free_and_put(cep); /* put last reference */ |
1733 | |
1734 | return -ECONNRESET; |
1735 | } |
1736 | siw_dbg_cep(cep, "cep->state %d, pd_len %d\n" , cep->state, |
1737 | pd_len); |
1738 | |
1739 | if (__mpa_rr_revision(mpa_rr_bits: cep->mpa.hdr.params.bits) >= MPA_REVISION_1) { |
1740 | cep->mpa.hdr.params.bits |= MPA_RR_FLAG_REJECT; /* reject */ |
1741 | siw_send_mpareqrep(cep, pdata, pd_len); |
1742 | } |
1743 | siw_destroy_cep_sock(cep); |
1744 | |
1745 | cep->state = SIW_EPSTATE_CLOSED; |
1746 | |
1747 | siw_cep_set_free_and_put(cep); |
1748 | |
1749 | return 0; |
1750 | } |
1751 | |
1752 | /* |
1753 | * siw_create_listen - Create resources for a listener's IWCM ID @id |
1754 | * |
1755 | * Starts listen on the socket address id->local_addr. |
1756 | * |
1757 | */ |
1758 | int siw_create_listen(struct iw_cm_id *id, int backlog) |
1759 | { |
1760 | struct socket *s; |
1761 | struct siw_cep *cep = NULL; |
1762 | struct siw_device *sdev = to_siw_dev(base_dev: id->device); |
1763 | int addr_family = id->local_addr.ss_family; |
1764 | int rv = 0; |
1765 | |
1766 | if (addr_family != AF_INET && addr_family != AF_INET6) |
1767 | return -EAFNOSUPPORT; |
1768 | |
1769 | rv = sock_create(family: addr_family, type: SOCK_STREAM, IPPROTO_TCP, res: &s); |
1770 | if (rv < 0) |
1771 | return rv; |
1772 | |
1773 | /* |
1774 | * Allow binding local port when still in TIME_WAIT from last close. |
1775 | */ |
1776 | sock_set_reuseaddr(sk: s->sk); |
1777 | |
1778 | if (addr_family == AF_INET) { |
1779 | struct sockaddr_in *laddr = &to_sockaddr_in(id->local_addr); |
1780 | |
1781 | /* For wildcard addr, limit binding to current device only */ |
1782 | if (ipv4_is_zeronet(addr: laddr->sin_addr.s_addr)) |
1783 | s->sk->sk_bound_dev_if = sdev->netdev->ifindex; |
1784 | |
1785 | rv = s->ops->bind(s, (struct sockaddr *)laddr, |
1786 | sizeof(struct sockaddr_in)); |
1787 | } else { |
1788 | struct sockaddr_in6 *laddr = &to_sockaddr_in6(id->local_addr); |
1789 | |
1790 | if (id->afonly) { |
1791 | rv = ip6_sock_set_v6only(sk: s->sk); |
1792 | if (rv) { |
1793 | siw_dbg(id->device, |
1794 | "ip6_sock_set_v6only erro: %d\n" , rv); |
1795 | goto error; |
1796 | } |
1797 | } |
1798 | |
1799 | /* For wildcard addr, limit binding to current device only */ |
1800 | if (ipv6_addr_any(a: &laddr->sin6_addr)) |
1801 | s->sk->sk_bound_dev_if = sdev->netdev->ifindex; |
1802 | |
1803 | rv = s->ops->bind(s, (struct sockaddr *)laddr, |
1804 | sizeof(struct sockaddr_in6)); |
1805 | } |
1806 | if (rv) { |
1807 | siw_dbg(id->device, "socket bind error: %d\n" , rv); |
1808 | goto error; |
1809 | } |
1810 | cep = siw_cep_alloc(sdev); |
1811 | if (!cep) { |
1812 | rv = -ENOMEM; |
1813 | goto error; |
1814 | } |
1815 | siw_cep_socket_assoc(cep, s); |
1816 | |
1817 | rv = siw_cm_alloc_work(cep, num: backlog); |
1818 | if (rv) { |
1819 | siw_dbg(id->device, |
1820 | "alloc_work error %d, backlog %d\n" , |
1821 | rv, backlog); |
1822 | goto error; |
1823 | } |
1824 | rv = s->ops->listen(s, backlog); |
1825 | if (rv) { |
1826 | siw_dbg(id->device, "listen error %d\n" , rv); |
1827 | goto error; |
1828 | } |
1829 | cep->cm_id = id; |
1830 | id->add_ref(id); |
1831 | |
1832 | /* |
1833 | * In case of a wildcard rdma_listen on a multi-homed device, |
1834 | * a listener's IWCM id is associated with more than one listening CEP. |
1835 | * |
1836 | * We currently use id->provider_data in three different ways: |
1837 | * |
1838 | * o For a listener's IWCM id, id->provider_data points to |
1839 | * the list_head of the list of listening CEPs. |
1840 | * Uses: siw_create_listen(), siw_destroy_listen() |
1841 | * |
1842 | * o For each accepted passive-side IWCM id, id->provider_data |
1843 | * points to the CEP itself. This is a consequence of |
1844 | * - siw_cm_upcall() setting event.provider_data = cep and |
1845 | * - the IWCM's cm_conn_req_handler() setting provider_data of the |
1846 | * new passive-side IWCM id equal to event.provider_data |
1847 | * Uses: siw_accept(), siw_reject() |
1848 | * |
1849 | * o For an active-side IWCM id, id->provider_data is not used at all. |
1850 | * |
1851 | */ |
1852 | if (!id->provider_data) { |
1853 | id->provider_data = |
1854 | kmalloc(size: sizeof(struct list_head), GFP_KERNEL); |
1855 | if (!id->provider_data) { |
1856 | rv = -ENOMEM; |
1857 | goto error; |
1858 | } |
1859 | INIT_LIST_HEAD(list: (struct list_head *)id->provider_data); |
1860 | } |
1861 | list_add_tail(new: &cep->listenq, head: (struct list_head *)id->provider_data); |
1862 | cep->state = SIW_EPSTATE_LISTENING; |
1863 | |
1864 | siw_dbg(id->device, "Listen at laddr %pISp\n" , &id->local_addr); |
1865 | |
1866 | return 0; |
1867 | |
1868 | error: |
1869 | siw_dbg(id->device, "failed: %d\n" , rv); |
1870 | |
1871 | if (cep) { |
1872 | siw_cep_set_inuse(cep); |
1873 | |
1874 | siw_free_cm_id(cep); |
1875 | cep->sock = NULL; |
1876 | siw_socket_disassoc(s); |
1877 | cep->state = SIW_EPSTATE_CLOSED; |
1878 | |
1879 | siw_cep_set_free_and_put(cep); |
1880 | } |
1881 | sock_release(sock: s); |
1882 | |
1883 | return rv; |
1884 | } |
1885 | |
1886 | static void siw_drop_listeners(struct iw_cm_id *id) |
1887 | { |
1888 | struct list_head *p, *tmp; |
1889 | |
1890 | /* |
1891 | * In case of a wildcard rdma_listen on a multi-homed device, |
1892 | * a listener's IWCM id is associated with more than one listening CEP. |
1893 | */ |
1894 | list_for_each_safe(p, tmp, (struct list_head *)id->provider_data) { |
1895 | struct siw_cep *cep = list_entry(p, struct siw_cep, listenq); |
1896 | |
1897 | list_del(entry: p); |
1898 | |
1899 | siw_dbg_cep(cep, "drop cep, state %d\n" , cep->state); |
1900 | |
1901 | siw_cep_set_inuse(cep); |
1902 | |
1903 | siw_free_cm_id(cep); |
1904 | if (cep->sock) { |
1905 | siw_socket_disassoc(s: cep->sock); |
1906 | sock_release(sock: cep->sock); |
1907 | cep->sock = NULL; |
1908 | } |
1909 | cep->state = SIW_EPSTATE_CLOSED; |
1910 | siw_cep_set_free_and_put(cep); |
1911 | } |
1912 | } |
1913 | |
1914 | int siw_destroy_listen(struct iw_cm_id *id) |
1915 | { |
1916 | if (!id->provider_data) { |
1917 | siw_dbg(id->device, "no cep(s)\n" ); |
1918 | return 0; |
1919 | } |
1920 | siw_drop_listeners(id); |
1921 | kfree(objp: id->provider_data); |
1922 | id->provider_data = NULL; |
1923 | |
1924 | return 0; |
1925 | } |
1926 | |
1927 | int siw_cm_init(void) |
1928 | { |
1929 | /* |
1930 | * create_single_workqueue for strict ordering |
1931 | */ |
1932 | siw_cm_wq = create_singlethread_workqueue("siw_cm_wq" ); |
1933 | if (!siw_cm_wq) |
1934 | return -ENOMEM; |
1935 | |
1936 | return 0; |
1937 | } |
1938 | |
1939 | void siw_cm_exit(void) |
1940 | { |
1941 | if (siw_cm_wq) |
1942 | destroy_workqueue(wq: siw_cm_wq); |
1943 | } |
1944 | |