// SPDX-License-Identifier: GPL-2.0
/*
 * Shared Memory Communications over RDMA (SMC-R) and RoCE
 *
 * Socket Closing - normal and abnormal
 *
 * Copyright IBM Corp. 2016
 *
 * Author(s):  Ursula Braun <ubraun@linux.vnet.ibm.com>
 */

#include <linux/workqueue.h>
#include <linux/sched/signal.h>

#include <net/sock.h>
#include <net/tcp.h>

#include "smc.h"
#include "smc_tx.h"
#include "smc_cdc.h"
#include "smc_close.h"

/* release the clcsock that is assigned to the smc_sock */
void smc_clcsock_release(struct smc_sock *smc)
{
	struct socket *tcp;

	if (smc->listen_smc && current_work() != &smc->smc_listen_work)
		cancel_work_sync(&smc->smc_listen_work);
	mutex_lock(&smc->clcsock_release_lock);
	if (smc->clcsock) {
		tcp = smc->clcsock;
		smc->clcsock = NULL;
		sock_release(tcp);
	}
	mutex_unlock(&smc->clcsock_release_lock);
}

static void smc_close_cleanup_listen(struct sock *parent)
{
	struct sock *sk;

	/* Close non-accepted connections */
	while ((sk = smc_accept_dequeue(parent, NULL)))
		smc_close_non_accepted(sk);
}

/* wait for sndbuf data to be transmitted */
static void smc_close_stream_wait(struct smc_sock *smc, long timeout)
{
	DEFINE_WAIT_FUNC(wait, woken_wake_function);
	struct sock *sk = &smc->sk;

	if (!timeout)
		return;

	if (!smc_tx_prepared_sends(&smc->conn))
		return;

	/* Send out corked data remaining in sndbuf */
	smc_tx_pending(&smc->conn);

	smc->wait_close_tx_prepared = 1;
	add_wait_queue(sk_sleep(sk), &wait);
	while (!signal_pending(current) && timeout) {
		int rc;

		rc = sk_wait_event(sk, &timeout,
				   !smc_tx_prepared_sends(&smc->conn) ||
				   READ_ONCE(sk->sk_err) == ECONNABORTED ||
				   READ_ONCE(sk->sk_err) == ECONNRESET ||
				   smc->conn.killed,
				   &wait);
		if (rc)
			break;
	}
	remove_wait_queue(sk_sleep(sk), &wait);
	smc->wait_close_tx_prepared = 0;
}

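/* If a task is blocked in smc_close_stream_wait(), kick sk_state_change()
 * so it re-checks whether all prepared sends have been transmitted.
 */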
void smc_close_wake_tx_prepared(struct smc_sock *smc)
{
	if (smc->wait_close_tx_prepared)
		/* wake up socket closing */
		smc->sk.sk_state_change(&smc->sk);
}

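/* announce "done writing" to the peer: set peer_done_writing in the local
 * CDC control area and send it out as a CDC message (shutdown write)
 */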
static int smc_close_wr(struct smc_connection *conn)
{
	conn->local_tx_ctrl.conn_state_flags.peer_done_writing = 1;

	return smc_cdc_get_slot_and_msg_send(conn);
}

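/* send the final closing notification to the peer: if unread data is still
 * pending in the rcvbuf, the close is converted into an abort
 * (peer_conn_abort), otherwise peer_conn_closed is announced; returns -EPIPE
 * if the connection has already been killed
 */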
static int smc_close_final(struct smc_connection *conn)
{
	if (atomic_read(&conn->bytes_to_rcv))
		conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1;
	else
		conn->local_tx_ctrl.conn_state_flags.peer_conn_closed = 1;
	if (conn->killed)
		return -EPIPE;

	return smc_cdc_get_slot_and_msg_send(conn);
}

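/* abort the connection by announcing peer_conn_abort to the peer via a CDC
 * message
 */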
int smc_close_abort(struct smc_connection *conn)
{
	conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1;

	return smc_cdc_get_slot_and_msg_send(conn);
}

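/* cancel the connection's close and tx works; the sock lock held by the
 * caller is dropped around the synchronous cancels, since the works take
 * the lock themselves
 */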
static void smc_close_cancel_work(struct smc_sock *smc)
{
	struct sock *sk = &smc->sk;

	release_sock(sk);
	cancel_work_sync(&smc->conn.close_work);
	cancel_delayed_work_sync(&smc->conn.tx_work);
	lock_sock(sk);
}

/* terminate smc socket abnormally - active abort
 * link group is terminated, i.e. RDMA communication no longer possible
 */
void smc_close_active_abort(struct smc_sock *smc)
{
	struct sock *sk = &smc->sk;
	bool release_clcsock = false;

	if (sk->sk_state != SMC_INIT && smc->clcsock && smc->clcsock->sk) {
		sk->sk_err = ECONNABORTED;
		if (smc->clcsock && smc->clcsock->sk)
			tcp_abort(smc->clcsock->sk, ECONNABORTED);
	}
	switch (sk->sk_state) {
	case SMC_ACTIVE:
	case SMC_APPCLOSEWAIT1:
	case SMC_APPCLOSEWAIT2:
		sk->sk_state = SMC_PEERABORTWAIT;
		smc_close_cancel_work(smc);
		if (sk->sk_state != SMC_PEERABORTWAIT)
			break;
		sk->sk_state = SMC_CLOSED;
		sock_put(sk); /* (postponed) passive closing */
		break;
	case SMC_PEERCLOSEWAIT1:
	case SMC_PEERCLOSEWAIT2:
	case SMC_PEERFINCLOSEWAIT:
		sk->sk_state = SMC_PEERABORTWAIT;
		smc_close_cancel_work(smc);
		if (sk->sk_state != SMC_PEERABORTWAIT)
			break;
		sk->sk_state = SMC_CLOSED;
		smc_conn_free(&smc->conn);
		release_clcsock = true;
		sock_put(sk); /* passive closing */
		break;
	case SMC_PROCESSABORT:
	case SMC_APPFINCLOSEWAIT:
		sk->sk_state = SMC_PEERABORTWAIT;
		smc_close_cancel_work(smc);
		if (sk->sk_state != SMC_PEERABORTWAIT)
			break;
		sk->sk_state = SMC_CLOSED;
		smc_conn_free(&smc->conn);
		release_clcsock = true;
		break;
	case SMC_INIT:
	case SMC_PEERABORTWAIT:
	case SMC_CLOSED:
		break;
	}

	sock_set_flag(sk, SOCK_DEAD);
	sk->sk_state_change(sk);

	if (release_clcsock) {
		release_sock(sk);
		smc_clcsock_release(smc);
		lock_sock(sk);
	}
}

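/* true if any kind of closing - abort or regular close - has already been
 * announced to the peer
 */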
static inline bool smc_close_sent_any_close(struct smc_connection *conn)
{
	return conn->local_tx_ctrl.conn_state_flags.peer_conn_abort ||
	       conn->local_tx_ctrl.conn_state_flags.peer_conn_closed;
}

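/* active close: drive the closing state machine from the current socket
 * state, wait up to the linger timeout for pending sends to drain, and
 * notify the peer where the state requires it
 */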
int smc_close_active(struct smc_sock *smc)
{
	struct smc_cdc_conn_state_flags *txflags =
		&smc->conn.local_tx_ctrl.conn_state_flags;
	struct smc_connection *conn = &smc->conn;
	struct sock *sk = &smc->sk;
	int old_state;
	long timeout;
	int rc = 0;
	int rc1 = 0;

	timeout = current->flags & PF_EXITING ?
		  0 : sock_flag(sk, SOCK_LINGER) ?
		      sk->sk_lingertime : SMC_MAX_STREAM_WAIT_TIMEOUT;

	old_state = sk->sk_state;
again:
	switch (sk->sk_state) {
	case SMC_INIT:
		sk->sk_state = SMC_CLOSED;
		break;
	case SMC_LISTEN:
		sk->sk_state = SMC_CLOSED;
		sk->sk_state_change(sk); /* wake up accept */
		if (smc->clcsock && smc->clcsock->sk) {
			write_lock_bh(&smc->clcsock->sk->sk_callback_lock);
			smc_clcsock_restore_cb(&smc->clcsock->sk->sk_data_ready,
					       &smc->clcsk_data_ready);
			smc->clcsock->sk->sk_user_data = NULL;
			write_unlock_bh(&smc->clcsock->sk->sk_callback_lock);
			rc = kernel_sock_shutdown(smc->clcsock, SHUT_RDWR);
		}
		smc_close_cleanup_listen(sk);
		release_sock(sk);
		flush_work(&smc->tcp_listen_work);
		lock_sock(sk);
		break;
	case SMC_ACTIVE:
		smc_close_stream_wait(smc, timeout);
		release_sock(sk);
		cancel_delayed_work_sync(&conn->tx_work);
		lock_sock(sk);
		if (sk->sk_state == SMC_ACTIVE) {
			/* send close request */
			rc = smc_close_final(conn);
			sk->sk_state = SMC_PEERCLOSEWAIT1;

			/* actively shut down the clcsock before the peer
			 * closes it, to prevent the peer from entering
			 * TIME_WAIT state.
			 */
			if (smc->clcsock && smc->clcsock->sk) {
				rc1 = kernel_sock_shutdown(smc->clcsock,
							   SHUT_RDWR);
				rc = rc ? rc : rc1;
			}
		} else {
			/* peer event has changed the state */
			goto again;
		}
		break;
	case SMC_APPFINCLOSEWAIT:
		/* socket already shut down wr or both (active close) */
		if (txflags->peer_done_writing &&
		    !smc_close_sent_any_close(conn)) {
			/* just shutdown wr done, send close request */
			rc = smc_close_final(conn);
		}
		sk->sk_state = SMC_CLOSED;
		break;
	case SMC_APPCLOSEWAIT1:
	case SMC_APPCLOSEWAIT2:
		if (!smc_cdc_rxed_any_close(conn))
			smc_close_stream_wait(smc, timeout);
		release_sock(sk);
		cancel_delayed_work_sync(&conn->tx_work);
		lock_sock(sk);
		if (sk->sk_state != SMC_APPCLOSEWAIT1 &&
		    sk->sk_state != SMC_APPCLOSEWAIT2)
			goto again;
		/* confirm close from peer */
		rc = smc_close_final(conn);
		if (smc_cdc_rxed_any_close(conn)) {
			/* peer has closed the socket already */
			sk->sk_state = SMC_CLOSED;
			sock_put(sk); /* postponed passive closing */
		} else {
			/* peer has just issued a shutdown write */
			sk->sk_state = SMC_PEERFINCLOSEWAIT;
		}
		break;
	case SMC_PEERCLOSEWAIT1:
	case SMC_PEERCLOSEWAIT2:
		if (txflags->peer_done_writing &&
		    !smc_close_sent_any_close(conn)) {
			/* just shutdown wr done, send close request */
			rc = smc_close_final(conn);
		}
		/* peer sending PeerConnectionClosed will cause transition */
		break;
	case SMC_PEERFINCLOSEWAIT:
		/* peer sending PeerConnectionClosed will cause transition */
		break;
	case SMC_PROCESSABORT:
		rc = smc_close_abort(conn);
		sk->sk_state = SMC_CLOSED;
		break;
	case SMC_PEERABORTWAIT:
		sk->sk_state = SMC_CLOSED;
		break;
	case SMC_CLOSED:
		/* nothing to do, add tracing in future patch */
		break;
	}

	if (old_state != sk->sk_state)
		sk->sk_state_change(sk);
	return rc;
}

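/* a peer_conn_abort has been received: move the socket to SMC_PROCESSABORT
 * or SMC_CLOSED depending on its current state; states completing the
 * passive close drop the corresponding sock reference
 */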
static void smc_close_passive_abort_received(struct smc_sock *smc)
{
	struct smc_cdc_conn_state_flags *txflags =
		&smc->conn.local_tx_ctrl.conn_state_flags;
	struct sock *sk = &smc->sk;

	switch (sk->sk_state) {
	case SMC_INIT:
	case SMC_ACTIVE:
	case SMC_APPCLOSEWAIT1:
		sk->sk_state = SMC_PROCESSABORT;
		sock_put(sk); /* passive closing */
		break;
	case SMC_APPFINCLOSEWAIT:
		sk->sk_state = SMC_PROCESSABORT;
		break;
	case SMC_PEERCLOSEWAIT1:
	case SMC_PEERCLOSEWAIT2:
		if (txflags->peer_done_writing &&
		    !smc_close_sent_any_close(&smc->conn))
			/* just shutdown, but not yet closed locally */
			sk->sk_state = SMC_PROCESSABORT;
		else
			sk->sk_state = SMC_CLOSED;
		sock_put(sk); /* passive closing */
		break;
	case SMC_APPCLOSEWAIT2:
	case SMC_PEERFINCLOSEWAIT:
		sk->sk_state = SMC_CLOSED;
		sock_put(sk); /* passive closing */
		break;
	case SMC_PEERABORTWAIT:
		sk->sk_state = SMC_CLOSED;
		break;
	case SMC_PROCESSABORT:
		/* nothing to do, add tracing in future patch */
		break;
	}
}

/* Either some kind of closing has been received: peer_conn_closed,
 * peer_conn_abort, or peer_done_writing
 * or the link group of the connection terminates abnormally.
 */
static void smc_close_passive_work(struct work_struct *work)
{
	struct smc_connection *conn = container_of(work,
						   struct smc_connection,
						   close_work);
	struct smc_sock *smc = container_of(conn, struct smc_sock, conn);
	struct smc_cdc_conn_state_flags *rxflags;
	bool release_clcsock = false;
	struct sock *sk = &smc->sk;
	int old_state;

	lock_sock(sk);
	old_state = sk->sk_state;

	rxflags = &conn->local_rx_ctrl.conn_state_flags;
	if (rxflags->peer_conn_abort) {
		/* peer has not received all data */
		smc_close_passive_abort_received(smc);
		release_sock(sk);
		cancel_delayed_work_sync(&conn->tx_work);
		lock_sock(sk);
		goto wakeup;
	}

	switch (sk->sk_state) {
	case SMC_INIT:
		sk->sk_state = SMC_APPCLOSEWAIT1;
		break;
	case SMC_ACTIVE:
		sk->sk_state = SMC_APPCLOSEWAIT1;
		/* postpone sock_put() for passive closing to cover
		 * received SEND_SHUTDOWN as well
		 */
		break;
	case SMC_PEERCLOSEWAIT1:
		if (rxflags->peer_done_writing)
			sk->sk_state = SMC_PEERCLOSEWAIT2;
		fallthrough;
		/* to check for closing */
	case SMC_PEERCLOSEWAIT2:
		if (!smc_cdc_rxed_any_close(conn))
			break;
		if (sock_flag(sk, SOCK_DEAD) &&
		    smc_close_sent_any_close(conn)) {
			/* smc_release has already been called locally */
			sk->sk_state = SMC_CLOSED;
		} else {
			/* just shutdown, but not yet closed locally */
			sk->sk_state = SMC_APPFINCLOSEWAIT;
		}
		sock_put(sk); /* passive closing */
		break;
	case SMC_PEERFINCLOSEWAIT:
		if (smc_cdc_rxed_any_close(conn)) {
			sk->sk_state = SMC_CLOSED;
			sock_put(sk); /* passive closing */
		}
		break;
	case SMC_APPCLOSEWAIT1:
	case SMC_APPCLOSEWAIT2:
		/* postpone sock_put() for passive closing to cover
		 * received SEND_SHUTDOWN as well
		 */
		break;
	case SMC_APPFINCLOSEWAIT:
	case SMC_PEERABORTWAIT:
	case SMC_PROCESSABORT:
	case SMC_CLOSED:
		/* nothing to do, add tracing in future patch */
		break;
	}

wakeup:
	sk->sk_data_ready(sk); /* wakeup blocked rcvbuf consumers */
	sk->sk_write_space(sk); /* wakeup blocked sndbuf producers */

	if (old_state != sk->sk_state) {
		sk->sk_state_change(sk);
		if ((sk->sk_state == SMC_CLOSED) &&
		    (sock_flag(sk, SOCK_DEAD) || !sk->sk_socket)) {
			smc_conn_free(conn);
			if (smc->clcsock)
				release_clcsock = true;
		}
	}
	release_sock(sk);
	if (release_clcsock)
		smc_clcsock_release(smc);
	sock_put(sk); /* sock_hold done by schedulers of close_work */
}

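/* shutdown the write side of the connection: wait for pending sends to
 * drain, then announce "done writing" to the peer and move to the matching
 * wait state
 */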
int smc_close_shutdown_write(struct smc_sock *smc)
{
	struct smc_connection *conn = &smc->conn;
	struct sock *sk = &smc->sk;
	int old_state;
	long timeout;
	int rc = 0;

	timeout = current->flags & PF_EXITING ?
		  0 : sock_flag(sk, SOCK_LINGER) ?
		      sk->sk_lingertime : SMC_MAX_STREAM_WAIT_TIMEOUT;

	old_state = sk->sk_state;
again:
	switch (sk->sk_state) {
	case SMC_ACTIVE:
		smc_close_stream_wait(smc, timeout);
		release_sock(sk);
		cancel_delayed_work_sync(&conn->tx_work);
		lock_sock(sk);
		if (sk->sk_state != SMC_ACTIVE)
			goto again;
		/* send close wr request */
		rc = smc_close_wr(conn);
		sk->sk_state = SMC_PEERCLOSEWAIT1;
		break;
	case SMC_APPCLOSEWAIT1:
		/* passive close */
		if (!smc_cdc_rxed_any_close(conn))
			smc_close_stream_wait(smc, timeout);
		release_sock(sk);
		cancel_delayed_work_sync(&conn->tx_work);
		lock_sock(sk);
		if (sk->sk_state != SMC_APPCLOSEWAIT1)
			goto again;
		/* confirm close from peer */
		rc = smc_close_wr(conn);
		sk->sk_state = SMC_APPCLOSEWAIT2;
		break;
	case SMC_APPCLOSEWAIT2:
	case SMC_PEERFINCLOSEWAIT:
	case SMC_PEERCLOSEWAIT1:
	case SMC_PEERCLOSEWAIT2:
	case SMC_APPFINCLOSEWAIT:
	case SMC_PROCESSABORT:
	case SMC_PEERABORTWAIT:
		/* nothing to do, add tracing in future patch */
		break;
	}

	if (old_state != sk->sk_state)
		sk->sk_state_change(sk);
	return rc;
}

/* Initialize close properties on connection establishment. */
void smc_close_init(struct smc_sock *smc)
{
	INIT_WORK(&smc->conn.close_work, smc_close_passive_work);
}