1 | // SPDX-License-Identifier: GPL-2.0 |
2 | /* |
3 | * Shared Memory Communications over RDMA (SMC-R) and RoCE |
4 | * |
5 | * Basic Transport Functions exploiting Infiniband API |
6 | * |
7 | * Copyright IBM Corp. 2016 |
8 | * |
9 | * Author(s): Ursula Braun <ubraun@linux.vnet.ibm.com> |
10 | */ |
11 | |
12 | #include <linux/socket.h> |
13 | #include <linux/if_vlan.h> |
14 | #include <linux/random.h> |
15 | #include <linux/workqueue.h> |
16 | #include <linux/wait.h> |
17 | #include <linux/reboot.h> |
18 | #include <linux/mutex.h> |
19 | #include <linux/list.h> |
20 | #include <linux/smc.h> |
21 | #include <net/tcp.h> |
22 | #include <net/sock.h> |
23 | #include <rdma/ib_verbs.h> |
24 | #include <rdma/ib_cache.h> |
25 | |
26 | #include "smc.h" |
27 | #include "smc_clc.h" |
28 | #include "smc_core.h" |
29 | #include "smc_ib.h" |
30 | #include "smc_wr.h" |
31 | #include "smc_llc.h" |
32 | #include "smc_cdc.h" |
33 | #include "smc_close.h" |
34 | #include "smc_ism.h" |
35 | #include "smc_netlink.h" |
36 | #include "smc_stats.h" |
37 | #include "smc_tracepoint.h" |
38 | |
39 | #define SMC_LGR_NUM_INCR 256 |
40 | #define SMC_LGR_FREE_DELAY_SERV (600 * HZ) |
41 | #define SMC_LGR_FREE_DELAY_CLNT (SMC_LGR_FREE_DELAY_SERV + 10 * HZ) |
42 | |
43 | struct smc_lgr_list smc_lgr_list = { /* established link groups */ |
44 | .lock = __SPIN_LOCK_UNLOCKED(smc_lgr_list.lock), |
45 | .list = LIST_HEAD_INIT(smc_lgr_list.list), |
46 | .num = 0, |
47 | }; |
48 | |
49 | static atomic_t lgr_cnt = ATOMIC_INIT(0); /* number of existing link groups */ |
50 | static DECLARE_WAIT_QUEUE_HEAD(lgrs_deleted); |
51 | |
52 | static void smc_buf_free(struct smc_link_group *lgr, bool is_rmb, |
53 | struct smc_buf_desc *buf_desc); |
54 | static void __smc_lgr_terminate(struct smc_link_group *lgr, bool soft); |
55 | |
56 | static void smc_link_down_work(struct work_struct *work); |
57 | |
58 | /* return head of link group list and its lock for a given link group */ |
59 | static inline struct list_head *smc_lgr_list_head(struct smc_link_group *lgr, |
60 | spinlock_t **lgr_lock) |
61 | { |
62 | if (lgr->is_smcd) { |
63 | *lgr_lock = &lgr->smcd->lgr_lock; |
64 | return &lgr->smcd->lgr_list; |
65 | } |
66 | |
67 | *lgr_lock = &smc_lgr_list.lock; |
68 | return &smc_lgr_list.list; |
69 | } |
70 | |
71 | static void smc_ibdev_cnt_inc(struct smc_link *lnk) |
72 | { |
73 | atomic_inc(v: &lnk->smcibdev->lnk_cnt_by_port[lnk->ibport - 1]); |
74 | } |
75 | |
76 | static void smc_ibdev_cnt_dec(struct smc_link *lnk) |
77 | { |
78 | atomic_dec(v: &lnk->smcibdev->lnk_cnt_by_port[lnk->ibport - 1]); |
79 | } |
80 | |
81 | static void smc_lgr_schedule_free_work(struct smc_link_group *lgr) |
82 | { |
83 | /* client link group creation always follows the server link group |
84 | * creation. For client use a somewhat higher removal delay time, |
85 | * otherwise there is a risk of out-of-sync link groups. |
86 | */ |
87 | if (!lgr->freeing) { |
88 | mod_delayed_work(wq: system_wq, dwork: &lgr->free_work, |
89 | delay: (!lgr->is_smcd && lgr->role == SMC_CLNT) ? |
90 | SMC_LGR_FREE_DELAY_CLNT : |
91 | SMC_LGR_FREE_DELAY_SERV); |
92 | } |
93 | } |
94 | |
95 | /* Register connection's alert token in our lookup structure. |
96 | * To use rbtrees we have to implement our own insert core. |
97 | * Requires @conns_lock |
98 | * @smc connection to register |
99 | * Returns 0 on success, != otherwise. |
100 | */ |
101 | static void smc_lgr_add_alert_token(struct smc_connection *conn) |
102 | { |
103 | struct rb_node **link, *parent = NULL; |
104 | u32 token = conn->alert_token_local; |
105 | |
106 | link = &conn->lgr->conns_all.rb_node; |
107 | while (*link) { |
108 | struct smc_connection *cur = rb_entry(*link, |
109 | struct smc_connection, alert_node); |
110 | |
111 | parent = *link; |
112 | if (cur->alert_token_local > token) |
113 | link = &parent->rb_left; |
114 | else |
115 | link = &parent->rb_right; |
116 | } |
117 | /* Put the new node there */ |
118 | rb_link_node(node: &conn->alert_node, parent, rb_link: link); |
119 | rb_insert_color(&conn->alert_node, &conn->lgr->conns_all); |
120 | } |
121 | |
122 | /* assign an SMC-R link to the connection */ |
123 | static int smcr_lgr_conn_assign_link(struct smc_connection *conn, bool first) |
124 | { |
125 | enum smc_link_state expected = first ? SMC_LNK_ACTIVATING : |
126 | SMC_LNK_ACTIVE; |
127 | int i, j; |
128 | |
129 | /* do link balancing */ |
130 | conn->lnk = NULL; /* reset conn->lnk first */ |
131 | for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) { |
132 | struct smc_link *lnk = &conn->lgr->lnk[i]; |
133 | |
134 | if (lnk->state != expected || lnk->link_is_asym) |
135 | continue; |
136 | if (conn->lgr->role == SMC_CLNT) { |
137 | conn->lnk = lnk; /* temporary, SMC server assigns link*/ |
138 | break; |
139 | } |
140 | if (conn->lgr->conns_num % 2) { |
141 | for (j = i + 1; j < SMC_LINKS_PER_LGR_MAX; j++) { |
142 | struct smc_link *lnk2; |
143 | |
144 | lnk2 = &conn->lgr->lnk[j]; |
145 | if (lnk2->state == expected && |
146 | !lnk2->link_is_asym) { |
147 | conn->lnk = lnk2; |
148 | break; |
149 | } |
150 | } |
151 | } |
152 | if (!conn->lnk) |
153 | conn->lnk = lnk; |
154 | break; |
155 | } |
156 | if (!conn->lnk) |
157 | return SMC_CLC_DECL_NOACTLINK; |
158 | atomic_inc(v: &conn->lnk->conn_cnt); |
159 | return 0; |
160 | } |
161 | |
162 | /* Register connection in link group by assigning an alert token |
163 | * registered in a search tree. |
164 | * Requires @conns_lock |
165 | * Note that '0' is a reserved value and not assigned. |
166 | */ |
167 | static int smc_lgr_register_conn(struct smc_connection *conn, bool first) |
168 | { |
169 | struct smc_sock *smc = container_of(conn, struct smc_sock, conn); |
170 | static atomic_t nexttoken = ATOMIC_INIT(0); |
171 | int rc; |
172 | |
173 | if (!conn->lgr->is_smcd) { |
174 | rc = smcr_lgr_conn_assign_link(conn, first); |
175 | if (rc) { |
176 | conn->lgr = NULL; |
177 | return rc; |
178 | } |
179 | } |
180 | /* find a new alert_token_local value not yet used by some connection |
181 | * in this link group |
182 | */ |
183 | sock_hold(sk: &smc->sk); /* sock_put in smc_lgr_unregister_conn() */ |
184 | while (!conn->alert_token_local) { |
185 | conn->alert_token_local = atomic_inc_return(v: &nexttoken); |
186 | if (smc_lgr_find_conn(token: conn->alert_token_local, lgr: conn->lgr)) |
187 | conn->alert_token_local = 0; |
188 | } |
189 | smc_lgr_add_alert_token(conn); |
190 | conn->lgr->conns_num++; |
191 | return 0; |
192 | } |
193 | |
194 | /* Unregister connection and reset the alert token of the given connection< |
195 | */ |
196 | static void __smc_lgr_unregister_conn(struct smc_connection *conn) |
197 | { |
198 | struct smc_sock *smc = container_of(conn, struct smc_sock, conn); |
199 | struct smc_link_group *lgr = conn->lgr; |
200 | |
201 | rb_erase(&conn->alert_node, &lgr->conns_all); |
202 | if (conn->lnk) |
203 | atomic_dec(v: &conn->lnk->conn_cnt); |
204 | lgr->conns_num--; |
205 | conn->alert_token_local = 0; |
206 | sock_put(sk: &smc->sk); /* sock_hold in smc_lgr_register_conn() */ |
207 | } |
208 | |
209 | /* Unregister connection from lgr |
210 | */ |
211 | static void smc_lgr_unregister_conn(struct smc_connection *conn) |
212 | { |
213 | struct smc_link_group *lgr = conn->lgr; |
214 | |
215 | if (!smc_conn_lgr_valid(conn)) |
216 | return; |
217 | write_lock_bh(&lgr->conns_lock); |
218 | if (conn->alert_token_local) { |
219 | __smc_lgr_unregister_conn(conn); |
220 | } |
221 | write_unlock_bh(&lgr->conns_lock); |
222 | } |
223 | |
224 | int smc_nl_get_sys_info(struct sk_buff *skb, struct netlink_callback *cb) |
225 | { |
226 | struct smc_nl_dmp_ctx *cb_ctx = smc_nl_dmp_ctx(c: cb); |
227 | char hostname[SMC_MAX_HOSTNAME_LEN + 1]; |
228 | char smc_seid[SMC_MAX_EID_LEN + 1]; |
229 | struct nlattr *attrs; |
230 | u8 *seid = NULL; |
231 | u8 *host = NULL; |
232 | void *nlh; |
233 | |
234 | nlh = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, seq: cb->nlh->nlmsg_seq, |
235 | family: &smc_gen_nl_family, NLM_F_MULTI, |
236 | cmd: SMC_NETLINK_GET_SYS_INFO); |
237 | if (!nlh) |
238 | goto errmsg; |
239 | if (cb_ctx->pos[0]) |
240 | goto errout; |
241 | attrs = nla_nest_start(skb, attrtype: SMC_GEN_SYS_INFO); |
242 | if (!attrs) |
243 | goto errout; |
244 | if (nla_put_u8(skb, attrtype: SMC_NLA_SYS_VER, SMC_V2)) |
245 | goto errattr; |
246 | if (nla_put_u8(skb, attrtype: SMC_NLA_SYS_REL, SMC_RELEASE)) |
247 | goto errattr; |
248 | if (nla_put_u8(skb, attrtype: SMC_NLA_SYS_IS_ISM_V2, value: smc_ism_is_v2_capable())) |
249 | goto errattr; |
250 | if (nla_put_u8(skb, attrtype: SMC_NLA_SYS_IS_SMCR_V2, value: true)) |
251 | goto errattr; |
252 | smc_clc_get_hostname(host: &host); |
253 | if (host) { |
254 | memcpy(hostname, host, SMC_MAX_HOSTNAME_LEN); |
255 | hostname[SMC_MAX_HOSTNAME_LEN] = 0; |
256 | if (nla_put_string(skb, attrtype: SMC_NLA_SYS_LOCAL_HOST, str: hostname)) |
257 | goto errattr; |
258 | } |
259 | if (smc_ism_is_v2_capable()) { |
260 | smc_ism_get_system_eid(eid: &seid); |
261 | memcpy(smc_seid, seid, SMC_MAX_EID_LEN); |
262 | smc_seid[SMC_MAX_EID_LEN] = 0; |
263 | if (nla_put_string(skb, attrtype: SMC_NLA_SYS_SEID, str: smc_seid)) |
264 | goto errattr; |
265 | } |
266 | nla_nest_end(skb, start: attrs); |
267 | genlmsg_end(skb, hdr: nlh); |
268 | cb_ctx->pos[0] = 1; |
269 | return skb->len; |
270 | |
271 | errattr: |
272 | nla_nest_cancel(skb, start: attrs); |
273 | errout: |
274 | genlmsg_cancel(skb, hdr: nlh); |
275 | errmsg: |
276 | return skb->len; |
277 | } |
278 | |
279 | /* Fill SMC_NLA_LGR_D_V2_COMMON/SMC_NLA_LGR_R_V2_COMMON nested attributes */ |
280 | static int smc_nl_fill_lgr_v2_common(struct smc_link_group *lgr, |
281 | struct sk_buff *skb, |
282 | struct netlink_callback *cb, |
283 | struct nlattr *v2_attrs) |
284 | { |
285 | char smc_host[SMC_MAX_HOSTNAME_LEN + 1]; |
286 | char smc_eid[SMC_MAX_EID_LEN + 1]; |
287 | |
288 | if (nla_put_u8(skb, attrtype: SMC_NLA_LGR_V2_VER, value: lgr->smc_version)) |
289 | goto errv2attr; |
290 | if (nla_put_u8(skb, attrtype: SMC_NLA_LGR_V2_REL, value: lgr->peer_smc_release)) |
291 | goto errv2attr; |
292 | if (nla_put_u8(skb, attrtype: SMC_NLA_LGR_V2_OS, value: lgr->peer_os)) |
293 | goto errv2attr; |
294 | memcpy(smc_host, lgr->peer_hostname, SMC_MAX_HOSTNAME_LEN); |
295 | smc_host[SMC_MAX_HOSTNAME_LEN] = 0; |
296 | if (nla_put_string(skb, attrtype: SMC_NLA_LGR_V2_PEER_HOST, str: smc_host)) |
297 | goto errv2attr; |
298 | memcpy(smc_eid, lgr->negotiated_eid, SMC_MAX_EID_LEN); |
299 | smc_eid[SMC_MAX_EID_LEN] = 0; |
300 | if (nla_put_string(skb, attrtype: SMC_NLA_LGR_V2_NEG_EID, str: smc_eid)) |
301 | goto errv2attr; |
302 | |
303 | nla_nest_end(skb, start: v2_attrs); |
304 | return 0; |
305 | |
306 | errv2attr: |
307 | nla_nest_cancel(skb, start: v2_attrs); |
308 | return -EMSGSIZE; |
309 | } |
310 | |
311 | static int smc_nl_fill_smcr_lgr_v2(struct smc_link_group *lgr, |
312 | struct sk_buff *skb, |
313 | struct netlink_callback *cb) |
314 | { |
315 | struct nlattr *v2_attrs; |
316 | |
317 | v2_attrs = nla_nest_start(skb, attrtype: SMC_NLA_LGR_R_V2); |
318 | if (!v2_attrs) |
319 | goto errattr; |
320 | if (nla_put_u8(skb, attrtype: SMC_NLA_LGR_R_V2_DIRECT, value: !lgr->uses_gateway)) |
321 | goto errv2attr; |
322 | if (nla_put_u8(skb, attrtype: SMC_NLA_LGR_R_V2_MAX_CONNS, value: lgr->max_conns)) |
323 | goto errv2attr; |
324 | if (nla_put_u8(skb, attrtype: SMC_NLA_LGR_R_V2_MAX_LINKS, value: lgr->max_links)) |
325 | goto errv2attr; |
326 | |
327 | nla_nest_end(skb, start: v2_attrs); |
328 | return 0; |
329 | |
330 | errv2attr: |
331 | nla_nest_cancel(skb, start: v2_attrs); |
332 | errattr: |
333 | return -EMSGSIZE; |
334 | } |
335 | |
336 | static int smc_nl_fill_lgr(struct smc_link_group *lgr, |
337 | struct sk_buff *skb, |
338 | struct netlink_callback *cb) |
339 | { |
340 | char smc_target[SMC_MAX_PNETID_LEN + 1]; |
341 | struct nlattr *attrs, *v2_attrs; |
342 | |
343 | attrs = nla_nest_start(skb, attrtype: SMC_GEN_LGR_SMCR); |
344 | if (!attrs) |
345 | goto errout; |
346 | |
347 | if (nla_put_u32(skb, attrtype: SMC_NLA_LGR_R_ID, value: *((u32 *)&lgr->id))) |
348 | goto errattr; |
349 | if (nla_put_u32(skb, attrtype: SMC_NLA_LGR_R_CONNS_NUM, value: lgr->conns_num)) |
350 | goto errattr; |
351 | if (nla_put_u8(skb, attrtype: SMC_NLA_LGR_R_ROLE, value: lgr->role)) |
352 | goto errattr; |
353 | if (nla_put_u8(skb, attrtype: SMC_NLA_LGR_R_TYPE, value: lgr->type)) |
354 | goto errattr; |
355 | if (nla_put_u8(skb, attrtype: SMC_NLA_LGR_R_BUF_TYPE, value: lgr->buf_type)) |
356 | goto errattr; |
357 | if (nla_put_u8(skb, attrtype: SMC_NLA_LGR_R_VLAN_ID, value: lgr->vlan_id)) |
358 | goto errattr; |
359 | if (nla_put_u64_64bit(skb, attrtype: SMC_NLA_LGR_R_NET_COOKIE, |
360 | value: lgr->net->net_cookie, padattr: SMC_NLA_LGR_R_PAD)) |
361 | goto errattr; |
362 | memcpy(smc_target, lgr->pnet_id, SMC_MAX_PNETID_LEN); |
363 | smc_target[SMC_MAX_PNETID_LEN] = 0; |
364 | if (nla_put_string(skb, attrtype: SMC_NLA_LGR_R_PNETID, str: smc_target)) |
365 | goto errattr; |
366 | if (lgr->smc_version > SMC_V1) { |
367 | v2_attrs = nla_nest_start(skb, attrtype: SMC_NLA_LGR_R_V2_COMMON); |
368 | if (!v2_attrs) |
369 | goto errattr; |
370 | if (smc_nl_fill_lgr_v2_common(lgr, skb, cb, v2_attrs)) |
371 | goto errattr; |
372 | if (smc_nl_fill_smcr_lgr_v2(lgr, skb, cb)) |
373 | goto errattr; |
374 | } |
375 | |
376 | nla_nest_end(skb, start: attrs); |
377 | return 0; |
378 | errattr: |
379 | nla_nest_cancel(skb, start: attrs); |
380 | errout: |
381 | return -EMSGSIZE; |
382 | } |
383 | |
384 | static int smc_nl_fill_lgr_link(struct smc_link_group *lgr, |
385 | struct smc_link *link, |
386 | struct sk_buff *skb, |
387 | struct netlink_callback *cb) |
388 | { |
389 | char smc_ibname[IB_DEVICE_NAME_MAX]; |
390 | u8 smc_gid_target[41]; |
391 | struct nlattr *attrs; |
392 | u32 link_uid = 0; |
393 | void *nlh; |
394 | |
395 | nlh = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, seq: cb->nlh->nlmsg_seq, |
396 | family: &smc_gen_nl_family, NLM_F_MULTI, |
397 | cmd: SMC_NETLINK_GET_LINK_SMCR); |
398 | if (!nlh) |
399 | goto errmsg; |
400 | |
401 | attrs = nla_nest_start(skb, attrtype: SMC_GEN_LINK_SMCR); |
402 | if (!attrs) |
403 | goto errout; |
404 | |
405 | if (nla_put_u8(skb, attrtype: SMC_NLA_LINK_ID, value: link->link_id)) |
406 | goto errattr; |
407 | if (nla_put_u32(skb, attrtype: SMC_NLA_LINK_STATE, value: link->state)) |
408 | goto errattr; |
409 | if (nla_put_u32(skb, attrtype: SMC_NLA_LINK_CONN_CNT, |
410 | value: atomic_read(v: &link->conn_cnt))) |
411 | goto errattr; |
412 | if (nla_put_u8(skb, attrtype: SMC_NLA_LINK_IB_PORT, value: link->ibport)) |
413 | goto errattr; |
414 | if (nla_put_u32(skb, attrtype: SMC_NLA_LINK_NET_DEV, value: link->ndev_ifidx)) |
415 | goto errattr; |
416 | snprintf(buf: smc_ibname, size: sizeof(smc_ibname), fmt: "%s" , link->ibname); |
417 | if (nla_put_string(skb, attrtype: SMC_NLA_LINK_IB_DEV, str: smc_ibname)) |
418 | goto errattr; |
419 | memcpy(&link_uid, link->link_uid, sizeof(link_uid)); |
420 | if (nla_put_u32(skb, attrtype: SMC_NLA_LINK_UID, value: link_uid)) |
421 | goto errattr; |
422 | memcpy(&link_uid, link->peer_link_uid, sizeof(link_uid)); |
423 | if (nla_put_u32(skb, attrtype: SMC_NLA_LINK_PEER_UID, value: link_uid)) |
424 | goto errattr; |
425 | memset(smc_gid_target, 0, sizeof(smc_gid_target)); |
426 | smc_gid_be16_convert(buf: smc_gid_target, gid_raw: link->gid); |
427 | if (nla_put_string(skb, attrtype: SMC_NLA_LINK_GID, str: smc_gid_target)) |
428 | goto errattr; |
429 | memset(smc_gid_target, 0, sizeof(smc_gid_target)); |
430 | smc_gid_be16_convert(buf: smc_gid_target, gid_raw: link->peer_gid); |
431 | if (nla_put_string(skb, attrtype: SMC_NLA_LINK_PEER_GID, str: smc_gid_target)) |
432 | goto errattr; |
433 | |
434 | nla_nest_end(skb, start: attrs); |
435 | genlmsg_end(skb, hdr: nlh); |
436 | return 0; |
437 | errattr: |
438 | nla_nest_cancel(skb, start: attrs); |
439 | errout: |
440 | genlmsg_cancel(skb, hdr: nlh); |
441 | errmsg: |
442 | return -EMSGSIZE; |
443 | } |
444 | |
445 | static int smc_nl_handle_lgr(struct smc_link_group *lgr, |
446 | struct sk_buff *skb, |
447 | struct netlink_callback *cb, |
448 | bool list_links) |
449 | { |
450 | void *nlh; |
451 | int i; |
452 | |
453 | nlh = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, seq: cb->nlh->nlmsg_seq, |
454 | family: &smc_gen_nl_family, NLM_F_MULTI, |
455 | cmd: SMC_NETLINK_GET_LGR_SMCR); |
456 | if (!nlh) |
457 | goto errmsg; |
458 | if (smc_nl_fill_lgr(lgr, skb, cb)) |
459 | goto errout; |
460 | |
461 | genlmsg_end(skb, hdr: nlh); |
462 | if (!list_links) |
463 | goto out; |
464 | for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) { |
465 | if (!smc_link_usable(lnk: &lgr->lnk[i])) |
466 | continue; |
467 | if (smc_nl_fill_lgr_link(lgr, link: &lgr->lnk[i], skb, cb)) |
468 | goto errout; |
469 | } |
470 | out: |
471 | return 0; |
472 | |
473 | errout: |
474 | genlmsg_cancel(skb, hdr: nlh); |
475 | errmsg: |
476 | return -EMSGSIZE; |
477 | } |
478 | |
479 | static void smc_nl_fill_lgr_list(struct smc_lgr_list *smc_lgr, |
480 | struct sk_buff *skb, |
481 | struct netlink_callback *cb, |
482 | bool list_links) |
483 | { |
484 | struct smc_nl_dmp_ctx *cb_ctx = smc_nl_dmp_ctx(c: cb); |
485 | struct smc_link_group *lgr; |
486 | int snum = cb_ctx->pos[0]; |
487 | int num = 0; |
488 | |
489 | spin_lock_bh(lock: &smc_lgr->lock); |
490 | list_for_each_entry(lgr, &smc_lgr->list, list) { |
491 | if (num < snum) |
492 | goto next; |
493 | if (smc_nl_handle_lgr(lgr, skb, cb, list_links)) |
494 | goto errout; |
495 | next: |
496 | num++; |
497 | } |
498 | errout: |
499 | spin_unlock_bh(lock: &smc_lgr->lock); |
500 | cb_ctx->pos[0] = num; |
501 | } |
502 | |
503 | static int smc_nl_fill_smcd_lgr(struct smc_link_group *lgr, |
504 | struct sk_buff *skb, |
505 | struct netlink_callback *cb) |
506 | { |
507 | char smc_pnet[SMC_MAX_PNETID_LEN + 1]; |
508 | struct smcd_dev *smcd = lgr->smcd; |
509 | struct nlattr *attrs; |
510 | void *nlh; |
511 | |
512 | nlh = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, seq: cb->nlh->nlmsg_seq, |
513 | family: &smc_gen_nl_family, NLM_F_MULTI, |
514 | cmd: SMC_NETLINK_GET_LGR_SMCD); |
515 | if (!nlh) |
516 | goto errmsg; |
517 | |
518 | attrs = nla_nest_start(skb, attrtype: SMC_GEN_LGR_SMCD); |
519 | if (!attrs) |
520 | goto errout; |
521 | |
522 | if (nla_put_u32(skb, attrtype: SMC_NLA_LGR_D_ID, value: *((u32 *)&lgr->id))) |
523 | goto errattr; |
524 | if (nla_put_u64_64bit(skb, attrtype: SMC_NLA_LGR_D_GID, |
525 | value: smcd->ops->get_local_gid(smcd), |
526 | padattr: SMC_NLA_LGR_D_PAD)) |
527 | goto errattr; |
528 | if (nla_put_u64_64bit(skb, attrtype: SMC_NLA_LGR_D_PEER_GID, value: lgr->peer_gid, |
529 | padattr: SMC_NLA_LGR_D_PAD)) |
530 | goto errattr; |
531 | if (nla_put_u8(skb, attrtype: SMC_NLA_LGR_D_VLAN_ID, value: lgr->vlan_id)) |
532 | goto errattr; |
533 | if (nla_put_u32(skb, attrtype: SMC_NLA_LGR_D_CONNS_NUM, value: lgr->conns_num)) |
534 | goto errattr; |
535 | if (nla_put_u32(skb, attrtype: SMC_NLA_LGR_D_CHID, value: smc_ism_get_chid(dev: lgr->smcd))) |
536 | goto errattr; |
537 | memcpy(smc_pnet, lgr->smcd->pnetid, SMC_MAX_PNETID_LEN); |
538 | smc_pnet[SMC_MAX_PNETID_LEN] = 0; |
539 | if (nla_put_string(skb, attrtype: SMC_NLA_LGR_D_PNETID, str: smc_pnet)) |
540 | goto errattr; |
541 | if (lgr->smc_version > SMC_V1) { |
542 | struct nlattr *v2_attrs; |
543 | |
544 | v2_attrs = nla_nest_start(skb, attrtype: SMC_NLA_LGR_D_V2_COMMON); |
545 | if (!v2_attrs) |
546 | goto errattr; |
547 | if (smc_nl_fill_lgr_v2_common(lgr, skb, cb, v2_attrs)) |
548 | goto errattr; |
549 | } |
550 | nla_nest_end(skb, start: attrs); |
551 | genlmsg_end(skb, hdr: nlh); |
552 | return 0; |
553 | |
554 | errattr: |
555 | nla_nest_cancel(skb, start: attrs); |
556 | errout: |
557 | genlmsg_cancel(skb, hdr: nlh); |
558 | errmsg: |
559 | return -EMSGSIZE; |
560 | } |
561 | |
562 | static int smc_nl_handle_smcd_lgr(struct smcd_dev *dev, |
563 | struct sk_buff *skb, |
564 | struct netlink_callback *cb) |
565 | { |
566 | struct smc_nl_dmp_ctx *cb_ctx = smc_nl_dmp_ctx(c: cb); |
567 | struct smc_link_group *lgr; |
568 | int snum = cb_ctx->pos[1]; |
569 | int rc = 0, num = 0; |
570 | |
571 | spin_lock_bh(lock: &dev->lgr_lock); |
572 | list_for_each_entry(lgr, &dev->lgr_list, list) { |
573 | if (!lgr->is_smcd) |
574 | continue; |
575 | if (num < snum) |
576 | goto next; |
577 | rc = smc_nl_fill_smcd_lgr(lgr, skb, cb); |
578 | if (rc) |
579 | goto errout; |
580 | next: |
581 | num++; |
582 | } |
583 | errout: |
584 | spin_unlock_bh(lock: &dev->lgr_lock); |
585 | cb_ctx->pos[1] = num; |
586 | return rc; |
587 | } |
588 | |
589 | static int smc_nl_fill_smcd_dev(struct smcd_dev_list *dev_list, |
590 | struct sk_buff *skb, |
591 | struct netlink_callback *cb) |
592 | { |
593 | struct smc_nl_dmp_ctx *cb_ctx = smc_nl_dmp_ctx(c: cb); |
594 | struct smcd_dev *smcd_dev; |
595 | int snum = cb_ctx->pos[0]; |
596 | int rc = 0, num = 0; |
597 | |
598 | mutex_lock(&dev_list->mutex); |
599 | list_for_each_entry(smcd_dev, &dev_list->list, list) { |
600 | if (list_empty(head: &smcd_dev->lgr_list)) |
601 | continue; |
602 | if (num < snum) |
603 | goto next; |
604 | rc = smc_nl_handle_smcd_lgr(dev: smcd_dev, skb, cb); |
605 | if (rc) |
606 | goto errout; |
607 | next: |
608 | num++; |
609 | } |
610 | errout: |
611 | mutex_unlock(lock: &dev_list->mutex); |
612 | cb_ctx->pos[0] = num; |
613 | return rc; |
614 | } |
615 | |
616 | int smcr_nl_get_lgr(struct sk_buff *skb, struct netlink_callback *cb) |
617 | { |
618 | bool list_links = false; |
619 | |
620 | smc_nl_fill_lgr_list(smc_lgr: &smc_lgr_list, skb, cb, list_links); |
621 | return skb->len; |
622 | } |
623 | |
624 | int smcr_nl_get_link(struct sk_buff *skb, struct netlink_callback *cb) |
625 | { |
626 | bool list_links = true; |
627 | |
628 | smc_nl_fill_lgr_list(smc_lgr: &smc_lgr_list, skb, cb, list_links); |
629 | return skb->len; |
630 | } |
631 | |
632 | int smcd_nl_get_lgr(struct sk_buff *skb, struct netlink_callback *cb) |
633 | { |
634 | smc_nl_fill_smcd_dev(dev_list: &smcd_dev_list, skb, cb); |
635 | return skb->len; |
636 | } |
637 | |
638 | void smc_lgr_cleanup_early(struct smc_link_group *lgr) |
639 | { |
640 | spinlock_t *lgr_lock; |
641 | |
642 | if (!lgr) |
643 | return; |
644 | |
645 | smc_lgr_list_head(lgr, lgr_lock: &lgr_lock); |
646 | spin_lock_bh(lock: lgr_lock); |
647 | /* do not use this link group for new connections */ |
648 | if (!list_empty(head: &lgr->list)) |
649 | list_del_init(entry: &lgr->list); |
650 | spin_unlock_bh(lock: lgr_lock); |
651 | __smc_lgr_terminate(lgr, soft: true); |
652 | } |
653 | |
654 | static void smcr_lgr_link_deactivate_all(struct smc_link_group *lgr) |
655 | { |
656 | int i; |
657 | |
658 | for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) { |
659 | struct smc_link *lnk = &lgr->lnk[i]; |
660 | |
661 | if (smc_link_sendable(lnk)) |
662 | lnk->state = SMC_LNK_INACTIVE; |
663 | } |
664 | wake_up_all(&lgr->llc_msg_waiter); |
665 | wake_up_all(&lgr->llc_flow_waiter); |
666 | } |
667 | |
668 | static void smc_lgr_free(struct smc_link_group *lgr); |
669 | |
670 | static void smc_lgr_free_work(struct work_struct *work) |
671 | { |
672 | struct smc_link_group *lgr = container_of(to_delayed_work(work), |
673 | struct smc_link_group, |
674 | free_work); |
675 | spinlock_t *lgr_lock; |
676 | bool conns; |
677 | |
678 | smc_lgr_list_head(lgr, lgr_lock: &lgr_lock); |
679 | spin_lock_bh(lock: lgr_lock); |
680 | if (lgr->freeing) { |
681 | spin_unlock_bh(lock: lgr_lock); |
682 | return; |
683 | } |
684 | read_lock_bh(&lgr->conns_lock); |
685 | conns = RB_EMPTY_ROOT(&lgr->conns_all); |
686 | read_unlock_bh(&lgr->conns_lock); |
687 | if (!conns) { /* number of lgr connections is no longer zero */ |
688 | spin_unlock_bh(lock: lgr_lock); |
689 | return; |
690 | } |
691 | list_del_init(entry: &lgr->list); /* remove from smc_lgr_list */ |
692 | lgr->freeing = 1; /* this instance does the freeing, no new schedule */ |
693 | spin_unlock_bh(lock: lgr_lock); |
694 | cancel_delayed_work(dwork: &lgr->free_work); |
695 | |
696 | if (!lgr->is_smcd && !lgr->terminating) |
697 | smc_llc_send_link_delete_all(lgr, ord: true, |
698 | SMC_LLC_DEL_PROG_INIT_TERM); |
699 | if (lgr->is_smcd && !lgr->terminating) |
700 | smc_ism_signal_shutdown(lgr); |
701 | if (!lgr->is_smcd) |
702 | smcr_lgr_link_deactivate_all(lgr); |
703 | smc_lgr_free(lgr); |
704 | } |
705 | |
706 | static void smc_lgr_terminate_work(struct work_struct *work) |
707 | { |
708 | struct smc_link_group *lgr = container_of(work, struct smc_link_group, |
709 | terminate_work); |
710 | |
711 | __smc_lgr_terminate(lgr, soft: true); |
712 | } |
713 | |
714 | /* return next unique link id for the lgr */ |
715 | static u8 smcr_next_link_id(struct smc_link_group *lgr) |
716 | { |
717 | u8 link_id; |
718 | int i; |
719 | |
720 | while (1) { |
721 | again: |
722 | link_id = ++lgr->next_link_id; |
723 | if (!link_id) /* skip zero as link_id */ |
724 | link_id = ++lgr->next_link_id; |
725 | for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) { |
726 | if (smc_link_usable(lnk: &lgr->lnk[i]) && |
727 | lgr->lnk[i].link_id == link_id) |
728 | goto again; |
729 | } |
730 | break; |
731 | } |
732 | return link_id; |
733 | } |
734 | |
735 | static void smcr_copy_dev_info_to_link(struct smc_link *link) |
736 | { |
737 | struct smc_ib_device *smcibdev = link->smcibdev; |
738 | |
739 | snprintf(buf: link->ibname, size: sizeof(link->ibname), fmt: "%s" , |
740 | smcibdev->ibdev->name); |
741 | link->ndev_ifidx = smcibdev->ndev_ifidx[link->ibport - 1]; |
742 | } |
743 | |
744 | int smcr_link_init(struct smc_link_group *lgr, struct smc_link *lnk, |
745 | u8 link_idx, struct smc_init_info *ini) |
746 | { |
747 | struct smc_ib_device *smcibdev; |
748 | u8 rndvec[3]; |
749 | int rc; |
750 | |
751 | if (lgr->smc_version == SMC_V2) { |
752 | lnk->smcibdev = ini->smcrv2.ib_dev_v2; |
753 | lnk->ibport = ini->smcrv2.ib_port_v2; |
754 | } else { |
755 | lnk->smcibdev = ini->ib_dev; |
756 | lnk->ibport = ini->ib_port; |
757 | } |
758 | get_device(dev: &lnk->smcibdev->ibdev->dev); |
759 | atomic_inc(v: &lnk->smcibdev->lnk_cnt); |
760 | refcount_set(r: &lnk->refcnt, n: 1); /* link refcnt is set to 1 */ |
761 | lnk->clearing = 0; |
762 | lnk->path_mtu = lnk->smcibdev->pattr[lnk->ibport - 1].active_mtu; |
763 | lnk->link_id = smcr_next_link_id(lgr); |
764 | lnk->lgr = lgr; |
765 | smc_lgr_hold(lgr); /* lgr_put in smcr_link_clear() */ |
766 | lnk->link_idx = link_idx; |
767 | lnk->wr_rx_id_compl = 0; |
768 | smc_ibdev_cnt_inc(lnk); |
769 | smcr_copy_dev_info_to_link(link: lnk); |
770 | atomic_set(v: &lnk->conn_cnt, i: 0); |
771 | smc_llc_link_set_uid(link: lnk); |
772 | INIT_WORK(&lnk->link_down_wrk, smc_link_down_work); |
773 | if (!lnk->smcibdev->initialized) { |
774 | rc = (int)smc_ib_setup_per_ibdev(smcibdev: lnk->smcibdev); |
775 | if (rc) |
776 | goto out; |
777 | } |
778 | get_random_bytes(buf: rndvec, len: sizeof(rndvec)); |
779 | lnk->psn_initial = rndvec[0] + (rndvec[1] << 8) + |
780 | (rndvec[2] << 16); |
781 | rc = smc_ib_determine_gid(smcibdev: lnk->smcibdev, ibport: lnk->ibport, |
782 | vlan_id: ini->vlan_id, gid: lnk->gid, sgid_index: &lnk->sgid_index, |
783 | smcrv2: lgr->smc_version == SMC_V2 ? |
784 | &ini->smcrv2 : NULL); |
785 | if (rc) |
786 | goto out; |
787 | rc = smc_llc_link_init(link: lnk); |
788 | if (rc) |
789 | goto out; |
790 | rc = smc_wr_alloc_link_mem(lnk); |
791 | if (rc) |
792 | goto clear_llc_lnk; |
793 | rc = smc_ib_create_protection_domain(lnk); |
794 | if (rc) |
795 | goto free_link_mem; |
796 | rc = smc_ib_create_queue_pair(lnk); |
797 | if (rc) |
798 | goto dealloc_pd; |
799 | rc = smc_wr_create_link(lnk); |
800 | if (rc) |
801 | goto destroy_qp; |
802 | lnk->state = SMC_LNK_ACTIVATING; |
803 | return 0; |
804 | |
805 | destroy_qp: |
806 | smc_ib_destroy_queue_pair(lnk); |
807 | dealloc_pd: |
808 | smc_ib_dealloc_protection_domain(lnk); |
809 | free_link_mem: |
810 | smc_wr_free_link_mem(lnk); |
811 | clear_llc_lnk: |
812 | smc_llc_link_clear(link: lnk, log: false); |
813 | out: |
814 | smc_ibdev_cnt_dec(lnk); |
815 | put_device(dev: &lnk->smcibdev->ibdev->dev); |
816 | smcibdev = lnk->smcibdev; |
817 | memset(lnk, 0, sizeof(struct smc_link)); |
818 | lnk->state = SMC_LNK_UNUSED; |
819 | if (!atomic_dec_return(v: &smcibdev->lnk_cnt)) |
820 | wake_up(&smcibdev->lnks_deleted); |
821 | smc_lgr_put(lgr); /* lgr_hold above */ |
822 | return rc; |
823 | } |
824 | |
825 | /* create a new SMC link group */ |
826 | static int smc_lgr_create(struct smc_sock *smc, struct smc_init_info *ini) |
827 | { |
828 | struct smc_link_group *lgr; |
829 | struct list_head *lgr_list; |
830 | struct smcd_dev *smcd; |
831 | struct smc_link *lnk; |
832 | spinlock_t *lgr_lock; |
833 | u8 link_idx; |
834 | int rc = 0; |
835 | int i; |
836 | |
837 | if (ini->is_smcd && ini->vlan_id) { |
838 | if (smc_ism_get_vlan(dev: ini->ism_dev[ini->ism_selected], |
839 | vlan_id: ini->vlan_id)) { |
840 | rc = SMC_CLC_DECL_ISMVLANERR; |
841 | goto out; |
842 | } |
843 | } |
844 | |
845 | lgr = kzalloc(size: sizeof(*lgr), GFP_KERNEL); |
846 | if (!lgr) { |
847 | rc = SMC_CLC_DECL_MEM; |
848 | goto ism_put_vlan; |
849 | } |
850 | lgr->tx_wq = alloc_workqueue(fmt: "smc_tx_wq-%*phN" , flags: 0, max_active: 0, |
851 | SMC_LGR_ID_SIZE, &lgr->id); |
852 | if (!lgr->tx_wq) { |
853 | rc = -ENOMEM; |
854 | goto free_lgr; |
855 | } |
856 | lgr->is_smcd = ini->is_smcd; |
857 | lgr->sync_err = 0; |
858 | lgr->terminating = 0; |
859 | lgr->freeing = 0; |
860 | lgr->vlan_id = ini->vlan_id; |
861 | refcount_set(r: &lgr->refcnt, n: 1); /* set lgr refcnt to 1 */ |
862 | init_rwsem(&lgr->sndbufs_lock); |
863 | init_rwsem(&lgr->rmbs_lock); |
864 | rwlock_init(&lgr->conns_lock); |
865 | for (i = 0; i < SMC_RMBE_SIZES; i++) { |
866 | INIT_LIST_HEAD(list: &lgr->sndbufs[i]); |
867 | INIT_LIST_HEAD(list: &lgr->rmbs[i]); |
868 | } |
869 | lgr->next_link_id = 0; |
870 | smc_lgr_list.num += SMC_LGR_NUM_INCR; |
871 | memcpy(&lgr->id, (u8 *)&smc_lgr_list.num, SMC_LGR_ID_SIZE); |
872 | INIT_DELAYED_WORK(&lgr->free_work, smc_lgr_free_work); |
873 | INIT_WORK(&lgr->terminate_work, smc_lgr_terminate_work); |
874 | lgr->conns_all = RB_ROOT; |
875 | if (ini->is_smcd) { |
876 | /* SMC-D specific settings */ |
877 | smcd = ini->ism_dev[ini->ism_selected]; |
878 | get_device(dev: smcd->ops->get_dev(smcd)); |
879 | lgr->peer_gid = ini->ism_peer_gid[ini->ism_selected]; |
880 | lgr->smcd = ini->ism_dev[ini->ism_selected]; |
881 | lgr_list = &ini->ism_dev[ini->ism_selected]->lgr_list; |
882 | lgr_lock = &lgr->smcd->lgr_lock; |
883 | lgr->smc_version = ini->smcd_version; |
884 | lgr->peer_shutdown = 0; |
885 | atomic_inc(v: &ini->ism_dev[ini->ism_selected]->lgr_cnt); |
886 | } else { |
887 | /* SMC-R specific settings */ |
888 | struct smc_ib_device *ibdev; |
889 | int ibport; |
890 | |
891 | lgr->role = smc->listen_smc ? SMC_SERV : SMC_CLNT; |
892 | lgr->smc_version = ini->smcr_version; |
893 | memcpy(lgr->peer_systemid, ini->peer_systemid, |
894 | SMC_SYSTEMID_LEN); |
895 | if (lgr->smc_version == SMC_V2) { |
896 | ibdev = ini->smcrv2.ib_dev_v2; |
897 | ibport = ini->smcrv2.ib_port_v2; |
898 | lgr->saddr = ini->smcrv2.saddr; |
899 | lgr->uses_gateway = ini->smcrv2.uses_gateway; |
900 | memcpy(lgr->nexthop_mac, ini->smcrv2.nexthop_mac, |
901 | ETH_ALEN); |
902 | lgr->max_conns = ini->max_conns; |
903 | lgr->max_links = ini->max_links; |
904 | } else { |
905 | ibdev = ini->ib_dev; |
906 | ibport = ini->ib_port; |
907 | lgr->max_conns = SMC_CONN_PER_LGR_MAX; |
908 | lgr->max_links = SMC_LINKS_ADD_LNK_MAX; |
909 | } |
910 | memcpy(lgr->pnet_id, ibdev->pnetid[ibport - 1], |
911 | SMC_MAX_PNETID_LEN); |
912 | rc = smc_wr_alloc_lgr_mem(lgr); |
913 | if (rc) |
914 | goto free_wq; |
915 | smc_llc_lgr_init(lgr, smc); |
916 | |
917 | link_idx = SMC_SINGLE_LINK; |
918 | lnk = &lgr->lnk[link_idx]; |
919 | rc = smcr_link_init(lgr, lnk, link_idx, ini); |
920 | if (rc) { |
921 | smc_wr_free_lgr_mem(lgr); |
922 | goto free_wq; |
923 | } |
924 | lgr->net = smc_ib_net(smcibdev: lnk->smcibdev); |
925 | lgr_list = &smc_lgr_list.list; |
926 | lgr_lock = &smc_lgr_list.lock; |
927 | lgr->buf_type = lgr->net->smc.sysctl_smcr_buf_type; |
928 | atomic_inc(v: &lgr_cnt); |
929 | } |
930 | smc->conn.lgr = lgr; |
931 | spin_lock_bh(lock: lgr_lock); |
932 | list_add_tail(new: &lgr->list, head: lgr_list); |
933 | spin_unlock_bh(lock: lgr_lock); |
934 | return 0; |
935 | |
936 | free_wq: |
937 | destroy_workqueue(wq: lgr->tx_wq); |
938 | free_lgr: |
939 | kfree(objp: lgr); |
940 | ism_put_vlan: |
941 | if (ini->is_smcd && ini->vlan_id) |
942 | smc_ism_put_vlan(dev: ini->ism_dev[ini->ism_selected], vlan_id: ini->vlan_id); |
943 | out: |
944 | if (rc < 0) { |
945 | if (rc == -ENOMEM) |
946 | rc = SMC_CLC_DECL_MEM; |
947 | else |
948 | rc = SMC_CLC_DECL_INTERR; |
949 | } |
950 | return rc; |
951 | } |
952 | |
953 | static int smc_write_space(struct smc_connection *conn) |
954 | { |
955 | int buffer_len = conn->peer_rmbe_size; |
956 | union smc_host_cursor prod; |
957 | union smc_host_cursor cons; |
958 | int space; |
959 | |
960 | smc_curs_copy(tgt: &prod, src: &conn->local_tx_ctrl.prod, conn); |
961 | smc_curs_copy(tgt: &cons, src: &conn->local_rx_ctrl.cons, conn); |
962 | /* determine rx_buf space */ |
963 | space = buffer_len - smc_curs_diff(size: buffer_len, old: &cons, new: &prod); |
964 | return space; |
965 | } |
966 | |
967 | static int smc_switch_cursor(struct smc_sock *smc, struct smc_cdc_tx_pend *pend, |
968 | struct smc_wr_buf *wr_buf) |
969 | { |
970 | struct smc_connection *conn = &smc->conn; |
971 | union smc_host_cursor cons, fin; |
972 | int rc = 0; |
973 | int diff; |
974 | |
975 | smc_curs_copy(tgt: &conn->tx_curs_sent, src: &conn->tx_curs_fin, conn); |
976 | smc_curs_copy(tgt: &fin, src: &conn->local_tx_ctrl_fin, conn); |
977 | /* set prod cursor to old state, enforce tx_rdma_writes() */ |
978 | smc_curs_copy(tgt: &conn->local_tx_ctrl.prod, src: &fin, conn); |
979 | smc_curs_copy(tgt: &cons, src: &conn->local_rx_ctrl.cons, conn); |
980 | |
981 | if (smc_curs_comp(size: conn->peer_rmbe_size, old: &cons, new: &fin) < 0) { |
982 | /* cons cursor advanced more than fin, and prod was set |
983 | * fin above, so now prod is smaller than cons. Fix that. |
984 | */ |
985 | diff = smc_curs_diff(size: conn->peer_rmbe_size, old: &fin, new: &cons); |
986 | smc_curs_add(size: conn->sndbuf_desc->len, |
987 | curs: &conn->tx_curs_sent, value: diff); |
988 | smc_curs_add(size: conn->sndbuf_desc->len, |
989 | curs: &conn->tx_curs_fin, value: diff); |
990 | |
991 | smp_mb__before_atomic(); |
992 | atomic_add(i: diff, v: &conn->sndbuf_space); |
993 | smp_mb__after_atomic(); |
994 | |
995 | smc_curs_add(size: conn->peer_rmbe_size, |
996 | curs: &conn->local_tx_ctrl.prod, value: diff); |
997 | smc_curs_add(size: conn->peer_rmbe_size, |
998 | curs: &conn->local_tx_ctrl_fin, value: diff); |
999 | } |
1000 | /* recalculate, value is used by tx_rdma_writes() */ |
1001 | atomic_set(v: &smc->conn.peer_rmbe_space, i: smc_write_space(conn)); |
1002 | |
1003 | if (smc->sk.sk_state != SMC_INIT && |
1004 | smc->sk.sk_state != SMC_CLOSED) { |
1005 | rc = smcr_cdc_msg_send_validation(conn, pend, wr_buf); |
1006 | if (!rc) { |
1007 | queue_delayed_work(wq: conn->lgr->tx_wq, dwork: &conn->tx_work, delay: 0); |
1008 | smc->sk.sk_data_ready(&smc->sk); |
1009 | } |
1010 | } else { |
1011 | smc_wr_tx_put_slot(link: conn->lnk, |
1012 | wr_pend_priv: (struct smc_wr_tx_pend_priv *)pend); |
1013 | } |
1014 | return rc; |
1015 | } |
1016 | |
1017 | void smc_switch_link_and_count(struct smc_connection *conn, |
1018 | struct smc_link *to_lnk) |
1019 | { |
1020 | atomic_dec(v: &conn->lnk->conn_cnt); |
1021 | /* link_hold in smc_conn_create() */ |
1022 | smcr_link_put(lnk: conn->lnk); |
1023 | conn->lnk = to_lnk; |
1024 | atomic_inc(v: &conn->lnk->conn_cnt); |
1025 | /* link_put in smc_conn_free() */ |
1026 | smcr_link_hold(lnk: conn->lnk); |
1027 | } |
1028 | |
1029 | struct smc_link *smc_switch_conns(struct smc_link_group *lgr, |
1030 | struct smc_link *from_lnk, bool is_dev_err) |
1031 | { |
1032 | struct smc_link *to_lnk = NULL; |
1033 | struct smc_cdc_tx_pend *pend; |
1034 | struct smc_connection *conn; |
1035 | struct smc_wr_buf *wr_buf; |
1036 | struct smc_sock *smc; |
1037 | struct rb_node *node; |
1038 | int i, rc = 0; |
1039 | |
1040 | /* link is inactive, wake up tx waiters */ |
1041 | smc_wr_wakeup_tx_wait(lnk: from_lnk); |
1042 | |
1043 | for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) { |
1044 | if (!smc_link_active(lnk: &lgr->lnk[i]) || i == from_lnk->link_idx) |
1045 | continue; |
1046 | if (is_dev_err && from_lnk->smcibdev == lgr->lnk[i].smcibdev && |
1047 | from_lnk->ibport == lgr->lnk[i].ibport) { |
1048 | continue; |
1049 | } |
1050 | to_lnk = &lgr->lnk[i]; |
1051 | break; |
1052 | } |
1053 | if (!to_lnk || !smc_wr_tx_link_hold(link: to_lnk)) { |
1054 | smc_lgr_terminate_sched(lgr); |
1055 | return NULL; |
1056 | } |
1057 | again: |
1058 | read_lock_bh(&lgr->conns_lock); |
1059 | for (node = rb_first(&lgr->conns_all); node; node = rb_next(node)) { |
1060 | conn = rb_entry(node, struct smc_connection, alert_node); |
1061 | if (conn->lnk != from_lnk) |
1062 | continue; |
1063 | smc = container_of(conn, struct smc_sock, conn); |
1064 | /* conn->lnk not yet set in SMC_INIT state */ |
1065 | if (smc->sk.sk_state == SMC_INIT) |
1066 | continue; |
1067 | if (smc->sk.sk_state == SMC_CLOSED || |
1068 | smc->sk.sk_state == SMC_PEERCLOSEWAIT1 || |
1069 | smc->sk.sk_state == SMC_PEERCLOSEWAIT2 || |
1070 | smc->sk.sk_state == SMC_APPFINCLOSEWAIT || |
1071 | smc->sk.sk_state == SMC_APPCLOSEWAIT1 || |
1072 | smc->sk.sk_state == SMC_APPCLOSEWAIT2 || |
1073 | smc->sk.sk_state == SMC_PEERFINCLOSEWAIT || |
1074 | smc->sk.sk_state == SMC_PEERABORTWAIT || |
1075 | smc->sk.sk_state == SMC_PROCESSABORT) { |
1076 | spin_lock_bh(lock: &conn->send_lock); |
1077 | smc_switch_link_and_count(conn, to_lnk); |
1078 | spin_unlock_bh(lock: &conn->send_lock); |
1079 | continue; |
1080 | } |
1081 | sock_hold(sk: &smc->sk); |
1082 | read_unlock_bh(&lgr->conns_lock); |
1083 | /* pre-fetch buffer outside of send_lock, might sleep */ |
1084 | rc = smc_cdc_get_free_slot(conn, link: to_lnk, wr_buf: &wr_buf, NULL, pend: &pend); |
1085 | if (rc) |
1086 | goto err_out; |
1087 | /* avoid race with smcr_tx_sndbuf_nonempty() */ |
1088 | spin_lock_bh(lock: &conn->send_lock); |
1089 | smc_switch_link_and_count(conn, to_lnk); |
1090 | rc = smc_switch_cursor(smc, pend, wr_buf); |
1091 | spin_unlock_bh(lock: &conn->send_lock); |
1092 | sock_put(sk: &smc->sk); |
1093 | if (rc) |
1094 | goto err_out; |
1095 | goto again; |
1096 | } |
1097 | read_unlock_bh(&lgr->conns_lock); |
1098 | smc_wr_tx_link_put(link: to_lnk); |
1099 | return to_lnk; |
1100 | |
1101 | err_out: |
1102 | smcr_link_down_cond_sched(lnk: to_lnk); |
1103 | smc_wr_tx_link_put(link: to_lnk); |
1104 | return NULL; |
1105 | } |
1106 | |
1107 | static void smcr_buf_unuse(struct smc_buf_desc *buf_desc, bool is_rmb, |
1108 | struct smc_link_group *lgr) |
1109 | { |
1110 | struct rw_semaphore *lock; /* lock buffer list */ |
1111 | int rc; |
1112 | |
1113 | if (is_rmb && buf_desc->is_conf_rkey && !list_empty(head: &lgr->list)) { |
1114 | /* unregister rmb with peer */ |
1115 | rc = smc_llc_flow_initiate(lgr, type: SMC_LLC_FLOW_RKEY); |
1116 | if (!rc) { |
1117 | /* protect against smc_llc_cli_rkey_exchange() */ |
1118 | down_read(sem: &lgr->llc_conf_mutex); |
1119 | smc_llc_do_delete_rkey(lgr, rmb_desc: buf_desc); |
1120 | buf_desc->is_conf_rkey = false; |
1121 | up_read(sem: &lgr->llc_conf_mutex); |
1122 | smc_llc_flow_stop(lgr, flow: &lgr->llc_flow_lcl); |
1123 | } |
1124 | } |
1125 | |
1126 | if (buf_desc->is_reg_err) { |
1127 | /* buf registration failed, reuse not possible */ |
1128 | lock = is_rmb ? &lgr->rmbs_lock : |
1129 | &lgr->sndbufs_lock; |
1130 | down_write(sem: lock); |
1131 | list_del(entry: &buf_desc->list); |
1132 | up_write(sem: lock); |
1133 | |
1134 | smc_buf_free(lgr, is_rmb, buf_desc); |
1135 | } else { |
1136 | /* memzero_explicit provides potential memory barrier semantics */ |
1137 | memzero_explicit(s: buf_desc->cpu_addr, count: buf_desc->len); |
1138 | WRITE_ONCE(buf_desc->used, 0); |
1139 | } |
1140 | } |
1141 | |
1142 | static void smc_buf_unuse(struct smc_connection *conn, |
1143 | struct smc_link_group *lgr) |
1144 | { |
1145 | if (conn->sndbuf_desc) { |
1146 | if (!lgr->is_smcd && conn->sndbuf_desc->is_vm) { |
1147 | smcr_buf_unuse(buf_desc: conn->sndbuf_desc, is_rmb: false, lgr); |
1148 | } else { |
1149 | memzero_explicit(s: conn->sndbuf_desc->cpu_addr, count: conn->sndbuf_desc->len); |
1150 | WRITE_ONCE(conn->sndbuf_desc->used, 0); |
1151 | } |
1152 | } |
1153 | if (conn->rmb_desc) { |
1154 | if (!lgr->is_smcd) { |
1155 | smcr_buf_unuse(buf_desc: conn->rmb_desc, is_rmb: true, lgr); |
1156 | } else { |
1157 | memzero_explicit(s: conn->rmb_desc->cpu_addr, |
1158 | count: conn->rmb_desc->len + sizeof(struct smcd_cdc_msg)); |
1159 | WRITE_ONCE(conn->rmb_desc->used, 0); |
1160 | } |
1161 | } |
1162 | } |
1163 | |
1164 | /* remove a finished connection from its link group */ |
1165 | void smc_conn_free(struct smc_connection *conn) |
1166 | { |
1167 | struct smc_link_group *lgr = conn->lgr; |
1168 | |
1169 | if (!lgr || conn->freed) |
1170 | /* Connection has never been registered in a |
1171 | * link group, or has already been freed. |
1172 | */ |
1173 | return; |
1174 | |
1175 | conn->freed = 1; |
1176 | if (!smc_conn_lgr_valid(conn)) |
1177 | /* Connection has already unregistered from |
1178 | * link group. |
1179 | */ |
1180 | goto lgr_put; |
1181 | |
1182 | if (lgr->is_smcd) { |
1183 | if (!list_empty(head: &lgr->list)) |
1184 | smc_ism_unset_conn(conn); |
1185 | tasklet_kill(t: &conn->rx_tsklet); |
1186 | } else { |
1187 | smc_cdc_wait_pend_tx_wr(conn); |
1188 | if (current_work() != &conn->abort_work) |
1189 | cancel_work_sync(work: &conn->abort_work); |
1190 | } |
1191 | if (!list_empty(head: &lgr->list)) { |
1192 | smc_buf_unuse(conn, lgr); /* allow buffer reuse */ |
1193 | smc_lgr_unregister_conn(conn); |
1194 | } |
1195 | |
1196 | if (!lgr->conns_num) |
1197 | smc_lgr_schedule_free_work(lgr); |
1198 | lgr_put: |
1199 | if (!lgr->is_smcd) |
1200 | smcr_link_put(lnk: conn->lnk); /* link_hold in smc_conn_create() */ |
1201 | smc_lgr_put(lgr); /* lgr_hold in smc_conn_create() */ |
1202 | } |
1203 | |
1204 | /* unregister a link from a buf_desc */ |
1205 | static void smcr_buf_unmap_link(struct smc_buf_desc *buf_desc, bool is_rmb, |
1206 | struct smc_link *lnk) |
1207 | { |
1208 | if (is_rmb || buf_desc->is_vm) |
1209 | buf_desc->is_reg_mr[lnk->link_idx] = false; |
1210 | if (!buf_desc->is_map_ib[lnk->link_idx]) |
1211 | return; |
1212 | |
1213 | if ((is_rmb || buf_desc->is_vm) && |
1214 | buf_desc->mr[lnk->link_idx]) { |
1215 | smc_ib_put_memory_region(mr: buf_desc->mr[lnk->link_idx]); |
1216 | buf_desc->mr[lnk->link_idx] = NULL; |
1217 | } |
1218 | if (is_rmb) |
1219 | smc_ib_buf_unmap_sg(lnk, buf_slot: buf_desc, data_direction: DMA_FROM_DEVICE); |
1220 | else |
1221 | smc_ib_buf_unmap_sg(lnk, buf_slot: buf_desc, data_direction: DMA_TO_DEVICE); |
1222 | |
1223 | sg_free_table(&buf_desc->sgt[lnk->link_idx]); |
1224 | buf_desc->is_map_ib[lnk->link_idx] = false; |
1225 | } |
1226 | |
1227 | /* unmap all buffers of lgr for a deleted link */ |
1228 | static void smcr_buf_unmap_lgr(struct smc_link *lnk) |
1229 | { |
1230 | struct smc_link_group *lgr = lnk->lgr; |
1231 | struct smc_buf_desc *buf_desc, *bf; |
1232 | int i; |
1233 | |
1234 | for (i = 0; i < SMC_RMBE_SIZES; i++) { |
1235 | down_write(sem: &lgr->rmbs_lock); |
1236 | list_for_each_entry_safe(buf_desc, bf, &lgr->rmbs[i], list) |
1237 | smcr_buf_unmap_link(buf_desc, is_rmb: true, lnk); |
1238 | up_write(sem: &lgr->rmbs_lock); |
1239 | |
1240 | down_write(sem: &lgr->sndbufs_lock); |
1241 | list_for_each_entry_safe(buf_desc, bf, &lgr->sndbufs[i], |
1242 | list) |
1243 | smcr_buf_unmap_link(buf_desc, is_rmb: false, lnk); |
1244 | up_write(sem: &lgr->sndbufs_lock); |
1245 | } |
1246 | } |
1247 | |
1248 | static void smcr_rtoken_clear_link(struct smc_link *lnk) |
1249 | { |
1250 | struct smc_link_group *lgr = lnk->lgr; |
1251 | int i; |
1252 | |
1253 | for (i = 0; i < SMC_RMBS_PER_LGR_MAX; i++) { |
1254 | lgr->rtokens[i][lnk->link_idx].rkey = 0; |
1255 | lgr->rtokens[i][lnk->link_idx].dma_addr = 0; |
1256 | } |
1257 | } |
1258 | |
1259 | static void __smcr_link_clear(struct smc_link *lnk) |
1260 | { |
1261 | struct smc_link_group *lgr = lnk->lgr; |
1262 | struct smc_ib_device *smcibdev; |
1263 | |
1264 | smc_wr_free_link_mem(lnk); |
1265 | smc_ibdev_cnt_dec(lnk); |
1266 | put_device(dev: &lnk->smcibdev->ibdev->dev); |
1267 | smcibdev = lnk->smcibdev; |
1268 | memset(lnk, 0, sizeof(struct smc_link)); |
1269 | lnk->state = SMC_LNK_UNUSED; |
1270 | if (!atomic_dec_return(v: &smcibdev->lnk_cnt)) |
1271 | wake_up(&smcibdev->lnks_deleted); |
1272 | smc_lgr_put(lgr); /* lgr_hold in smcr_link_init() */ |
1273 | } |
1274 | |
1275 | /* must be called under lgr->llc_conf_mutex lock */ |
1276 | void smcr_link_clear(struct smc_link *lnk, bool log) |
1277 | { |
1278 | if (!lnk->lgr || lnk->clearing || |
1279 | lnk->state == SMC_LNK_UNUSED) |
1280 | return; |
1281 | lnk->clearing = 1; |
1282 | lnk->peer_qpn = 0; |
1283 | smc_llc_link_clear(link: lnk, log); |
1284 | smcr_buf_unmap_lgr(lnk); |
1285 | smcr_rtoken_clear_link(lnk); |
1286 | smc_ib_modify_qp_error(lnk); |
1287 | smc_wr_free_link(lnk); |
1288 | smc_ib_destroy_queue_pair(lnk); |
1289 | smc_ib_dealloc_protection_domain(lnk); |
1290 | smcr_link_put(lnk); /* theoretically last link_put */ |
1291 | } |
1292 | |
1293 | void smcr_link_hold(struct smc_link *lnk) |
1294 | { |
1295 | refcount_inc(r: &lnk->refcnt); |
1296 | } |
1297 | |
1298 | void smcr_link_put(struct smc_link *lnk) |
1299 | { |
1300 | if (refcount_dec_and_test(r: &lnk->refcnt)) |
1301 | __smcr_link_clear(lnk); |
1302 | } |
1303 | |
1304 | static void smcr_buf_free(struct smc_link_group *lgr, bool is_rmb, |
1305 | struct smc_buf_desc *buf_desc) |
1306 | { |
1307 | int i; |
1308 | |
1309 | for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) |
1310 | smcr_buf_unmap_link(buf_desc, is_rmb, lnk: &lgr->lnk[i]); |
1311 | |
1312 | if (!buf_desc->is_vm && buf_desc->pages) |
1313 | __free_pages(page: buf_desc->pages, order: buf_desc->order); |
1314 | else if (buf_desc->is_vm && buf_desc->cpu_addr) |
1315 | vfree(addr: buf_desc->cpu_addr); |
1316 | kfree(objp: buf_desc); |
1317 | } |
1318 | |
1319 | static void smcd_buf_free(struct smc_link_group *lgr, bool is_dmb, |
1320 | struct smc_buf_desc *buf_desc) |
1321 | { |
1322 | if (is_dmb) { |
1323 | /* restore original buf len */ |
1324 | buf_desc->len += sizeof(struct smcd_cdc_msg); |
1325 | smc_ism_unregister_dmb(dev: lgr->smcd, dmb_desc: buf_desc); |
1326 | } else { |
1327 | kfree(objp: buf_desc->cpu_addr); |
1328 | } |
1329 | kfree(objp: buf_desc); |
1330 | } |
1331 | |
1332 | static void smc_buf_free(struct smc_link_group *lgr, bool is_rmb, |
1333 | struct smc_buf_desc *buf_desc) |
1334 | { |
1335 | if (lgr->is_smcd) |
1336 | smcd_buf_free(lgr, is_dmb: is_rmb, buf_desc); |
1337 | else |
1338 | smcr_buf_free(lgr, is_rmb, buf_desc); |
1339 | } |
1340 | |
1341 | static void __smc_lgr_free_bufs(struct smc_link_group *lgr, bool is_rmb) |
1342 | { |
1343 | struct smc_buf_desc *buf_desc, *bf_desc; |
1344 | struct list_head *buf_list; |
1345 | int i; |
1346 | |
1347 | for (i = 0; i < SMC_RMBE_SIZES; i++) { |
1348 | if (is_rmb) |
1349 | buf_list = &lgr->rmbs[i]; |
1350 | else |
1351 | buf_list = &lgr->sndbufs[i]; |
1352 | list_for_each_entry_safe(buf_desc, bf_desc, buf_list, |
1353 | list) { |
1354 | list_del(entry: &buf_desc->list); |
1355 | smc_buf_free(lgr, is_rmb, buf_desc); |
1356 | } |
1357 | } |
1358 | } |
1359 | |
1360 | static void smc_lgr_free_bufs(struct smc_link_group *lgr) |
1361 | { |
1362 | /* free send buffers */ |
1363 | __smc_lgr_free_bufs(lgr, is_rmb: false); |
1364 | /* free rmbs */ |
1365 | __smc_lgr_free_bufs(lgr, is_rmb: true); |
1366 | } |
1367 | |
1368 | /* won't be freed until no one accesses to lgr anymore */ |
1369 | static void __smc_lgr_free(struct smc_link_group *lgr) |
1370 | { |
1371 | smc_lgr_free_bufs(lgr); |
1372 | if (lgr->is_smcd) { |
1373 | if (!atomic_dec_return(v: &lgr->smcd->lgr_cnt)) |
1374 | wake_up(&lgr->smcd->lgrs_deleted); |
1375 | } else { |
1376 | smc_wr_free_lgr_mem(lgr); |
1377 | if (!atomic_dec_return(v: &lgr_cnt)) |
1378 | wake_up(&lgrs_deleted); |
1379 | } |
1380 | kfree(objp: lgr); |
1381 | } |
1382 | |
1383 | /* remove a link group */ |
1384 | static void smc_lgr_free(struct smc_link_group *lgr) |
1385 | { |
1386 | int i; |
1387 | |
1388 | if (!lgr->is_smcd) { |
1389 | down_write(sem: &lgr->llc_conf_mutex); |
1390 | for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) { |
1391 | if (lgr->lnk[i].state != SMC_LNK_UNUSED) |
1392 | smcr_link_clear(lnk: &lgr->lnk[i], log: false); |
1393 | } |
1394 | up_write(sem: &lgr->llc_conf_mutex); |
1395 | smc_llc_lgr_clear(lgr); |
1396 | } |
1397 | |
1398 | destroy_workqueue(wq: lgr->tx_wq); |
1399 | if (lgr->is_smcd) { |
1400 | smc_ism_put_vlan(dev: lgr->smcd, vlan_id: lgr->vlan_id); |
1401 | put_device(dev: lgr->smcd->ops->get_dev(lgr->smcd)); |
1402 | } |
1403 | smc_lgr_put(lgr); /* theoretically last lgr_put */ |
1404 | } |
1405 | |
1406 | void smc_lgr_hold(struct smc_link_group *lgr) |
1407 | { |
1408 | refcount_inc(r: &lgr->refcnt); |
1409 | } |
1410 | |
1411 | void smc_lgr_put(struct smc_link_group *lgr) |
1412 | { |
1413 | if (refcount_dec_and_test(r: &lgr->refcnt)) |
1414 | __smc_lgr_free(lgr); |
1415 | } |
1416 | |
1417 | static void smc_sk_wake_ups(struct smc_sock *smc) |
1418 | { |
1419 | smc->sk.sk_write_space(&smc->sk); |
1420 | smc->sk.sk_data_ready(&smc->sk); |
1421 | smc->sk.sk_state_change(&smc->sk); |
1422 | } |
1423 | |
1424 | /* kill a connection */ |
1425 | static void smc_conn_kill(struct smc_connection *conn, bool soft) |
1426 | { |
1427 | struct smc_sock *smc = container_of(conn, struct smc_sock, conn); |
1428 | |
1429 | if (conn->lgr->is_smcd && conn->lgr->peer_shutdown) |
1430 | conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1; |
1431 | else |
1432 | smc_close_abort(conn); |
1433 | conn->killed = 1; |
1434 | smc->sk.sk_err = ECONNABORTED; |
1435 | smc_sk_wake_ups(smc); |
1436 | if (conn->lgr->is_smcd) { |
1437 | smc_ism_unset_conn(conn); |
1438 | if (soft) |
1439 | tasklet_kill(t: &conn->rx_tsklet); |
1440 | else |
1441 | tasklet_unlock_wait(t: &conn->rx_tsklet); |
1442 | } else { |
1443 | smc_cdc_wait_pend_tx_wr(conn); |
1444 | } |
1445 | smc_lgr_unregister_conn(conn); |
1446 | smc_close_active_abort(smc); |
1447 | } |
1448 | |
1449 | static void smc_lgr_cleanup(struct smc_link_group *lgr) |
1450 | { |
1451 | if (lgr->is_smcd) { |
1452 | smc_ism_signal_shutdown(lgr); |
1453 | } else { |
1454 | u32 rsn = lgr->llc_termination_rsn; |
1455 | |
1456 | if (!rsn) |
1457 | rsn = SMC_LLC_DEL_PROG_INIT_TERM; |
1458 | smc_llc_send_link_delete_all(lgr, ord: false, rsn); |
1459 | smcr_lgr_link_deactivate_all(lgr); |
1460 | } |
1461 | } |
1462 | |
1463 | /* terminate link group |
1464 | * @soft: true if link group shutdown can take its time |
1465 | * false if immediate link group shutdown is required |
1466 | */ |
1467 | static void __smc_lgr_terminate(struct smc_link_group *lgr, bool soft) |
1468 | { |
1469 | struct smc_connection *conn; |
1470 | struct smc_sock *smc; |
1471 | struct rb_node *node; |
1472 | |
1473 | if (lgr->terminating) |
1474 | return; /* lgr already terminating */ |
1475 | /* cancel free_work sync, will terminate when lgr->freeing is set */ |
1476 | cancel_delayed_work(dwork: &lgr->free_work); |
1477 | lgr->terminating = 1; |
1478 | |
1479 | /* kill remaining link group connections */ |
1480 | read_lock_bh(&lgr->conns_lock); |
1481 | node = rb_first(&lgr->conns_all); |
1482 | while (node) { |
1483 | read_unlock_bh(&lgr->conns_lock); |
1484 | conn = rb_entry(node, struct smc_connection, alert_node); |
1485 | smc = container_of(conn, struct smc_sock, conn); |
1486 | sock_hold(sk: &smc->sk); /* sock_put below */ |
1487 | lock_sock(sk: &smc->sk); |
1488 | smc_conn_kill(conn, soft); |
1489 | release_sock(sk: &smc->sk); |
1490 | sock_put(sk: &smc->sk); /* sock_hold above */ |
1491 | read_lock_bh(&lgr->conns_lock); |
1492 | node = rb_first(&lgr->conns_all); |
1493 | } |
1494 | read_unlock_bh(&lgr->conns_lock); |
1495 | smc_lgr_cleanup(lgr); |
1496 | smc_lgr_free(lgr); |
1497 | } |
1498 | |
1499 | /* unlink link group and schedule termination */ |
1500 | void smc_lgr_terminate_sched(struct smc_link_group *lgr) |
1501 | { |
1502 | spinlock_t *lgr_lock; |
1503 | |
1504 | smc_lgr_list_head(lgr, lgr_lock: &lgr_lock); |
1505 | spin_lock_bh(lock: lgr_lock); |
1506 | if (list_empty(head: &lgr->list) || lgr->terminating || lgr->freeing) { |
1507 | spin_unlock_bh(lock: lgr_lock); |
1508 | return; /* lgr already terminating */ |
1509 | } |
1510 | list_del_init(entry: &lgr->list); |
1511 | lgr->freeing = 1; |
1512 | spin_unlock_bh(lock: lgr_lock); |
1513 | schedule_work(work: &lgr->terminate_work); |
1514 | } |
1515 | |
1516 | /* Called when peer lgr shutdown (regularly or abnormally) is received */ |
1517 | void smc_smcd_terminate(struct smcd_dev *dev, u64 peer_gid, unsigned short vlan) |
1518 | { |
1519 | struct smc_link_group *lgr, *l; |
1520 | LIST_HEAD(lgr_free_list); |
1521 | |
1522 | /* run common cleanup function and build free list */ |
1523 | spin_lock_bh(lock: &dev->lgr_lock); |
1524 | list_for_each_entry_safe(lgr, l, &dev->lgr_list, list) { |
1525 | if ((!peer_gid || lgr->peer_gid == peer_gid) && |
1526 | (vlan == VLAN_VID_MASK || lgr->vlan_id == vlan)) { |
1527 | if (peer_gid) /* peer triggered termination */ |
1528 | lgr->peer_shutdown = 1; |
1529 | list_move(list: &lgr->list, head: &lgr_free_list); |
1530 | lgr->freeing = 1; |
1531 | } |
1532 | } |
1533 | spin_unlock_bh(lock: &dev->lgr_lock); |
1534 | |
1535 | /* cancel the regular free workers and actually free lgrs */ |
1536 | list_for_each_entry_safe(lgr, l, &lgr_free_list, list) { |
1537 | list_del_init(entry: &lgr->list); |
1538 | schedule_work(work: &lgr->terminate_work); |
1539 | } |
1540 | } |
1541 | |
1542 | /* Called when an SMCD device is removed or the smc module is unloaded */ |
1543 | void smc_smcd_terminate_all(struct smcd_dev *smcd) |
1544 | { |
1545 | struct smc_link_group *lgr, *lg; |
1546 | LIST_HEAD(lgr_free_list); |
1547 | |
1548 | spin_lock_bh(lock: &smcd->lgr_lock); |
1549 | list_splice_init(list: &smcd->lgr_list, head: &lgr_free_list); |
1550 | list_for_each_entry(lgr, &lgr_free_list, list) |
1551 | lgr->freeing = 1; |
1552 | spin_unlock_bh(lock: &smcd->lgr_lock); |
1553 | |
1554 | list_for_each_entry_safe(lgr, lg, &lgr_free_list, list) { |
1555 | list_del_init(entry: &lgr->list); |
1556 | __smc_lgr_terminate(lgr, soft: false); |
1557 | } |
1558 | |
1559 | if (atomic_read(v: &smcd->lgr_cnt)) |
1560 | wait_event(smcd->lgrs_deleted, !atomic_read(&smcd->lgr_cnt)); |
1561 | } |
1562 | |
1563 | /* Called when an SMCR device is removed or the smc module is unloaded. |
1564 | * If smcibdev is given, all SMCR link groups using this device are terminated. |
1565 | * If smcibdev is NULL, all SMCR link groups are terminated. |
1566 | */ |
1567 | void smc_smcr_terminate_all(struct smc_ib_device *smcibdev) |
1568 | { |
1569 | struct smc_link_group *lgr, *lg; |
1570 | LIST_HEAD(lgr_free_list); |
1571 | int i; |
1572 | |
1573 | spin_lock_bh(lock: &smc_lgr_list.lock); |
1574 | if (!smcibdev) { |
1575 | list_splice_init(list: &smc_lgr_list.list, head: &lgr_free_list); |
1576 | list_for_each_entry(lgr, &lgr_free_list, list) |
1577 | lgr->freeing = 1; |
1578 | } else { |
1579 | list_for_each_entry_safe(lgr, lg, &smc_lgr_list.list, list) { |
1580 | for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) { |
1581 | if (lgr->lnk[i].smcibdev == smcibdev) |
1582 | smcr_link_down_cond_sched(lnk: &lgr->lnk[i]); |
1583 | } |
1584 | } |
1585 | } |
1586 | spin_unlock_bh(lock: &smc_lgr_list.lock); |
1587 | |
1588 | list_for_each_entry_safe(lgr, lg, &lgr_free_list, list) { |
1589 | list_del_init(entry: &lgr->list); |
1590 | smc_llc_set_termination_rsn(lgr, SMC_LLC_DEL_OP_INIT_TERM); |
1591 | __smc_lgr_terminate(lgr, soft: false); |
1592 | } |
1593 | |
1594 | if (smcibdev) { |
1595 | if (atomic_read(v: &smcibdev->lnk_cnt)) |
1596 | wait_event(smcibdev->lnks_deleted, |
1597 | !atomic_read(&smcibdev->lnk_cnt)); |
1598 | } else { |
1599 | if (atomic_read(v: &lgr_cnt)) |
1600 | wait_event(lgrs_deleted, !atomic_read(&lgr_cnt)); |
1601 | } |
1602 | } |
1603 | |
1604 | /* set new lgr type and clear all asymmetric link tagging */ |
1605 | void smcr_lgr_set_type(struct smc_link_group *lgr, enum smc_lgr_type new_type) |
1606 | { |
1607 | char *lgr_type = "" ; |
1608 | int i; |
1609 | |
1610 | for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) |
1611 | if (smc_link_usable(lnk: &lgr->lnk[i])) |
1612 | lgr->lnk[i].link_is_asym = false; |
1613 | if (lgr->type == new_type) |
1614 | return; |
1615 | lgr->type = new_type; |
1616 | |
1617 | switch (lgr->type) { |
1618 | case SMC_LGR_NONE: |
1619 | lgr_type = "NONE" ; |
1620 | break; |
1621 | case SMC_LGR_SINGLE: |
1622 | lgr_type = "SINGLE" ; |
1623 | break; |
1624 | case SMC_LGR_SYMMETRIC: |
1625 | lgr_type = "SYMMETRIC" ; |
1626 | break; |
1627 | case SMC_LGR_ASYMMETRIC_PEER: |
1628 | lgr_type = "ASYMMETRIC_PEER" ; |
1629 | break; |
1630 | case SMC_LGR_ASYMMETRIC_LOCAL: |
1631 | lgr_type = "ASYMMETRIC_LOCAL" ; |
1632 | break; |
1633 | } |
1634 | pr_warn_ratelimited("smc: SMC-R lg %*phN net %llu state changed: " |
1635 | "%s, pnetid %.16s\n" , SMC_LGR_ID_SIZE, &lgr->id, |
1636 | lgr->net->net_cookie, lgr_type, lgr->pnet_id); |
1637 | } |
1638 | |
1639 | /* set new lgr type and tag a link as asymmetric */ |
1640 | void smcr_lgr_set_type_asym(struct smc_link_group *lgr, |
1641 | enum smc_lgr_type new_type, int asym_lnk_idx) |
1642 | { |
1643 | smcr_lgr_set_type(lgr, new_type); |
1644 | lgr->lnk[asym_lnk_idx].link_is_asym = true; |
1645 | } |
1646 | |
1647 | /* abort connection, abort_work scheduled from tasklet context */ |
1648 | static void smc_conn_abort_work(struct work_struct *work) |
1649 | { |
1650 | struct smc_connection *conn = container_of(work, |
1651 | struct smc_connection, |
1652 | abort_work); |
1653 | struct smc_sock *smc = container_of(conn, struct smc_sock, conn); |
1654 | |
1655 | lock_sock(sk: &smc->sk); |
1656 | smc_conn_kill(conn, soft: true); |
1657 | release_sock(sk: &smc->sk); |
1658 | sock_put(sk: &smc->sk); /* sock_hold done by schedulers of abort_work */ |
1659 | } |
1660 | |
1661 | void smcr_port_add(struct smc_ib_device *smcibdev, u8 ibport) |
1662 | { |
1663 | struct smc_link_group *lgr, *n; |
1664 | |
1665 | spin_lock_bh(lock: &smc_lgr_list.lock); |
1666 | list_for_each_entry_safe(lgr, n, &smc_lgr_list.list, list) { |
1667 | struct smc_link *link; |
1668 | |
1669 | if (strncmp(smcibdev->pnetid[ibport - 1], lgr->pnet_id, |
1670 | SMC_MAX_PNETID_LEN) || |
1671 | lgr->type == SMC_LGR_SYMMETRIC || |
1672 | lgr->type == SMC_LGR_ASYMMETRIC_PEER || |
1673 | !rdma_dev_access_netns(device: smcibdev->ibdev, net: lgr->net)) |
1674 | continue; |
1675 | |
1676 | if (lgr->type == SMC_LGR_SINGLE && lgr->max_links <= 1) |
1677 | continue; |
1678 | |
1679 | /* trigger local add link processing */ |
1680 | link = smc_llc_usable_link(lgr); |
1681 | if (link) |
1682 | smc_llc_add_link_local(link); |
1683 | } |
1684 | spin_unlock_bh(lock: &smc_lgr_list.lock); |
1685 | } |
1686 | |
1687 | /* link is down - switch connections to alternate link, |
1688 | * must be called under lgr->llc_conf_mutex lock |
1689 | */ |
1690 | static void smcr_link_down(struct smc_link *lnk) |
1691 | { |
1692 | struct smc_link_group *lgr = lnk->lgr; |
1693 | struct smc_link *to_lnk; |
1694 | int del_link_id; |
1695 | |
1696 | if (!lgr || lnk->state == SMC_LNK_UNUSED || list_empty(head: &lgr->list)) |
1697 | return; |
1698 | |
1699 | to_lnk = smc_switch_conns(lgr, from_lnk: lnk, is_dev_err: true); |
1700 | if (!to_lnk) { /* no backup link available */ |
1701 | smcr_link_clear(lnk, log: true); |
1702 | return; |
1703 | } |
1704 | smcr_lgr_set_type(lgr, new_type: SMC_LGR_SINGLE); |
1705 | del_link_id = lnk->link_id; |
1706 | |
1707 | if (lgr->role == SMC_SERV) { |
1708 | /* trigger local delete link processing */ |
1709 | smc_llc_srv_delete_link_local(link: to_lnk, del_link_id); |
1710 | } else { |
1711 | if (lgr->llc_flow_lcl.type != SMC_LLC_FLOW_NONE) { |
1712 | /* another llc task is ongoing */ |
1713 | up_write(sem: &lgr->llc_conf_mutex); |
1714 | wait_event_timeout(lgr->llc_flow_waiter, |
1715 | (list_empty(&lgr->list) || |
1716 | lgr->llc_flow_lcl.type == SMC_LLC_FLOW_NONE), |
1717 | SMC_LLC_WAIT_TIME); |
1718 | down_write(sem: &lgr->llc_conf_mutex); |
1719 | } |
1720 | if (!list_empty(head: &lgr->list)) { |
1721 | smc_llc_send_delete_link(link: to_lnk, link_del_id: del_link_id, |
1722 | reqresp: SMC_LLC_REQ, orderly: true, |
1723 | SMC_LLC_DEL_LOST_PATH); |
1724 | smcr_link_clear(lnk, log: true); |
1725 | } |
1726 | wake_up(&lgr->llc_flow_waiter); /* wake up next waiter */ |
1727 | } |
1728 | } |
1729 | |
1730 | /* must be called under lgr->llc_conf_mutex lock */ |
1731 | void smcr_link_down_cond(struct smc_link *lnk) |
1732 | { |
1733 | if (smc_link_downing(&lnk->state)) { |
1734 | trace_smcr_link_down(lnk, location: __builtin_return_address(0)); |
1735 | smcr_link_down(lnk); |
1736 | } |
1737 | } |
1738 | |
1739 | /* will get the lgr->llc_conf_mutex lock */ |
1740 | void smcr_link_down_cond_sched(struct smc_link *lnk) |
1741 | { |
1742 | if (smc_link_downing(&lnk->state)) { |
1743 | trace_smcr_link_down(lnk, location: __builtin_return_address(0)); |
1744 | schedule_work(work: &lnk->link_down_wrk); |
1745 | } |
1746 | } |
1747 | |
1748 | void smcr_port_err(struct smc_ib_device *smcibdev, u8 ibport) |
1749 | { |
1750 | struct smc_link_group *lgr, *n; |
1751 | int i; |
1752 | |
1753 | list_for_each_entry_safe(lgr, n, &smc_lgr_list.list, list) { |
1754 | if (strncmp(smcibdev->pnetid[ibport - 1], lgr->pnet_id, |
1755 | SMC_MAX_PNETID_LEN)) |
1756 | continue; /* lgr is not affected */ |
1757 | if (list_empty(head: &lgr->list)) |
1758 | continue; |
1759 | for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) { |
1760 | struct smc_link *lnk = &lgr->lnk[i]; |
1761 | |
1762 | if (smc_link_usable(lnk) && |
1763 | lnk->smcibdev == smcibdev && lnk->ibport == ibport) |
1764 | smcr_link_down_cond_sched(lnk); |
1765 | } |
1766 | } |
1767 | } |
1768 | |
1769 | static void smc_link_down_work(struct work_struct *work) |
1770 | { |
1771 | struct smc_link *link = container_of(work, struct smc_link, |
1772 | link_down_wrk); |
1773 | struct smc_link_group *lgr = link->lgr; |
1774 | |
1775 | if (list_empty(head: &lgr->list)) |
1776 | return; |
1777 | wake_up_all(&lgr->llc_msg_waiter); |
1778 | down_write(sem: &lgr->llc_conf_mutex); |
1779 | smcr_link_down(lnk: link); |
1780 | up_write(sem: &lgr->llc_conf_mutex); |
1781 | } |
1782 | |
1783 | static int smc_vlan_by_tcpsk_walk(struct net_device *lower_dev, |
1784 | struct netdev_nested_priv *priv) |
1785 | { |
1786 | unsigned short *vlan_id = (unsigned short *)priv->data; |
1787 | |
1788 | if (is_vlan_dev(dev: lower_dev)) { |
1789 | *vlan_id = vlan_dev_vlan_id(dev: lower_dev); |
1790 | return 1; |
1791 | } |
1792 | |
1793 | return 0; |
1794 | } |
1795 | |
1796 | /* Determine vlan of internal TCP socket. */ |
1797 | int smc_vlan_by_tcpsk(struct socket *clcsock, struct smc_init_info *ini) |
1798 | { |
1799 | struct dst_entry *dst = sk_dst_get(sk: clcsock->sk); |
1800 | struct netdev_nested_priv priv; |
1801 | struct net_device *ndev; |
1802 | int rc = 0; |
1803 | |
1804 | ini->vlan_id = 0; |
1805 | if (!dst) { |
1806 | rc = -ENOTCONN; |
1807 | goto out; |
1808 | } |
1809 | if (!dst->dev) { |
1810 | rc = -ENODEV; |
1811 | goto out_rel; |
1812 | } |
1813 | |
1814 | ndev = dst->dev; |
1815 | if (is_vlan_dev(dev: ndev)) { |
1816 | ini->vlan_id = vlan_dev_vlan_id(dev: ndev); |
1817 | goto out_rel; |
1818 | } |
1819 | |
1820 | priv.data = (void *)&ini->vlan_id; |
1821 | rtnl_lock(); |
1822 | netdev_walk_all_lower_dev(dev: ndev, fn: smc_vlan_by_tcpsk_walk, priv: &priv); |
1823 | rtnl_unlock(); |
1824 | |
1825 | out_rel: |
1826 | dst_release(dst); |
1827 | out: |
1828 | return rc; |
1829 | } |
1830 | |
1831 | static bool smcr_lgr_match(struct smc_link_group *lgr, u8 smcr_version, |
1832 | u8 peer_systemid[], |
1833 | u8 peer_gid[], |
1834 | u8 peer_mac_v1[], |
1835 | enum smc_lgr_role role, u32 clcqpn, |
1836 | struct net *net) |
1837 | { |
1838 | struct smc_link *lnk; |
1839 | int i; |
1840 | |
1841 | if (memcmp(p: lgr->peer_systemid, q: peer_systemid, SMC_SYSTEMID_LEN) || |
1842 | lgr->role != role) |
1843 | return false; |
1844 | |
1845 | for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) { |
1846 | lnk = &lgr->lnk[i]; |
1847 | |
1848 | if (!smc_link_active(lnk)) |
1849 | continue; |
1850 | /* use verbs API to check netns, instead of lgr->net */ |
1851 | if (!rdma_dev_access_netns(device: lnk->smcibdev->ibdev, net)) |
1852 | return false; |
1853 | if ((lgr->role == SMC_SERV || lnk->peer_qpn == clcqpn) && |
1854 | !memcmp(p: lnk->peer_gid, q: peer_gid, SMC_GID_SIZE) && |
1855 | (smcr_version == SMC_V2 || |
1856 | !memcmp(p: lnk->peer_mac, q: peer_mac_v1, ETH_ALEN))) |
1857 | return true; |
1858 | } |
1859 | return false; |
1860 | } |
1861 | |
1862 | static bool smcd_lgr_match(struct smc_link_group *lgr, |
1863 | struct smcd_dev *smcismdev, u64 peer_gid) |
1864 | { |
1865 | return lgr->peer_gid == peer_gid && lgr->smcd == smcismdev; |
1866 | } |
1867 | |
1868 | /* create a new SMC connection (and a new link group if necessary) */ |
1869 | int smc_conn_create(struct smc_sock *smc, struct smc_init_info *ini) |
1870 | { |
1871 | struct smc_connection *conn = &smc->conn; |
1872 | struct net *net = sock_net(sk: &smc->sk); |
1873 | struct list_head *lgr_list; |
1874 | struct smc_link_group *lgr; |
1875 | enum smc_lgr_role role; |
1876 | spinlock_t *lgr_lock; |
1877 | int rc = 0; |
1878 | |
1879 | lgr_list = ini->is_smcd ? &ini->ism_dev[ini->ism_selected]->lgr_list : |
1880 | &smc_lgr_list.list; |
1881 | lgr_lock = ini->is_smcd ? &ini->ism_dev[ini->ism_selected]->lgr_lock : |
1882 | &smc_lgr_list.lock; |
1883 | ini->first_contact_local = 1; |
1884 | role = smc->listen_smc ? SMC_SERV : SMC_CLNT; |
1885 | if (role == SMC_CLNT && ini->first_contact_peer) |
1886 | /* create new link group as well */ |
1887 | goto create; |
1888 | |
1889 | /* determine if an existing link group can be reused */ |
1890 | spin_lock_bh(lock: lgr_lock); |
1891 | list_for_each_entry(lgr, lgr_list, list) { |
1892 | write_lock_bh(&lgr->conns_lock); |
1893 | if ((ini->is_smcd ? |
1894 | smcd_lgr_match(lgr, smcismdev: ini->ism_dev[ini->ism_selected], |
1895 | peer_gid: ini->ism_peer_gid[ini->ism_selected]) : |
1896 | smcr_lgr_match(lgr, smcr_version: ini->smcr_version, |
1897 | peer_systemid: ini->peer_systemid, |
1898 | peer_gid: ini->peer_gid, peer_mac_v1: ini->peer_mac, role, |
1899 | clcqpn: ini->ib_clcqpn, net)) && |
1900 | !lgr->sync_err && |
1901 | (ini->smcd_version == SMC_V2 || |
1902 | lgr->vlan_id == ini->vlan_id) && |
1903 | (role == SMC_CLNT || ini->is_smcd || |
1904 | (lgr->conns_num < lgr->max_conns && |
1905 | !bitmap_full(src: lgr->rtokens_used_mask, SMC_RMBS_PER_LGR_MAX)))) { |
1906 | /* link group found */ |
1907 | ini->first_contact_local = 0; |
1908 | conn->lgr = lgr; |
1909 | rc = smc_lgr_register_conn(conn, first: false); |
1910 | write_unlock_bh(&lgr->conns_lock); |
1911 | if (!rc && delayed_work_pending(&lgr->free_work)) |
1912 | cancel_delayed_work(dwork: &lgr->free_work); |
1913 | break; |
1914 | } |
1915 | write_unlock_bh(&lgr->conns_lock); |
1916 | } |
1917 | spin_unlock_bh(lock: lgr_lock); |
1918 | if (rc) |
1919 | return rc; |
1920 | |
1921 | if (role == SMC_CLNT && !ini->first_contact_peer && |
1922 | ini->first_contact_local) { |
1923 | /* Server reuses a link group, but Client wants to start |
1924 | * a new one |
1925 | * send out_of_sync decline, reason synchr. error |
1926 | */ |
1927 | return SMC_CLC_DECL_SYNCERR; |
1928 | } |
1929 | |
1930 | create: |
1931 | if (ini->first_contact_local) { |
1932 | rc = smc_lgr_create(smc, ini); |
1933 | if (rc) |
1934 | goto out; |
1935 | lgr = conn->lgr; |
1936 | write_lock_bh(&lgr->conns_lock); |
1937 | rc = smc_lgr_register_conn(conn, first: true); |
1938 | write_unlock_bh(&lgr->conns_lock); |
1939 | if (rc) { |
1940 | smc_lgr_cleanup_early(lgr); |
1941 | goto out; |
1942 | } |
1943 | } |
1944 | smc_lgr_hold(lgr: conn->lgr); /* lgr_put in smc_conn_free() */ |
1945 | if (!conn->lgr->is_smcd) |
1946 | smcr_link_hold(lnk: conn->lnk); /* link_put in smc_conn_free() */ |
1947 | conn->freed = 0; |
1948 | conn->local_tx_ctrl.common.type = SMC_CDC_MSG_TYPE; |
1949 | conn->local_tx_ctrl.len = SMC_WR_TX_SIZE; |
1950 | conn->urg_state = SMC_URG_READ; |
1951 | init_waitqueue_head(&conn->cdc_pend_tx_wq); |
1952 | INIT_WORK(&smc->conn.abort_work, smc_conn_abort_work); |
1953 | if (ini->is_smcd) { |
1954 | conn->rx_off = sizeof(struct smcd_cdc_msg); |
1955 | smcd_cdc_rx_init(conn); /* init tasklet for this conn */ |
1956 | } else { |
1957 | conn->rx_off = 0; |
1958 | } |
1959 | #ifndef KERNEL_HAS_ATOMIC64 |
1960 | spin_lock_init(&conn->acurs_lock); |
1961 | #endif |
1962 | |
1963 | out: |
1964 | return rc; |
1965 | } |
1966 | |
1967 | #define SMCD_DMBE_SIZES 6 /* 0 -> 16KB, 1 -> 32KB, .. 6 -> 1MB */ |
1968 | #define SMCR_RMBE_SIZES 5 /* 0 -> 16KB, 1 -> 32KB, .. 5 -> 512KB */ |
1969 | |
1970 | /* convert the RMB size into the compressed notation (minimum 16K, see |
1971 | * SMCD/R_DMBE_SIZES. |
1972 | * In contrast to plain ilog2, this rounds towards the next power of 2, |
1973 | * so the socket application gets at least its desired sndbuf / rcvbuf size. |
1974 | */ |
1975 | static u8 smc_compress_bufsize(int size, bool is_smcd, bool is_rmb) |
1976 | { |
1977 | const unsigned int max_scat = SG_MAX_SINGLE_ALLOC * PAGE_SIZE; |
1978 | u8 compressed; |
1979 | |
1980 | if (size <= SMC_BUF_MIN_SIZE) |
1981 | return 0; |
1982 | |
1983 | size = (size - 1) >> 14; /* convert to 16K multiple */ |
1984 | compressed = min_t(u8, ilog2(size) + 1, |
1985 | is_smcd ? SMCD_DMBE_SIZES : SMCR_RMBE_SIZES); |
1986 | |
1987 | if (!is_smcd && is_rmb) |
1988 | /* RMBs are backed by & limited to max size of scatterlists */ |
1989 | compressed = min_t(u8, compressed, ilog2(max_scat >> 14)); |
1990 | |
1991 | return compressed; |
1992 | } |
1993 | |
1994 | /* convert the RMB size from compressed notation into integer */ |
1995 | int smc_uncompress_bufsize(u8 compressed) |
1996 | { |
1997 | u32 size; |
1998 | |
1999 | size = 0x00000001 << (((int)compressed) + 14); |
2000 | return (int)size; |
2001 | } |
2002 | |
2003 | /* try to reuse a sndbuf or rmb description slot for a certain |
2004 | * buffer size; if not available, return NULL |
2005 | */ |
2006 | static struct smc_buf_desc *smc_buf_get_slot(int compressed_bufsize, |
2007 | struct rw_semaphore *lock, |
2008 | struct list_head *buf_list) |
2009 | { |
2010 | struct smc_buf_desc *buf_slot; |
2011 | |
2012 | down_read(sem: lock); |
2013 | list_for_each_entry(buf_slot, buf_list, list) { |
2014 | if (cmpxchg(&buf_slot->used, 0, 1) == 0) { |
2015 | up_read(sem: lock); |
2016 | return buf_slot; |
2017 | } |
2018 | } |
2019 | up_read(sem: lock); |
2020 | return NULL; |
2021 | } |
2022 | |
2023 | /* one of the conditions for announcing a receiver's current window size is |
2024 | * that it "results in a minimum increase in the window size of 10% of the |
2025 | * receive buffer space" [RFC7609] |
2026 | */ |
2027 | static inline int smc_rmb_wnd_update_limit(int rmbe_size) |
2028 | { |
2029 | return max_t(int, rmbe_size / 10, SOCK_MIN_SNDBUF / 2); |
2030 | } |
2031 | |
2032 | /* map an buf to a link */ |
2033 | static int smcr_buf_map_link(struct smc_buf_desc *buf_desc, bool is_rmb, |
2034 | struct smc_link *lnk) |
2035 | { |
2036 | int rc, i, nents, offset, buf_size, size, access_flags; |
2037 | struct scatterlist *sg; |
2038 | void *buf; |
2039 | |
2040 | if (buf_desc->is_map_ib[lnk->link_idx]) |
2041 | return 0; |
2042 | |
2043 | if (buf_desc->is_vm) { |
2044 | buf = buf_desc->cpu_addr; |
2045 | buf_size = buf_desc->len; |
2046 | offset = offset_in_page(buf_desc->cpu_addr); |
2047 | nents = PAGE_ALIGN(buf_size + offset) / PAGE_SIZE; |
2048 | } else { |
2049 | nents = 1; |
2050 | } |
2051 | |
2052 | rc = sg_alloc_table(&buf_desc->sgt[lnk->link_idx], nents, GFP_KERNEL); |
2053 | if (rc) |
2054 | return rc; |
2055 | |
2056 | if (buf_desc->is_vm) { |
2057 | /* virtually contiguous buffer */ |
2058 | for_each_sg(buf_desc->sgt[lnk->link_idx].sgl, sg, nents, i) { |
2059 | size = min_t(int, PAGE_SIZE - offset, buf_size); |
2060 | sg_set_page(sg, page: vmalloc_to_page(addr: buf), len: size, offset); |
2061 | buf += size / sizeof(*buf); |
2062 | buf_size -= size; |
2063 | offset = 0; |
2064 | } |
2065 | } else { |
2066 | /* physically contiguous buffer */ |
2067 | sg_set_buf(sg: buf_desc->sgt[lnk->link_idx].sgl, |
2068 | buf: buf_desc->cpu_addr, buflen: buf_desc->len); |
2069 | } |
2070 | |
2071 | /* map sg table to DMA address */ |
2072 | rc = smc_ib_buf_map_sg(lnk, buf_slot: buf_desc, |
2073 | data_direction: is_rmb ? DMA_FROM_DEVICE : DMA_TO_DEVICE); |
2074 | /* SMC protocol depends on mapping to one DMA address only */ |
2075 | if (rc != nents) { |
2076 | rc = -EAGAIN; |
2077 | goto free_table; |
2078 | } |
2079 | |
2080 | buf_desc->is_dma_need_sync |= |
2081 | smc_ib_is_sg_need_sync(lnk, buf_slot: buf_desc) << lnk->link_idx; |
2082 | |
2083 | if (is_rmb || buf_desc->is_vm) { |
2084 | /* create a new memory region for the RMB or vzalloced sndbuf */ |
2085 | access_flags = is_rmb ? |
2086 | IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE : |
2087 | IB_ACCESS_LOCAL_WRITE; |
2088 | |
2089 | rc = smc_ib_get_memory_region(pd: lnk->roce_pd, access_flags, |
2090 | buf_slot: buf_desc, link_idx: lnk->link_idx); |
2091 | if (rc) |
2092 | goto buf_unmap; |
2093 | smc_ib_sync_sg_for_device(lnk, buf_slot: buf_desc, |
2094 | data_direction: is_rmb ? DMA_FROM_DEVICE : DMA_TO_DEVICE); |
2095 | } |
2096 | buf_desc->is_map_ib[lnk->link_idx] = true; |
2097 | return 0; |
2098 | |
2099 | buf_unmap: |
2100 | smc_ib_buf_unmap_sg(lnk, buf_slot: buf_desc, |
2101 | data_direction: is_rmb ? DMA_FROM_DEVICE : DMA_TO_DEVICE); |
2102 | free_table: |
2103 | sg_free_table(&buf_desc->sgt[lnk->link_idx]); |
2104 | return rc; |
2105 | } |
2106 | |
2107 | /* register a new buf on IB device, rmb or vzalloced sndbuf |
2108 | * must be called under lgr->llc_conf_mutex lock |
2109 | */ |
2110 | int smcr_link_reg_buf(struct smc_link *link, struct smc_buf_desc *buf_desc) |
2111 | { |
2112 | if (list_empty(head: &link->lgr->list)) |
2113 | return -ENOLINK; |
2114 | if (!buf_desc->is_reg_mr[link->link_idx]) { |
2115 | /* register memory region for new buf */ |
2116 | if (buf_desc->is_vm) |
2117 | buf_desc->mr[link->link_idx]->iova = |
2118 | (uintptr_t)buf_desc->cpu_addr; |
2119 | if (smc_wr_reg_send(link, mr: buf_desc->mr[link->link_idx])) { |
2120 | buf_desc->is_reg_err = true; |
2121 | return -EFAULT; |
2122 | } |
2123 | buf_desc->is_reg_mr[link->link_idx] = true; |
2124 | } |
2125 | return 0; |
2126 | } |
2127 | |
2128 | static int _smcr_buf_map_lgr(struct smc_link *lnk, struct rw_semaphore *lock, |
2129 | struct list_head *lst, bool is_rmb) |
2130 | { |
2131 | struct smc_buf_desc *buf_desc, *bf; |
2132 | int rc = 0; |
2133 | |
2134 | down_write(sem: lock); |
2135 | list_for_each_entry_safe(buf_desc, bf, lst, list) { |
2136 | if (!buf_desc->used) |
2137 | continue; |
2138 | rc = smcr_buf_map_link(buf_desc, is_rmb, lnk); |
2139 | if (rc) |
2140 | goto out; |
2141 | } |
2142 | out: |
2143 | up_write(sem: lock); |
2144 | return rc; |
2145 | } |
2146 | |
2147 | /* map all used buffers of lgr for a new link */ |
2148 | int smcr_buf_map_lgr(struct smc_link *lnk) |
2149 | { |
2150 | struct smc_link_group *lgr = lnk->lgr; |
2151 | int i, rc = 0; |
2152 | |
2153 | for (i = 0; i < SMC_RMBE_SIZES; i++) { |
2154 | rc = _smcr_buf_map_lgr(lnk, lock: &lgr->rmbs_lock, |
2155 | lst: &lgr->rmbs[i], is_rmb: true); |
2156 | if (rc) |
2157 | return rc; |
2158 | rc = _smcr_buf_map_lgr(lnk, lock: &lgr->sndbufs_lock, |
2159 | lst: &lgr->sndbufs[i], is_rmb: false); |
2160 | if (rc) |
2161 | return rc; |
2162 | } |
2163 | return 0; |
2164 | } |
2165 | |
2166 | /* register all used buffers of lgr for a new link, |
2167 | * must be called under lgr->llc_conf_mutex lock |
2168 | */ |
2169 | int smcr_buf_reg_lgr(struct smc_link *lnk) |
2170 | { |
2171 | struct smc_link_group *lgr = lnk->lgr; |
2172 | struct smc_buf_desc *buf_desc, *bf; |
2173 | int i, rc = 0; |
2174 | |
2175 | /* reg all RMBs for a new link */ |
2176 | down_write(sem: &lgr->rmbs_lock); |
2177 | for (i = 0; i < SMC_RMBE_SIZES; i++) { |
2178 | list_for_each_entry_safe(buf_desc, bf, &lgr->rmbs[i], list) { |
2179 | if (!buf_desc->used) |
2180 | continue; |
2181 | rc = smcr_link_reg_buf(link: lnk, buf_desc); |
2182 | if (rc) { |
2183 | up_write(sem: &lgr->rmbs_lock); |
2184 | return rc; |
2185 | } |
2186 | } |
2187 | } |
2188 | up_write(sem: &lgr->rmbs_lock); |
2189 | |
2190 | if (lgr->buf_type == SMCR_PHYS_CONT_BUFS) |
2191 | return rc; |
2192 | |
2193 | /* reg all vzalloced sndbufs for a new link */ |
2194 | down_write(sem: &lgr->sndbufs_lock); |
2195 | for (i = 0; i < SMC_RMBE_SIZES; i++) { |
2196 | list_for_each_entry_safe(buf_desc, bf, &lgr->sndbufs[i], list) { |
2197 | if (!buf_desc->used || !buf_desc->is_vm) |
2198 | continue; |
2199 | rc = smcr_link_reg_buf(link: lnk, buf_desc); |
2200 | if (rc) { |
2201 | up_write(sem: &lgr->sndbufs_lock); |
2202 | return rc; |
2203 | } |
2204 | } |
2205 | } |
2206 | up_write(sem: &lgr->sndbufs_lock); |
2207 | return rc; |
2208 | } |
2209 | |
2210 | static struct smc_buf_desc *smcr_new_buf_create(struct smc_link_group *lgr, |
2211 | bool is_rmb, int bufsize) |
2212 | { |
2213 | struct smc_buf_desc *buf_desc; |
2214 | |
2215 | /* try to alloc a new buffer */ |
2216 | buf_desc = kzalloc(size: sizeof(*buf_desc), GFP_KERNEL); |
2217 | if (!buf_desc) |
2218 | return ERR_PTR(error: -ENOMEM); |
2219 | |
2220 | switch (lgr->buf_type) { |
2221 | case SMCR_PHYS_CONT_BUFS: |
2222 | case SMCR_MIXED_BUFS: |
2223 | buf_desc->order = get_order(size: bufsize); |
2224 | buf_desc->pages = alloc_pages(GFP_KERNEL | __GFP_NOWARN | |
2225 | __GFP_NOMEMALLOC | __GFP_COMP | |
2226 | __GFP_NORETRY | __GFP_ZERO, |
2227 | order: buf_desc->order); |
2228 | if (buf_desc->pages) { |
2229 | buf_desc->cpu_addr = |
2230 | (void *)page_address(buf_desc->pages); |
2231 | buf_desc->len = bufsize; |
2232 | buf_desc->is_vm = false; |
2233 | break; |
2234 | } |
2235 | if (lgr->buf_type == SMCR_PHYS_CONT_BUFS) |
2236 | goto out; |
2237 | fallthrough; // try virtually continguous buf |
2238 | case SMCR_VIRT_CONT_BUFS: |
2239 | buf_desc->order = get_order(size: bufsize); |
2240 | buf_desc->cpu_addr = vzalloc(PAGE_SIZE << buf_desc->order); |
2241 | if (!buf_desc->cpu_addr) |
2242 | goto out; |
2243 | buf_desc->pages = NULL; |
2244 | buf_desc->len = bufsize; |
2245 | buf_desc->is_vm = true; |
2246 | break; |
2247 | } |
2248 | return buf_desc; |
2249 | |
2250 | out: |
2251 | kfree(objp: buf_desc); |
2252 | return ERR_PTR(error: -EAGAIN); |
2253 | } |
2254 | |
2255 | /* map buf_desc on all usable links, |
2256 | * unused buffers stay mapped as long as the link is up |
2257 | */ |
2258 | static int smcr_buf_map_usable_links(struct smc_link_group *lgr, |
2259 | struct smc_buf_desc *buf_desc, bool is_rmb) |
2260 | { |
2261 | int i, rc = 0, cnt = 0; |
2262 | |
2263 | /* protect against parallel link reconfiguration */ |
2264 | down_read(sem: &lgr->llc_conf_mutex); |
2265 | for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) { |
2266 | struct smc_link *lnk = &lgr->lnk[i]; |
2267 | |
2268 | if (!smc_link_usable(lnk)) |
2269 | continue; |
2270 | if (smcr_buf_map_link(buf_desc, is_rmb, lnk)) { |
2271 | rc = -ENOMEM; |
2272 | goto out; |
2273 | } |
2274 | cnt++; |
2275 | } |
2276 | out: |
2277 | up_read(sem: &lgr->llc_conf_mutex); |
2278 | if (!rc && !cnt) |
2279 | rc = -EINVAL; |
2280 | return rc; |
2281 | } |
2282 | |
2283 | static struct smc_buf_desc *smcd_new_buf_create(struct smc_link_group *lgr, |
2284 | bool is_dmb, int bufsize) |
2285 | { |
2286 | struct smc_buf_desc *buf_desc; |
2287 | int rc; |
2288 | |
2289 | /* try to alloc a new DMB */ |
2290 | buf_desc = kzalloc(size: sizeof(*buf_desc), GFP_KERNEL); |
2291 | if (!buf_desc) |
2292 | return ERR_PTR(error: -ENOMEM); |
2293 | if (is_dmb) { |
2294 | rc = smc_ism_register_dmb(lgr, buf_size: bufsize, dmb_desc: buf_desc); |
2295 | if (rc) { |
2296 | kfree(objp: buf_desc); |
2297 | if (rc == -ENOMEM) |
2298 | return ERR_PTR(error: -EAGAIN); |
2299 | if (rc == -ENOSPC) |
2300 | return ERR_PTR(error: -ENOSPC); |
2301 | return ERR_PTR(error: -EIO); |
2302 | } |
2303 | buf_desc->pages = virt_to_page(buf_desc->cpu_addr); |
2304 | /* CDC header stored in buf. So, pretend it was smaller */ |
2305 | buf_desc->len = bufsize - sizeof(struct smcd_cdc_msg); |
2306 | } else { |
2307 | buf_desc->cpu_addr = kzalloc(size: bufsize, GFP_KERNEL | |
2308 | __GFP_NOWARN | __GFP_NORETRY | |
2309 | __GFP_NOMEMALLOC); |
2310 | if (!buf_desc->cpu_addr) { |
2311 | kfree(objp: buf_desc); |
2312 | return ERR_PTR(error: -EAGAIN); |
2313 | } |
2314 | buf_desc->len = bufsize; |
2315 | } |
2316 | return buf_desc; |
2317 | } |
2318 | |
2319 | static int __smc_buf_create(struct smc_sock *smc, bool is_smcd, bool is_rmb) |
2320 | { |
2321 | struct smc_buf_desc *buf_desc = ERR_PTR(error: -ENOMEM); |
2322 | struct smc_connection *conn = &smc->conn; |
2323 | struct smc_link_group *lgr = conn->lgr; |
2324 | struct list_head *buf_list; |
2325 | int bufsize, bufsize_comp; |
2326 | struct rw_semaphore *lock; /* lock buffer list */ |
2327 | bool is_dgraded = false; |
2328 | |
2329 | if (is_rmb) |
2330 | /* use socket recv buffer size (w/o overhead) as start value */ |
2331 | bufsize = smc->sk.sk_rcvbuf / 2; |
2332 | else |
2333 | /* use socket send buffer size (w/o overhead) as start value */ |
2334 | bufsize = smc->sk.sk_sndbuf / 2; |
2335 | |
2336 | for (bufsize_comp = smc_compress_bufsize(size: bufsize, is_smcd, is_rmb); |
2337 | bufsize_comp >= 0; bufsize_comp--) { |
2338 | if (is_rmb) { |
2339 | lock = &lgr->rmbs_lock; |
2340 | buf_list = &lgr->rmbs[bufsize_comp]; |
2341 | } else { |
2342 | lock = &lgr->sndbufs_lock; |
2343 | buf_list = &lgr->sndbufs[bufsize_comp]; |
2344 | } |
2345 | bufsize = smc_uncompress_bufsize(compressed: bufsize_comp); |
2346 | |
2347 | /* check for reusable slot in the link group */ |
2348 | buf_desc = smc_buf_get_slot(compressed_bufsize: bufsize_comp, lock, buf_list); |
2349 | if (buf_desc) { |
2350 | buf_desc->is_dma_need_sync = 0; |
2351 | SMC_STAT_RMB_SIZE(smc, is_smcd, is_rmb, bufsize); |
2352 | SMC_STAT_BUF_REUSE(smc, is_smcd, is_rmb); |
2353 | break; /* found reusable slot */ |
2354 | } |
2355 | |
2356 | if (is_smcd) |
2357 | buf_desc = smcd_new_buf_create(lgr, is_dmb: is_rmb, bufsize); |
2358 | else |
2359 | buf_desc = smcr_new_buf_create(lgr, is_rmb, bufsize); |
2360 | |
2361 | if (PTR_ERR(ptr: buf_desc) == -ENOMEM) |
2362 | break; |
2363 | if (IS_ERR(ptr: buf_desc)) { |
2364 | if (!is_dgraded) { |
2365 | is_dgraded = true; |
2366 | SMC_STAT_RMB_DOWNGRADED(smc, is_smcd, is_rmb); |
2367 | } |
2368 | continue; |
2369 | } |
2370 | |
2371 | SMC_STAT_RMB_ALLOC(smc, is_smcd, is_rmb); |
2372 | SMC_STAT_RMB_SIZE(smc, is_smcd, is_rmb, bufsize); |
2373 | buf_desc->used = 1; |
2374 | down_write(sem: lock); |
2375 | list_add(new: &buf_desc->list, head: buf_list); |
2376 | up_write(sem: lock); |
2377 | break; /* found */ |
2378 | } |
2379 | |
2380 | if (IS_ERR(ptr: buf_desc)) |
2381 | return PTR_ERR(ptr: buf_desc); |
2382 | |
2383 | if (!is_smcd) { |
2384 | if (smcr_buf_map_usable_links(lgr, buf_desc, is_rmb)) { |
2385 | smcr_buf_unuse(buf_desc, is_rmb, lgr); |
2386 | return -ENOMEM; |
2387 | } |
2388 | } |
2389 | |
2390 | if (is_rmb) { |
2391 | conn->rmb_desc = buf_desc; |
2392 | conn->rmbe_size_comp = bufsize_comp; |
2393 | smc->sk.sk_rcvbuf = bufsize * 2; |
2394 | atomic_set(v: &conn->bytes_to_rcv, i: 0); |
2395 | conn->rmbe_update_limit = |
2396 | smc_rmb_wnd_update_limit(rmbe_size: buf_desc->len); |
2397 | if (is_smcd) |
2398 | smc_ism_set_conn(conn); /* map RMB/smcd_dev to conn */ |
2399 | } else { |
2400 | conn->sndbuf_desc = buf_desc; |
2401 | smc->sk.sk_sndbuf = bufsize * 2; |
2402 | atomic_set(v: &conn->sndbuf_space, i: bufsize); |
2403 | } |
2404 | return 0; |
2405 | } |
2406 | |
2407 | void smc_sndbuf_sync_sg_for_device(struct smc_connection *conn) |
2408 | { |
2409 | if (!conn->sndbuf_desc->is_dma_need_sync) |
2410 | return; |
2411 | if (!smc_conn_lgr_valid(conn) || conn->lgr->is_smcd || |
2412 | !smc_link_active(lnk: conn->lnk)) |
2413 | return; |
2414 | smc_ib_sync_sg_for_device(lnk: conn->lnk, buf_slot: conn->sndbuf_desc, data_direction: DMA_TO_DEVICE); |
2415 | } |
2416 | |
2417 | void smc_rmb_sync_sg_for_cpu(struct smc_connection *conn) |
2418 | { |
2419 | int i; |
2420 | |
2421 | if (!conn->rmb_desc->is_dma_need_sync) |
2422 | return; |
2423 | if (!smc_conn_lgr_valid(conn) || conn->lgr->is_smcd) |
2424 | return; |
2425 | for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) { |
2426 | if (!smc_link_active(lnk: &conn->lgr->lnk[i])) |
2427 | continue; |
2428 | smc_ib_sync_sg_for_cpu(lnk: &conn->lgr->lnk[i], buf_slot: conn->rmb_desc, |
2429 | data_direction: DMA_FROM_DEVICE); |
2430 | } |
2431 | } |
2432 | |
2433 | /* create the send and receive buffer for an SMC socket; |
2434 | * receive buffers are called RMBs; |
2435 | * (even though the SMC protocol allows more than one RMB-element per RMB, |
2436 | * the Linux implementation uses just one RMB-element per RMB, i.e. uses an |
2437 | * extra RMB for every connection in a link group |
2438 | */ |
2439 | int smc_buf_create(struct smc_sock *smc, bool is_smcd) |
2440 | { |
2441 | int rc; |
2442 | |
2443 | /* create send buffer */ |
2444 | rc = __smc_buf_create(smc, is_smcd, is_rmb: false); |
2445 | if (rc) |
2446 | return rc; |
2447 | /* create rmb */ |
2448 | rc = __smc_buf_create(smc, is_smcd, is_rmb: true); |
2449 | if (rc) { |
2450 | down_write(sem: &smc->conn.lgr->sndbufs_lock); |
2451 | list_del(entry: &smc->conn.sndbuf_desc->list); |
2452 | up_write(sem: &smc->conn.lgr->sndbufs_lock); |
2453 | smc_buf_free(lgr: smc->conn.lgr, is_rmb: false, buf_desc: smc->conn.sndbuf_desc); |
2454 | smc->conn.sndbuf_desc = NULL; |
2455 | } |
2456 | return rc; |
2457 | } |
2458 | |
2459 | static inline int smc_rmb_reserve_rtoken_idx(struct smc_link_group *lgr) |
2460 | { |
2461 | int i; |
2462 | |
2463 | for_each_clear_bit(i, lgr->rtokens_used_mask, SMC_RMBS_PER_LGR_MAX) { |
2464 | if (!test_and_set_bit(nr: i, addr: lgr->rtokens_used_mask)) |
2465 | return i; |
2466 | } |
2467 | return -ENOSPC; |
2468 | } |
2469 | |
2470 | static int smc_rtoken_find_by_link(struct smc_link_group *lgr, int lnk_idx, |
2471 | u32 rkey) |
2472 | { |
2473 | int i; |
2474 | |
2475 | for (i = 0; i < SMC_RMBS_PER_LGR_MAX; i++) { |
2476 | if (test_bit(i, lgr->rtokens_used_mask) && |
2477 | lgr->rtokens[i][lnk_idx].rkey == rkey) |
2478 | return i; |
2479 | } |
2480 | return -ENOENT; |
2481 | } |
2482 | |
2483 | /* set rtoken for a new link to an existing rmb */ |
2484 | void smc_rtoken_set(struct smc_link_group *lgr, int link_idx, int link_idx_new, |
2485 | __be32 nw_rkey_known, __be64 nw_vaddr, __be32 nw_rkey) |
2486 | { |
2487 | int rtok_idx; |
2488 | |
2489 | rtok_idx = smc_rtoken_find_by_link(lgr, lnk_idx: link_idx, ntohl(nw_rkey_known)); |
2490 | if (rtok_idx == -ENOENT) |
2491 | return; |
2492 | lgr->rtokens[rtok_idx][link_idx_new].rkey = ntohl(nw_rkey); |
2493 | lgr->rtokens[rtok_idx][link_idx_new].dma_addr = be64_to_cpu(nw_vaddr); |
2494 | } |
2495 | |
2496 | /* set rtoken for a new link whose link_id is given */ |
2497 | void smc_rtoken_set2(struct smc_link_group *lgr, int rtok_idx, int link_id, |
2498 | __be64 nw_vaddr, __be32 nw_rkey) |
2499 | { |
2500 | u64 dma_addr = be64_to_cpu(nw_vaddr); |
2501 | u32 rkey = ntohl(nw_rkey); |
2502 | bool found = false; |
2503 | int link_idx; |
2504 | |
2505 | for (link_idx = 0; link_idx < SMC_LINKS_PER_LGR_MAX; link_idx++) { |
2506 | if (lgr->lnk[link_idx].link_id == link_id) { |
2507 | found = true; |
2508 | break; |
2509 | } |
2510 | } |
2511 | if (!found) |
2512 | return; |
2513 | lgr->rtokens[rtok_idx][link_idx].rkey = rkey; |
2514 | lgr->rtokens[rtok_idx][link_idx].dma_addr = dma_addr; |
2515 | } |
2516 | |
2517 | /* add a new rtoken from peer */ |
2518 | int smc_rtoken_add(struct smc_link *lnk, __be64 nw_vaddr, __be32 nw_rkey) |
2519 | { |
2520 | struct smc_link_group *lgr = smc_get_lgr(link: lnk); |
2521 | u64 dma_addr = be64_to_cpu(nw_vaddr); |
2522 | u32 rkey = ntohl(nw_rkey); |
2523 | int i; |
2524 | |
2525 | for (i = 0; i < SMC_RMBS_PER_LGR_MAX; i++) { |
2526 | if (lgr->rtokens[i][lnk->link_idx].rkey == rkey && |
2527 | lgr->rtokens[i][lnk->link_idx].dma_addr == dma_addr && |
2528 | test_bit(i, lgr->rtokens_used_mask)) { |
2529 | /* already in list */ |
2530 | return i; |
2531 | } |
2532 | } |
2533 | i = smc_rmb_reserve_rtoken_idx(lgr); |
2534 | if (i < 0) |
2535 | return i; |
2536 | lgr->rtokens[i][lnk->link_idx].rkey = rkey; |
2537 | lgr->rtokens[i][lnk->link_idx].dma_addr = dma_addr; |
2538 | return i; |
2539 | } |
2540 | |
2541 | /* delete an rtoken from all links */ |
2542 | int smc_rtoken_delete(struct smc_link *lnk, __be32 nw_rkey) |
2543 | { |
2544 | struct smc_link_group *lgr = smc_get_lgr(link: lnk); |
2545 | u32 rkey = ntohl(nw_rkey); |
2546 | int i, j; |
2547 | |
2548 | for (i = 0; i < SMC_RMBS_PER_LGR_MAX; i++) { |
2549 | if (lgr->rtokens[i][lnk->link_idx].rkey == rkey && |
2550 | test_bit(i, lgr->rtokens_used_mask)) { |
2551 | for (j = 0; j < SMC_LINKS_PER_LGR_MAX; j++) { |
2552 | lgr->rtokens[i][j].rkey = 0; |
2553 | lgr->rtokens[i][j].dma_addr = 0; |
2554 | } |
2555 | clear_bit(nr: i, addr: lgr->rtokens_used_mask); |
2556 | return 0; |
2557 | } |
2558 | } |
2559 | return -ENOENT; |
2560 | } |
2561 | |
2562 | /* save rkey and dma_addr received from peer during clc handshake */ |
2563 | int smc_rmb_rtoken_handling(struct smc_connection *conn, |
2564 | struct smc_link *lnk, |
2565 | struct smc_clc_msg_accept_confirm *clc) |
2566 | { |
2567 | conn->rtoken_idx = smc_rtoken_add(lnk, nw_vaddr: clc->r0.rmb_dma_addr, |
2568 | nw_rkey: clc->r0.rmb_rkey); |
2569 | if (conn->rtoken_idx < 0) |
2570 | return conn->rtoken_idx; |
2571 | return 0; |
2572 | } |
2573 | |
2574 | static void smc_core_going_away(void) |
2575 | { |
2576 | struct smc_ib_device *smcibdev; |
2577 | struct smcd_dev *smcd; |
2578 | |
2579 | mutex_lock(&smc_ib_devices.mutex); |
2580 | list_for_each_entry(smcibdev, &smc_ib_devices.list, list) { |
2581 | int i; |
2582 | |
2583 | for (i = 0; i < SMC_MAX_PORTS; i++) |
2584 | set_bit(nr: i, addr: smcibdev->ports_going_away); |
2585 | } |
2586 | mutex_unlock(lock: &smc_ib_devices.mutex); |
2587 | |
2588 | mutex_lock(&smcd_dev_list.mutex); |
2589 | list_for_each_entry(smcd, &smcd_dev_list.list, list) { |
2590 | smcd->going_away = 1; |
2591 | } |
2592 | mutex_unlock(lock: &smcd_dev_list.mutex); |
2593 | } |
2594 | |
2595 | /* Clean up all SMC link groups */ |
2596 | static void smc_lgrs_shutdown(void) |
2597 | { |
2598 | struct smcd_dev *smcd; |
2599 | |
2600 | smc_core_going_away(); |
2601 | |
2602 | smc_smcr_terminate_all(NULL); |
2603 | |
2604 | mutex_lock(&smcd_dev_list.mutex); |
2605 | list_for_each_entry(smcd, &smcd_dev_list.list, list) |
2606 | smc_smcd_terminate_all(smcd); |
2607 | mutex_unlock(lock: &smcd_dev_list.mutex); |
2608 | } |
2609 | |
2610 | static int smc_core_reboot_event(struct notifier_block *this, |
2611 | unsigned long event, void *ptr) |
2612 | { |
2613 | smc_lgrs_shutdown(); |
2614 | smc_ib_unregister_client(); |
2615 | smc_ism_exit(); |
2616 | return 0; |
2617 | } |
2618 | |
2619 | static struct notifier_block smc_reboot_notifier = { |
2620 | .notifier_call = smc_core_reboot_event, |
2621 | }; |
2622 | |
2623 | int __init smc_core_init(void) |
2624 | { |
2625 | return register_reboot_notifier(&smc_reboot_notifier); |
2626 | } |
2627 | |
2628 | /* Called (from smc_exit) when module is removed */ |
2629 | void smc_core_exit(void) |
2630 | { |
2631 | unregister_reboot_notifier(&smc_reboot_notifier); |
2632 | smc_lgrs_shutdown(); |
2633 | } |
2634 | |