1 | /* |
2 | * Copyright (c) 2009, 2018 Oracle and/or its affiliates. All rights reserved. |
3 | * |
4 | * This software is available to you under a choice of one of two |
5 | * licenses. You may choose to be licensed under the terms of the GNU |
6 | * General Public License (GPL) Version 2, available from the file |
7 | * COPYING in the main directory of this source tree, or the |
8 | * OpenIB.org BSD license below: |
9 | * |
10 | * Redistribution and use in source and binary forms, with or |
11 | * without modification, are permitted provided that the following |
12 | * conditions are met: |
13 | * |
14 | * - Redistributions of source code must retain the above |
15 | * copyright notice, this list of conditions and the following |
16 | * disclaimer. |
17 | * |
18 | * - Redistributions in binary form must reproduce the above |
19 | * copyright notice, this list of conditions and the following |
20 | * disclaimer in the documentation and/or other materials |
21 | * provided with the distribution. |
22 | * |
23 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
24 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
25 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND |
26 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS |
27 | * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN |
28 | * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN |
29 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
30 | * SOFTWARE. |
31 | * |
32 | */ |
33 | #include <linux/module.h> |
34 | #include <rdma/rdma_cm.h> |
35 | |
36 | #include "rds_single_path.h" |
37 | #include "rdma_transport.h" |
38 | #include "ib.h" |
39 | |
/*
 * Global listening cm_ids for RDS over RDMA: one for IPv4 and, when the
 * kernel is built with IPv6 support, a second one for IPv6.  They are
 * filled in by rds_rdma_listen_init() and destroyed and reset to NULL by
 * rds_rdma_listen_stop().
 */
static struct rdma_cm_id *rds_rdma_listen_id;
#if IS_ENABLED(CONFIG_IPV6)
static struct rdma_cm_id *rds6_rdma_listen_id;
#endif
45 | |
/*
 * Map an RDS type-of-service value to an IB service level.  Per IB
 * specification 7.7.3 the service level is a 4-bit field, so only the
 * low nibble of the ToS is kept.
 */
#define TOS_TO_SL(tos)		(0x0F & (tos))
48 | |
49 | static int rds_rdma_cm_event_handler_cmn(struct rdma_cm_id *cm_id, |
50 | struct rdma_cm_event *event, |
51 | bool isv6) |
52 | { |
53 | /* this can be null in the listening path */ |
54 | struct rds_connection *conn = cm_id->context; |
55 | struct rds_transport *trans; |
56 | int ret = 0; |
57 | int *err; |
58 | u8 len; |
59 | |
60 | rdsdebug("conn %p id %p handling event %u (%s)\n" , conn, cm_id, |
61 | event->event, rdma_event_msg(event->event)); |
62 | |
63 | if (cm_id->device->node_type == RDMA_NODE_IB_CA) |
64 | trans = &rds_ib_transport; |
65 | |
66 | /* Prevent shutdown from tearing down the connection |
67 | * while we're executing. */ |
68 | if (conn) { |
69 | mutex_lock(&conn->c_cm_lock); |
70 | |
71 | /* If the connection is being shut down, bail out |
72 | * right away. We return 0 so cm_id doesn't get |
73 | * destroyed prematurely */ |
74 | if (rds_conn_state(conn) == RDS_CONN_DISCONNECTING) { |
75 | /* Reject incoming connections while we're tearing |
76 | * down an existing one. */ |
77 | if (event->event == RDMA_CM_EVENT_CONNECT_REQUEST) |
78 | ret = 1; |
79 | goto out; |
80 | } |
81 | } |
82 | |
83 | switch (event->event) { |
84 | case RDMA_CM_EVENT_CONNECT_REQUEST: |
85 | ret = trans->cm_handle_connect(cm_id, event, isv6); |
86 | break; |
87 | |
88 | case RDMA_CM_EVENT_ADDR_RESOLVED: |
89 | if (conn) { |
90 | rdma_set_service_type(id: cm_id, tos: conn->c_tos); |
91 | rdma_set_min_rnr_timer(id: cm_id, min_rnr_timer: IB_RNR_TIMER_000_32); |
92 | /* XXX do we need to clean up if this fails? */ |
93 | ret = rdma_resolve_route(id: cm_id, |
94 | RDS_RDMA_RESOLVE_TIMEOUT_MS); |
95 | } |
96 | break; |
97 | |
98 | case RDMA_CM_EVENT_ROUTE_RESOLVED: |
99 | /* Connection could have been dropped so make sure the |
100 | * cm_id is valid before proceeding |
101 | */ |
102 | if (conn) { |
103 | struct rds_ib_connection *ibic; |
104 | |
105 | ibic = conn->c_transport_data; |
106 | if (ibic && ibic->i_cm_id == cm_id) { |
107 | cm_id->route.path_rec[0].sl = |
108 | TOS_TO_SL(conn->c_tos); |
109 | ret = trans->cm_initiate_connect(cm_id, isv6); |
110 | } else { |
111 | rds_conn_drop(conn); |
112 | } |
113 | } |
114 | break; |
115 | |
116 | case RDMA_CM_EVENT_ESTABLISHED: |
117 | if (conn) |
118 | trans->cm_connect_complete(conn, event); |
119 | break; |
120 | |
121 | case RDMA_CM_EVENT_REJECTED: |
122 | if (!conn) |
123 | break; |
124 | err = (int *)rdma_consumer_reject_data(id: cm_id, ev: event, data_len: &len); |
125 | if (!err || |
126 | (err && len >= sizeof(*err) && |
127 | ((*err) <= RDS_RDMA_REJ_INCOMPAT))) { |
128 | pr_warn("RDS/RDMA: conn <%pI6c, %pI6c> rejected, dropping connection\n" , |
129 | &conn->c_laddr, &conn->c_faddr); |
130 | |
131 | if (!conn->c_tos) |
132 | conn->c_proposed_version = RDS_PROTOCOL_COMPAT_VERSION; |
133 | |
134 | rds_conn_drop(conn); |
135 | } |
136 | rdsdebug("Connection rejected: %s\n" , |
137 | rdma_reject_msg(cm_id, event->status)); |
138 | break; |
139 | case RDMA_CM_EVENT_ADDR_ERROR: |
140 | case RDMA_CM_EVENT_ROUTE_ERROR: |
141 | case RDMA_CM_EVENT_CONNECT_ERROR: |
142 | case RDMA_CM_EVENT_UNREACHABLE: |
143 | case RDMA_CM_EVENT_DEVICE_REMOVAL: |
144 | case RDMA_CM_EVENT_ADDR_CHANGE: |
145 | if (conn) |
146 | rds_conn_drop(conn); |
147 | break; |
148 | |
149 | case RDMA_CM_EVENT_DISCONNECTED: |
150 | if (!conn) |
151 | break; |
152 | rdsdebug("DISCONNECT event - dropping connection " |
153 | "%pI6c->%pI6c\n" , &conn->c_laddr, |
154 | &conn->c_faddr); |
155 | rds_conn_drop(conn); |
156 | break; |
157 | |
158 | case RDMA_CM_EVENT_TIMEWAIT_EXIT: |
159 | if (conn) { |
160 | pr_info("RDS: RDMA_CM_EVENT_TIMEWAIT_EXIT event: dropping connection %pI6c->%pI6c\n" , |
161 | &conn->c_laddr, &conn->c_faddr); |
162 | rds_conn_drop(conn); |
163 | } |
164 | break; |
165 | |
166 | default: |
167 | /* things like device disconnect? */ |
168 | printk(KERN_ERR "RDS: unknown event %u (%s)!\n" , |
169 | event->event, rdma_event_msg(event->event)); |
170 | break; |
171 | } |
172 | |
173 | out: |
174 | if (conn) |
175 | mutex_unlock(lock: &conn->c_cm_lock); |
176 | |
177 | rdsdebug("id %p event %u (%s) handling ret %d\n" , cm_id, event->event, |
178 | rdma_event_msg(event->event), ret); |
179 | |
180 | return ret; |
181 | } |
182 | |
183 | int rds_rdma_cm_event_handler(struct rdma_cm_id *cm_id, |
184 | struct rdma_cm_event *event) |
185 | { |
186 | return rds_rdma_cm_event_handler_cmn(cm_id, event, isv6: false); |
187 | } |
188 | |
189 | #if IS_ENABLED(CONFIG_IPV6) |
190 | int rds6_rdma_cm_event_handler(struct rdma_cm_id *cm_id, |
191 | struct rdma_cm_event *event) |
192 | { |
193 | return rds_rdma_cm_event_handler_cmn(cm_id, event, isv6: true); |
194 | } |
195 | #endif |
196 | |
197 | static int rds_rdma_listen_init_common(rdma_cm_event_handler handler, |
198 | struct sockaddr *sa, |
199 | struct rdma_cm_id **ret_cm_id) |
200 | { |
201 | struct rdma_cm_id *cm_id; |
202 | int ret; |
203 | |
204 | cm_id = rdma_create_id(&init_net, handler, NULL, |
205 | RDMA_PS_TCP, IB_QPT_RC); |
206 | if (IS_ERR(ptr: cm_id)) { |
207 | ret = PTR_ERR(ptr: cm_id); |
208 | printk(KERN_ERR "RDS/RDMA: failed to setup listener, " |
209 | "rdma_create_id() returned %d\n" , ret); |
210 | return ret; |
211 | } |
212 | |
213 | /* |
214 | * XXX I bet this binds the cm_id to a device. If we want to support |
215 | * fail-over we'll have to take this into consideration. |
216 | */ |
217 | ret = rdma_bind_addr(id: cm_id, addr: sa); |
218 | if (ret) { |
219 | printk(KERN_ERR "RDS/RDMA: failed to setup listener, " |
220 | "rdma_bind_addr() returned %d\n" , ret); |
221 | goto out; |
222 | } |
223 | |
224 | ret = rdma_listen(id: cm_id, backlog: 128); |
225 | if (ret) { |
226 | printk(KERN_ERR "RDS/RDMA: failed to setup listener, " |
227 | "rdma_listen() returned %d\n" , ret); |
228 | goto out; |
229 | } |
230 | |
231 | rdsdebug("cm %p listening on port %u\n" , cm_id, RDS_PORT); |
232 | |
233 | *ret_cm_id = cm_id; |
234 | cm_id = NULL; |
235 | out: |
236 | if (cm_id) |
237 | rdma_destroy_id(id: cm_id); |
238 | return ret; |
239 | } |
240 | |
241 | /* Initialize the RDS RDMA listeners. We create two listeners for |
242 | * compatibility reason. The one on RDS_PORT is used for IPv4 |
243 | * requests only. The one on RDS_CM_PORT is used for IPv6 requests |
244 | * only. So only IPv6 enabled RDS module will communicate using this |
245 | * port. |
246 | */ |
247 | static int rds_rdma_listen_init(void) |
248 | { |
249 | int ret; |
250 | #if IS_ENABLED(CONFIG_IPV6) |
251 | struct sockaddr_in6 sin6; |
252 | #endif |
253 | struct sockaddr_in sin; |
254 | |
255 | sin.sin_family = PF_INET; |
256 | sin.sin_addr.s_addr = htonl(INADDR_ANY); |
257 | sin.sin_port = htons(RDS_PORT); |
258 | ret = rds_rdma_listen_init_common(handler: rds_rdma_cm_event_handler, |
259 | sa: (struct sockaddr *)&sin, |
260 | ret_cm_id: &rds_rdma_listen_id); |
261 | if (ret != 0) |
262 | return ret; |
263 | |
264 | #if IS_ENABLED(CONFIG_IPV6) |
265 | sin6.sin6_family = PF_INET6; |
266 | sin6.sin6_addr = in6addr_any; |
267 | sin6.sin6_port = htons(RDS_CM_PORT); |
268 | sin6.sin6_scope_id = 0; |
269 | sin6.sin6_flowinfo = 0; |
270 | ret = rds_rdma_listen_init_common(handler: rds6_rdma_cm_event_handler, |
271 | sa: (struct sockaddr *)&sin6, |
272 | ret_cm_id: &rds6_rdma_listen_id); |
273 | /* Keep going even when IPv6 is not enabled in the system. */ |
274 | if (ret != 0) |
275 | rdsdebug("Cannot set up IPv6 RDMA listener\n" ); |
276 | #endif |
277 | return 0; |
278 | } |
279 | |
280 | static void rds_rdma_listen_stop(void) |
281 | { |
282 | if (rds_rdma_listen_id) { |
283 | rdsdebug("cm %p\n" , rds_rdma_listen_id); |
284 | rdma_destroy_id(id: rds_rdma_listen_id); |
285 | rds_rdma_listen_id = NULL; |
286 | } |
287 | #if IS_ENABLED(CONFIG_IPV6) |
288 | if (rds6_rdma_listen_id) { |
289 | rdsdebug("cm %p\n" , rds6_rdma_listen_id); |
290 | rdma_destroy_id(id: rds6_rdma_listen_id); |
291 | rds6_rdma_listen_id = NULL; |
292 | } |
293 | #endif |
294 | } |
295 | |
296 | static int __init rds_rdma_init(void) |
297 | { |
298 | int ret; |
299 | |
300 | ret = rds_ib_init(); |
301 | if (ret) |
302 | goto out; |
303 | |
304 | ret = rds_rdma_listen_init(); |
305 | if (ret) |
306 | rds_ib_exit(); |
307 | out: |
308 | return ret; |
309 | } |
310 | module_init(rds_rdma_init); |
311 | |
/*
 * Module exit point: stop the RDMA listeners, then shut down the IB
 * transport.
 */
static void __exit rds_rdma_exit(void)
{
	/* stop listening first to ensure no new connections are attempted */
	rds_rdma_listen_stop();
	rds_ib_exit();
}
module_exit(rds_rdma_exit);
319 | |
/* Module metadata: author contact, short description and license. */
MODULE_AUTHOR("Oracle Corporation <rds-devel@oss.oracle.com>" );
MODULE_DESCRIPTION("RDS: IB transport" );
MODULE_LICENSE("Dual BSD/GPL" );
323 | |