Warning: This file is not a C or C++ file. It does not have highlighting.
1 | /* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR Linux-OpenIB) */ |
---|---|
2 | /* |
3 | * Copyright (c) 2008, 2018 Oracle and/or its affiliates. All rights reserved. |
4 | * |
5 | * This software is available to you under a choice of one of two |
6 | * licenses. You may choose to be licensed under the terms of the GNU |
7 | * General Public License (GPL) Version 2, available from the file |
8 | * COPYING in the main directory of this source tree, or the |
9 | * OpenIB.org BSD license below: |
10 | * |
11 | * Redistribution and use in source and binary forms, with or |
12 | * without modification, are permitted provided that the following |
13 | * conditions are met: |
14 | * |
15 | * - Redistributions of source code must retain the above |
16 | * copyright notice, this list of conditions and the following |
17 | * disclaimer. |
18 | * |
19 | * - Redistributions in binary form must reproduce the above |
20 | * copyright notice, this list of conditions and the following |
21 | * disclaimer in the documentation and/or other materials |
22 | * provided with the distribution. |
23 | * |
24 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
25 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
26 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND |
27 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS |
28 | * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN |
29 | * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN |
30 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
31 | * SOFTWARE. |
32 | * |
33 | */ |
34 | |
35 | #ifndef _LINUX_RDS_H |
36 | #define _LINUX_RDS_H |
37 | |
38 | #include <linux/types.h> |
39 | #include <linux/socket.h> /* For __kernel_sockaddr_storage. */ |
40 | #include <linux/in6.h> /* For struct in6_addr. */ |
41 | |
/*
 * NOTE(review): presumably the ABI revision of the RDS InfiniBand
 * transport; nothing else in this header refers to it - confirm with users.
 */
#define RDS_IB_ABI_VERSION		0x301

/* Socket level under which the RDS setsockopt/getsockopt options live. */
#define	SOL_RDS		276
45 | |
/*
 * setsockopt/getsockopt for SOL_RDS
 *
 * Option names. Value 4 belonged to the deprecated RDS_BARRIER and is
 * intentionally left unassigned; value 9 is not assigned in this header.
 */
#define RDS_CANCEL_SENT_TO      	1
#define RDS_GET_MR			2	/* argument: struct rds_get_mr_args */
#define RDS_FREE_MR			3
/* deprecated: RDS_BARRIER 4 */
#define RDS_RECVERR			5
#define RDS_CONG_MONITOR		6	/* argument: 64bit congestion mask */
#define RDS_GET_MR_FOR_DEST		7
#define SO_RDS_TRANSPORT		8	/* values: RDS_TRANS_* */
57 | |
/* Socket option to tap receive path latency
 *	Level SOL_RDS, option name SO_RDS_MSG_RXPATH_LATENCY
 *	The option value uses the format of struct rds_rx_trace_so
 */
#define SO_RDS_MSG_RXPATH_LATENCY	10
63 | |
64 | |
/*
 * supported values for SO_RDS_TRANSPORT
 *
 * RDS_TRANS_COUNT is one past the highest transport id; RDS_TRANS_NONE
 * is the all-ones int (~0) and therefore never collides with a real id.
 */
#define	RDS_TRANS_IB	0
#define	RDS_TRANS_GAP	1
#define	RDS_TRANS_TCP	2
#define RDS_TRANS_COUNT	3
#define	RDS_TRANS_NONE	(~0)
/* don't use RDS_TRANS_IWARP - it is deprecated */
#define RDS_TRANS_IWARP	RDS_TRANS_GAP
73 | |
/*
 * IOCTLS commands for SOL_RDS
 *
 * NOTE(review): SIOCPROTOPRIVATE is not defined in this header; it
 * presumably comes from <linux/sockios.h>, which users of these two
 * macros would have to include themselves - confirm.
 */
#define SIOCRDSSETTOS		(SIOCPROTOPRIVATE)
#define SIOCRDSGETTOS		(SIOCPROTOPRIVATE + 1)

/* Type-of-service value; the same width as the tos members used below. */
typedef __u8	rds_tos_t;
79 | |
/*
 * Control message types for SOL_RDS.
 *
 * RDS_CMSG_RDMA_ARGS (sendmsg)
 *	Request a RDMA transfer to/from the specified
 *	memory ranges.
 *	The cmsg_data is a struct rds_rdma_args.
 * RDS_CMSG_RDMA_DEST (recvmsg, sendmsg)
 *	Kernel informs application about intended
 *	source/destination of a RDMA transfer
 * RDS_CMSG_RDMA_MAP (sendmsg)
 *	Application asks kernel to map the given
 *	memory range into a IB MR, and send the
 *	R_Key along in an RDS extension header.
 *	The cmsg_data is a struct rds_get_mr_args,
 *	the same as for the GET_MR setsockopt.
 * RDS_CMSG_RDMA_STATUS (recvmsg)
 *	Returns the status of a completed RDMA operation.
 * RDS_CMSG_RXPATH_LATENCY (recvmsg)
 *	Returns rds message latencies in various stages of receive
 *	path in ns. It is set per socket using the
 *	SO_RDS_MSG_RXPATH_LATENCY socket option. Legitimate points are
 *	defined in enum rds_message_rxpath_latency. More points can be
 *	added in future. The CMSG format is struct rds_cmsg_rx_trace.
 *
 * The remaining types (congestion update, atomics, zerocopy) are not
 * described here. Value 10 is not assigned.
 */
#define RDS_CMSG_RDMA_ARGS		1
#define RDS_CMSG_RDMA_DEST		2
#define RDS_CMSG_RDMA_MAP		3
#define RDS_CMSG_RDMA_STATUS		4
#define RDS_CMSG_CONG_UPDATE		5
#define RDS_CMSG_ATOMIC_FADD		6
#define RDS_CMSG_ATOMIC_CSWP		7
#define RDS_CMSG_MASKED_ATOMIC_FADD	8
#define RDS_CMSG_MASKED_ATOMIC_CSWP	9
#define RDS_CMSG_RXPATH_LATENCY		11
#define	RDS_CMSG_ZCOPY_COOKIE		12
#define	RDS_CMSG_ZCOPY_COMPLETION	13
117 | |
/*
 * Info identifiers. RDS_INFO_FIRST and RDS_INFO_LAST bound the assigned
 * range (inclusive); RDS_INFO_LAST must track the highest identifier.
 * NOTE(review): each identifier presumably selects one of the
 * rds_info_* / rds6_info_* structures declared in this header - confirm
 * the exact pairing against the kernel side.
 */
#define RDS_INFO_FIRST			10000
#define RDS_INFO_COUNTERS		10000
#define RDS_INFO_CONNECTIONS		10001
/* 10002 aka RDS_INFO_FLOWS is deprecated */
#define RDS_INFO_SEND_MESSAGES		10003
#define RDS_INFO_RETRANS_MESSAGES       10004
#define RDS_INFO_RECV_MESSAGES          10005
#define RDS_INFO_SOCKETS                10006
#define RDS_INFO_TCP_SOCKETS            10007
#define RDS_INFO_IB_CONNECTIONS		10008
#define RDS_INFO_CONNECTION_STATS	10009
#define RDS_INFO_IWARP_CONNECTIONS	10010

/* PF_RDS6 options */
#define RDS6_INFO_CONNECTIONS		10011
#define RDS6_INFO_SEND_MESSAGES		10012
#define RDS6_INFO_RETRANS_MESSAGES	10013
#define RDS6_INFO_RECV_MESSAGES		10014
#define RDS6_INFO_SOCKETS		10015
#define RDS6_INFO_TCP_SOCKETS		10016
#define RDS6_INFO_IB_CONNECTIONS	10017

#define RDS_INFO_LAST			10017
141 | |
/*
 * One named 64bit counter. Packed: value follows the 32-byte name with
 * no padding, so the record is 40 bytes.
 */
struct rds_info_counter {
	__u8	name[32];
	__u64	value;
} __attribute__((packed));
146 | |
/* Bits of the flags member of struct rds_info_connection and its rds6 twin. */
#define RDS_INFO_CONNECTION_FLAG_SENDING	0x01
#define RDS_INFO_CONNECTION_FLAG_CONNECTING	0x02
#define RDS_INFO_CONNECTION_FLAG_CONNECTED	0x04

/* Size of the transport name buffers below, terminator included. */
#define TRANSNAMSIZ	16

/*
 * Per-connection record, IPv4 addressing (big-endian __be32 addresses).
 * Packed: 42 bytes, no padding.
 */
struct rds_info_connection {
	__u64		next_tx_seq;
	__u64		next_rx_seq;
	__be32		laddr;
	__be32		faddr;
	__u8		transport[TRANSNAMSIZ];		/* null term ascii */
	__u8		flags;
	__u8		tos;
} __attribute__((packed));

/*
 * Per-connection record, IPv6 addressing. Packed: 65 bytes.
 * Unlike struct rds_info_connection it carries no tos member.
 */
struct rds6_info_connection {
	__u64		next_tx_seq;
	__u64		next_rx_seq;
	struct in6_addr	laddr;
	struct in6_addr	faddr;
	__u8		transport[TRANSNAMSIZ];		/* null term ascii */
	__u8		flags;
} __attribute__((packed));
171 | |
/* Bits of the flags member of struct rds_info_message and its rds6 twin. */
#define RDS_INFO_MESSAGE_FLAG_ACK               0x01
#define RDS_INFO_MESSAGE_FLAG_FAST_ACK          0x02

/*
 * Per-message record, IPv4 addressing. Addresses and ports are
 * big-endian (__be32 / __be16). Packed: 26 bytes.
 */
struct rds_info_message {
	__u64		seq;
	__u32		len;
	__be32		laddr;
	__be32		faddr;
	__be16		lport;
	__be16		fport;
	__u8		flags;
	__u8		tos;
} __attribute__((packed));
185 | |
/*
 * Per-message record, IPv6 addressing; same members as
 * struct rds_info_message with in6_addr addresses. Packed: 50 bytes.
 */
struct rds6_info_message {
	__u64	seq;
	__u32	len;
	struct in6_addr	laddr;
	struct in6_addr	faddr;
	__be16	lport;
	__be16	fport;
	__u8	flags;
	__u8	tos;
} __attribute__((packed));
196 | |
/*
 * Per-socket record, IPv4 addressing. Addresses and ports are
 * big-endian (__be32 / __be16). Packed: 28 bytes.
 */
struct rds_info_socket {
	__u32		sndbuf;
	__be32		bound_addr;
	__be32		connected_addr;
	__be16		bound_port;
	__be16		connected_port;
	__u32		rcvbuf;
	__u64		inum;
} __attribute__((packed));
206 | |
/*
 * Per-socket record, IPv6 addressing; same members as
 * struct rds_info_socket with in6_addr addresses. Packed: 52 bytes.
 */
struct rds6_info_socket {
	__u32		sndbuf;
	struct in6_addr	bound_addr;
	struct in6_addr	connected_addr;
	__be16		bound_port;
	__be16		connected_port;
	__u32		rcvbuf;
	__u64		inum;
} __attribute__((packed));
216 | |
/*
 * Per-TCP-socket record, IPv4 addressing. Packed, so peer_addr sits at
 * the unaligned offset 6 right after local_port; total size 41 bytes.
 */
struct rds_info_tcp_socket {
	__be32          local_addr;
	__be16          local_port;
	__be32          peer_addr;
	__be16          peer_port;
	__u64           hdr_rem;
	__u64           data_rem;
	__u32           last_sent_nxt;
	__u32           last_expected_una;
	__u32           last_seen_una;
	__u8		tos;
} __attribute__((packed));
229 | |
/*
 * Per-TCP-socket record, IPv6 addressing. Packed: 64 bytes.
 * Unlike struct rds_info_tcp_socket it carries no tos member.
 */
struct rds6_info_tcp_socket {
	struct in6_addr	local_addr;
	__be16		local_port;
	struct in6_addr	peer_addr;
	__be16		peer_port;
	__u64		hdr_rem;
	__u64		data_rem;
	__u32		last_sent_nxt;
	__u32		last_expected_una;
	__u32		last_seen_una;
} __attribute__((packed));
241 | |
/* Length in bytes of the GID arrays in the RDMA connection records. */
#define RDS_IB_GID_LEN	16

/*
 * Per-RDMA-connection record, IPv4 addressing.
 *
 * Unlike the other info structures in this header, this one is NOT
 * packed: the compiler inserts padding after sl so that cache_allocs is
 * naturally aligned. Do not add a packed attribute or reorder members;
 * either would change the layout.
 */
struct rds_info_rdma_connection {
	__be32		src_addr;
	__be32		dst_addr;
	__u8		src_gid[RDS_IB_GID_LEN];
	__u8		dst_gid[RDS_IB_GID_LEN];

	__u32		max_send_wr;
	__u32		max_recv_wr;
	__u32		max_send_sge;
	__u32		rdma_mr_max;
	__u32		rdma_mr_size;
	__u8		tos;
	__u8		sl;
	__u32		cache_allocs;
};

/*
 * Per-RDMA-connection record, IPv6 addressing. Not packed either; the
 * same padding after sl applies.
 */
struct rds6_info_rdma_connection {
	struct in6_addr	src_addr;
	struct in6_addr	dst_addr;
	__u8		src_gid[RDS_IB_GID_LEN];
	__u8		dst_gid[RDS_IB_GID_LEN];

	__u32		max_send_wr;
	__u32		max_recv_wr;
	__u32		max_send_sge;
	__u32		rdma_mr_max;
	__u32		rdma_mr_size;
	__u8		tos;
	__u8		sl;
	__u32		cache_allocs;
};
274 | |
/*
 * RDS message Receive Path Latency points.
 * RDS_MSG_RX_DGRAM_TRACE_MAX is not a point itself: it is the number of
 * points and sizes the arrays in the two structures below.
 */
enum rds_message_rxpath_latency {
	RDS_MSG_RX_HDR_TO_DGRAM_START = 0,
	RDS_MSG_RX_DGRAM_REASSEMBLE,
	RDS_MSG_RX_DGRAM_DELIVERED,
	RDS_MSG_RX_DGRAM_TRACE_MAX
};

/*
 * Format used with the SO_RDS_MSG_RXPATH_LATENCY socket option.
 * NOTE(review): rx_traces is presumably the number of valid entries in
 * rx_trace_pos, each entry being an enum rds_message_rxpath_latency
 * value - confirm.
 */
struct rds_rx_trace_so {
	__u8 rx_traces;
	__u8 rx_trace_pos[RDS_MSG_RX_DGRAM_TRACE_MAX];
};

/*
 * Format of the RDS_CMSG_RXPATH_LATENCY control message: the same two
 * leading members as struct rds_rx_trace_so, followed by one 64bit slot
 * per latency point. Not packed, so rx_trace is naturally aligned.
 */
struct rds_cmsg_rx_trace {
	__u8 rx_traces;
	__u8 rx_trace_pos[RDS_MSG_RX_DGRAM_TRACE_MAX];
	__u64 rx_trace[RDS_MSG_RX_DGRAM_TRACE_MAX];
};
293 | |
/*
 * Congestion monitoring.
 * Congestion control in RDS happens at the host connection
 * level by exchanging a bitmap marking congested ports.
 * By default, a process sleeping in poll() is always woken
 * up when the congestion map is updated.
 * With explicit monitoring, an application can have more
 * fine-grained control.
 * The application installs a 64bit mask value in the socket,
 * where each bit corresponds to a group of ports.
 * When a congestion update arrives, RDS checks the set of
 * ports that are now uncongested against the bit mask
 * installed in the socket, and if they overlap, we queue a
 * cong_notification on the socket.
 *
 * To install the congestion monitor bitmask, use RDS_CONG_MONITOR
 * with the 64bit mask.
 * Congestion updates are received via RDS_CMSG_CONG_UPDATE
 * control messages.
 *
 * The correspondence between bits and ports is
 *	1 << (portnum % 64)
 *
 * The macro argument is parenthesized before the cast so that an
 * expression argument (e.g. "x >> n" or "a / b") is converted as a
 * whole rather than having the cast bind to its first operand only.
 */
#define RDS_CONG_MONITOR_SIZE	64
#define RDS_CONG_MONITOR_BIT(port)  (((unsigned int)(port)) % RDS_CONG_MONITOR_SIZE)
#define RDS_CONG_MONITOR_MASK(port) (1ULL << RDS_CONG_MONITOR_BIT(port))
320 | |
/*
 * RDMA related types
 */

/*
 * This encapsulates a remote memory location.
 * In the current implementation, it contains the R_Key
 * of the remote memory region, and the offset into it
 * (so that the application does not have to worry about
 * alignment).
 */
typedef __u64		rds_rdma_cookie_t;

/* One memory range: start address and length, both carried as 64bit. */
struct rds_iovec {
	__u64		addr;
	__u64		bytes;
};

/*
 * Argument of the GET_MR setsockopt and of RDS_CMSG_RDMA_MAP.
 * NOTE(review): cookie_addr is presumably the user address that
 * receives the resulting rds_rdma_cookie_t - confirm.
 */
struct rds_get_mr_args {
	struct rds_iovec vec;
	__u64		cookie_addr;
	__u64		flags;
};

/*
 * Same members as struct rds_get_mr_args, preceded by a destination
 * address; by name this pairs with RDS_GET_MR_FOR_DEST.
 */
struct rds_get_mr_for_dest_args {
	struct __kernel_sockaddr_storage dest_addr;
	struct rds_iovec 	vec;
	__u64			cookie_addr;
	__u64			flags;
};

/* Cookie plus flags; by name this pairs with RDS_FREE_MR. */
struct rds_free_mr_args {
	rds_rdma_cookie_t cookie;
	__u64		flags;
};

/*
 * cmsg_data of RDS_CMSG_RDMA_ARGS.
 * NOTE(review): local_vec_addr / nr_local presumably describe a user
 * array of struct rds_iovec and its element count - confirm.
 */
struct rds_rdma_args {
	rds_rdma_cookie_t cookie;
	struct rds_iovec remote_vec;
	__u64		local_vec_addr;
	__u64		nr_local;
	__u64		flags;
	__u64		user_token;
};

/*
 * Arguments of an atomic operation. The anonymous union holds the
 * operands of exactly one variant; its size is that of the largest
 * variant (m_cswp, four 64bit words).
 * NOTE(review): the variants presumably correspond to the
 * RDS_CMSG_ATOMIC_* / RDS_CMSG_MASKED_ATOMIC_* control message types -
 * confirm.
 */
struct rds_atomic_args {
	rds_rdma_cookie_t cookie;
	__u64		local_addr;
	__u64		remote_addr;
	union {
		struct {
			__u64		compare;
			__u64		swap;
		} cswp;
		struct {
			__u64		add;
		} fadd;
		struct {
			__u64		compare;
			__u64		swap;
			__u64		compare_mask;
			__u64		swap_mask;
		} m_cswp;
		struct {
			__u64		add;
			__u64		nocarry_mask;
		} m_fadd;
	};
	__u64		flags;
	__u64		user_token;
};
392 | |
/*
 * Completion notification: a user_token and a signed status.
 * Not packed, so the structure may carry tail padding after status.
 * NOTE(review): presumably the cmsg_data of RDS_CMSG_RDMA_STATUS, with
 * user_token echoing the value supplied in the request - confirm.
 */
struct rds_rdma_notify {
	__u64		user_token;
	__s32		status;
};

/* NOTE(review): presumably the values of rds_rdma_notify.status - confirm. */
#define RDS_RDMA_SUCCESS	0
#define RDS_RDMA_REMOTE_ERROR	1
#define RDS_RDMA_CANCELED	2
#define RDS_RDMA_DROPPED	3
#define RDS_RDMA_OTHER_ERROR	4
403 | |
/* Capacity of the cookies array in struct rds_zcopy_cookies. */
#define	RDS_MAX_ZCOOKIES	8

/*
 * Batch of up to RDS_MAX_ZCOOKIES 32bit cookies.
 * NOTE(review): num is presumably the count of valid entries in
 * cookies, delivered via RDS_CMSG_ZCOPY_COMPLETION - confirm.
 */
struct rds_zcopy_cookies {
	__u32 num;
	__u32 cookies[RDS_MAX_ZCOOKIES];
};
409 | |
/*
 * Common set of flags for all RDMA related structs
 *
 * Each flag is a distinct single bit, so flags can be OR-ed together.
 */
#define RDS_RDMA_READWRITE	0x0001
#define RDS_RDMA_FENCE		0x0002	/* use FENCE for immediate send */
#define RDS_RDMA_INVALIDATE	0x0004	/* invalidate R_Key after freeing MR */
#define RDS_RDMA_USE_ONCE	0x0008	/* free MR after use */
#define RDS_RDMA_DONTWAIT	0x0010	/* Don't wait in SET_BARRIER */
#define RDS_RDMA_NOTIFY_ME	0x0020	/* Notify when operation completes */
#define RDS_RDMA_SILENT		0x0040	/* Do not interrupt remote */
420 | |
#endif /* _LINUX_RDS_H */
422 |
Warning: This file is not a C or C++ file. It does not have highlighting.