// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/*
 * Copyright(c) 2015 - 2020 Intel Corporation.
 */

#include <rdma/ib_mad.h>
#include <rdma/ib_user_verbs.h>
#include <linux/io.h>
#include <linux/module.h>
#include <linux/utsname.h>
#include <linux/rculist.h>
#include <linux/mm.h>
#include <linux/vmalloc.h>
#include <rdma/opa_addr.h>
#include <linux/nospec.h>

#include "hfi.h"
#include "common.h"
#include "device.h"
#include "trace.h"
#include "qp.h"
#include "verbs_txreq.h"
#include "debugfs.h"
#include "vnic.h"
#include "fault.h"
#include "affinity.h"
#include "ipoib.h"

static unsigned int hfi1_lkey_table_size = 16;
module_param_named(lkey_table_size, hfi1_lkey_table_size, uint,
		   S_IRUGO);
MODULE_PARM_DESC(lkey_table_size,
		 "LKEY table size in bits (2^n, 1 <= n <= 23)");

static unsigned int hfi1_max_pds = 0xFFFF;
module_param_named(max_pds, hfi1_max_pds, uint, S_IRUGO);
MODULE_PARM_DESC(max_pds,
		 "Maximum number of protection domains to support");

static unsigned int hfi1_max_ahs = 0xFFFF;
module_param_named(max_ahs, hfi1_max_ahs, uint, S_IRUGO);
MODULE_PARM_DESC(max_ahs, "Maximum number of address handles to support");

unsigned int hfi1_max_cqes = 0x2FFFFF;
module_param_named(max_cqes, hfi1_max_cqes, uint, S_IRUGO);
MODULE_PARM_DESC(max_cqes,
		 "Maximum number of completion queue entries to support");

unsigned int hfi1_max_cqs = 0x1FFFF;
module_param_named(max_cqs, hfi1_max_cqs, uint, S_IRUGO);
MODULE_PARM_DESC(max_cqs, "Maximum number of completion queues to support");

unsigned int hfi1_max_qp_wrs = 0x3FFF;
module_param_named(max_qp_wrs, hfi1_max_qp_wrs, uint, S_IRUGO);
MODULE_PARM_DESC(max_qp_wrs, "Maximum number of QP WRs to support");

unsigned int hfi1_max_qps = 32768;
module_param_named(max_qps, hfi1_max_qps, uint, S_IRUGO);
MODULE_PARM_DESC(max_qps, "Maximum number of QPs to support");

unsigned int hfi1_max_sges = 0x60;
module_param_named(max_sges, hfi1_max_sges, uint, S_IRUGO);
MODULE_PARM_DESC(max_sges, "Maximum number of SGEs to support");

unsigned int hfi1_max_mcast_grps = 16384;
module_param_named(max_mcast_grps, hfi1_max_mcast_grps, uint, S_IRUGO);
MODULE_PARM_DESC(max_mcast_grps,
		 "Maximum number of multicast groups to support");

unsigned int hfi1_max_mcast_qp_attached = 16;
module_param_named(max_mcast_qp_attached, hfi1_max_mcast_qp_attached,
		   uint, S_IRUGO);
MODULE_PARM_DESC(max_mcast_qp_attached,
		 "Maximum number of attached QPs to support");

unsigned int hfi1_max_srqs = 1024;
module_param_named(max_srqs, hfi1_max_srqs, uint, S_IRUGO);
MODULE_PARM_DESC(max_srqs, "Maximum number of SRQs to support");

unsigned int hfi1_max_srq_sges = 128;
module_param_named(max_srq_sges, hfi1_max_srq_sges, uint, S_IRUGO);
MODULE_PARM_DESC(max_srq_sges, "Maximum number of SRQ SGEs to support");

unsigned int hfi1_max_srq_wrs = 0x1FFFF;
module_param_named(max_srq_wrs, hfi1_max_srq_wrs, uint, S_IRUGO);
MODULE_PARM_DESC(max_srq_wrs, "Maximum number of SRQ WRs to support");

unsigned short piothreshold = 256;
module_param(piothreshold, ushort, S_IRUGO);
MODULE_PARM_DESC(piothreshold, "size used to determine sdma vs. pio");

static unsigned int sge_copy_mode;
module_param(sge_copy_mode, uint, S_IRUGO);
MODULE_PARM_DESC(sge_copy_mode,
		 "Verbs copy mode: 0 use memcpy, 1 use cacheless copy, 2 adapt based on WSS");

static void verbs_sdma_complete(
	struct sdma_txreq *cookie,
	int status);

static int pio_wait(struct rvt_qp *qp,
		    struct send_context *sc,
		    struct hfi1_pkt_state *ps,
		    u32 flag);

/* Length of buffer to create verbs txreq cache name */
#define TXREQ_NAME_LEN 24

static uint wss_threshold = 80;
module_param(wss_threshold, uint, S_IRUGO);
MODULE_PARM_DESC(wss_threshold, "Percentage (1-100) of LLC to use as a threshold for a cacheless copy");
static uint wss_clean_period = 256;
module_param(wss_clean_period, uint, S_IRUGO);
MODULE_PARM_DESC(wss_clean_period, "Count of verbs copies before an entry in the page copy table is cleaned");
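/*
 * Note: these two parameters only matter with sge_copy_mode == 2
 * (adaptive).  In that mode, verbs copies switch to the cacheless
 * path once the working set size (WSS) of recently copied pages
 * exceeds wss_threshold percent of the last-level cache, with
 * wss_clean_period controlling how quickly pages age out of the WSS.
 */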

/*
 * Translate ib_wr_opcode into ib_wc_opcode.
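 *
 * Note that the TID RDMA entries map back to the plain RDMA
 * completion opcodes: TID RDMA is internal to hfi1, so ULPs see
 * ordinary RDMA read/write completions.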
 */
const enum ib_wc_opcode ib_hfi1_wc_opcode[] = {
	[IB_WR_RDMA_WRITE] = IB_WC_RDMA_WRITE,
	[IB_WR_TID_RDMA_WRITE] = IB_WC_RDMA_WRITE,
	[IB_WR_RDMA_WRITE_WITH_IMM] = IB_WC_RDMA_WRITE,
	[IB_WR_SEND] = IB_WC_SEND,
	[IB_WR_SEND_WITH_IMM] = IB_WC_SEND,
	[IB_WR_RDMA_READ] = IB_WC_RDMA_READ,
	[IB_WR_TID_RDMA_READ] = IB_WC_RDMA_READ,
	[IB_WR_ATOMIC_CMP_AND_SWP] = IB_WC_COMP_SWAP,
	[IB_WR_ATOMIC_FETCH_AND_ADD] = IB_WC_FETCH_ADD,
	[IB_WR_SEND_WITH_INV] = IB_WC_SEND,
	[IB_WR_LOCAL_INV] = IB_WC_LOCAL_INV,
	[IB_WR_REG_MR] = IB_WC_REG_MR
};

/*
 * Length of header by opcode, 0 --> not supported
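 *
 * Sums are in bytes: LRH (8) + BTH (12), plus any per-opcode
 * extension headers (e.g. RETH 16, AETH 4, AtomicETH 28, DETH 8,
 * immediate data 4).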
 */
const u8 hdr_len_by_opcode[256] = {
	/* RC */
	[IB_OPCODE_RC_SEND_FIRST] = 12 + 8,
	[IB_OPCODE_RC_SEND_MIDDLE] = 12 + 8,
	[IB_OPCODE_RC_SEND_LAST] = 12 + 8,
	[IB_OPCODE_RC_SEND_LAST_WITH_IMMEDIATE] = 12 + 8 + 4,
	[IB_OPCODE_RC_SEND_ONLY] = 12 + 8,
	[IB_OPCODE_RC_SEND_ONLY_WITH_IMMEDIATE] = 12 + 8 + 4,
	[IB_OPCODE_RC_RDMA_WRITE_FIRST] = 12 + 8 + 16,
	[IB_OPCODE_RC_RDMA_WRITE_MIDDLE] = 12 + 8,
	[IB_OPCODE_RC_RDMA_WRITE_LAST] = 12 + 8,
	[IB_OPCODE_RC_RDMA_WRITE_LAST_WITH_IMMEDIATE] = 12 + 8 + 4,
	[IB_OPCODE_RC_RDMA_WRITE_ONLY] = 12 + 8 + 16,
	[IB_OPCODE_RC_RDMA_WRITE_ONLY_WITH_IMMEDIATE] = 12 + 8 + 20,
	[IB_OPCODE_RC_RDMA_READ_REQUEST] = 12 + 8 + 16,
	[IB_OPCODE_RC_RDMA_READ_RESPONSE_FIRST] = 12 + 8 + 4,
	[IB_OPCODE_RC_RDMA_READ_RESPONSE_MIDDLE] = 12 + 8,
	[IB_OPCODE_RC_RDMA_READ_RESPONSE_LAST] = 12 + 8 + 4,
	[IB_OPCODE_RC_RDMA_READ_RESPONSE_ONLY] = 12 + 8 + 4,
	[IB_OPCODE_RC_ACKNOWLEDGE] = 12 + 8 + 4,
	[IB_OPCODE_RC_ATOMIC_ACKNOWLEDGE] = 12 + 8 + 4 + 8,
	[IB_OPCODE_RC_COMPARE_SWAP] = 12 + 8 + 28,
	[IB_OPCODE_RC_FETCH_ADD] = 12 + 8 + 28,
	[IB_OPCODE_RC_SEND_LAST_WITH_INVALIDATE] = 12 + 8 + 4,
	[IB_OPCODE_RC_SEND_ONLY_WITH_INVALIDATE] = 12 + 8 + 4,
	[IB_OPCODE_TID_RDMA_READ_REQ] = 12 + 8 + 36,
	[IB_OPCODE_TID_RDMA_READ_RESP] = 12 + 8 + 36,
	[IB_OPCODE_TID_RDMA_WRITE_REQ] = 12 + 8 + 36,
	[IB_OPCODE_TID_RDMA_WRITE_RESP] = 12 + 8 + 36,
	[IB_OPCODE_TID_RDMA_WRITE_DATA] = 12 + 8 + 36,
	[IB_OPCODE_TID_RDMA_WRITE_DATA_LAST] = 12 + 8 + 36,
	[IB_OPCODE_TID_RDMA_ACK] = 12 + 8 + 36,
	[IB_OPCODE_TID_RDMA_RESYNC] = 12 + 8 + 36,
	/* UC */
	[IB_OPCODE_UC_SEND_FIRST] = 12 + 8,
	[IB_OPCODE_UC_SEND_MIDDLE] = 12 + 8,
	[IB_OPCODE_UC_SEND_LAST] = 12 + 8,
	[IB_OPCODE_UC_SEND_LAST_WITH_IMMEDIATE] = 12 + 8 + 4,
	[IB_OPCODE_UC_SEND_ONLY] = 12 + 8,
	[IB_OPCODE_UC_SEND_ONLY_WITH_IMMEDIATE] = 12 + 8 + 4,
	[IB_OPCODE_UC_RDMA_WRITE_FIRST] = 12 + 8 + 16,
	[IB_OPCODE_UC_RDMA_WRITE_MIDDLE] = 12 + 8,
	[IB_OPCODE_UC_RDMA_WRITE_LAST] = 12 + 8,
	[IB_OPCODE_UC_RDMA_WRITE_LAST_WITH_IMMEDIATE] = 12 + 8 + 4,
	[IB_OPCODE_UC_RDMA_WRITE_ONLY] = 12 + 8 + 16,
	[IB_OPCODE_UC_RDMA_WRITE_ONLY_WITH_IMMEDIATE] = 12 + 8 + 20,
	/* UD */
	[IB_OPCODE_UD_SEND_ONLY] = 12 + 8 + 8,
	[IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE] = 12 + 8 + 12
};

static const opcode_handler opcode_handler_tbl[256] = {
	/* RC */
	[IB_OPCODE_RC_SEND_FIRST] = &hfi1_rc_rcv,
	[IB_OPCODE_RC_SEND_MIDDLE] = &hfi1_rc_rcv,
	[IB_OPCODE_RC_SEND_LAST] = &hfi1_rc_rcv,
	[IB_OPCODE_RC_SEND_LAST_WITH_IMMEDIATE] = &hfi1_rc_rcv,
	[IB_OPCODE_RC_SEND_ONLY] = &hfi1_rc_rcv,
	[IB_OPCODE_RC_SEND_ONLY_WITH_IMMEDIATE] = &hfi1_rc_rcv,
	[IB_OPCODE_RC_RDMA_WRITE_FIRST] = &hfi1_rc_rcv,
	[IB_OPCODE_RC_RDMA_WRITE_MIDDLE] = &hfi1_rc_rcv,
	[IB_OPCODE_RC_RDMA_WRITE_LAST] = &hfi1_rc_rcv,
	[IB_OPCODE_RC_RDMA_WRITE_LAST_WITH_IMMEDIATE] = &hfi1_rc_rcv,
	[IB_OPCODE_RC_RDMA_WRITE_ONLY] = &hfi1_rc_rcv,
	[IB_OPCODE_RC_RDMA_WRITE_ONLY_WITH_IMMEDIATE] = &hfi1_rc_rcv,
	[IB_OPCODE_RC_RDMA_READ_REQUEST] = &hfi1_rc_rcv,
	[IB_OPCODE_RC_RDMA_READ_RESPONSE_FIRST] = &hfi1_rc_rcv,
	[IB_OPCODE_RC_RDMA_READ_RESPONSE_MIDDLE] = &hfi1_rc_rcv,
	[IB_OPCODE_RC_RDMA_READ_RESPONSE_LAST] = &hfi1_rc_rcv,
	[IB_OPCODE_RC_RDMA_READ_RESPONSE_ONLY] = &hfi1_rc_rcv,
	[IB_OPCODE_RC_ACKNOWLEDGE] = &hfi1_rc_rcv,
	[IB_OPCODE_RC_ATOMIC_ACKNOWLEDGE] = &hfi1_rc_rcv,
	[IB_OPCODE_RC_COMPARE_SWAP] = &hfi1_rc_rcv,
	[IB_OPCODE_RC_FETCH_ADD] = &hfi1_rc_rcv,
	[IB_OPCODE_RC_SEND_LAST_WITH_INVALIDATE] = &hfi1_rc_rcv,
	[IB_OPCODE_RC_SEND_ONLY_WITH_INVALIDATE] = &hfi1_rc_rcv,

	/* TID RDMA has separate handlers for different opcodes. */
	[IB_OPCODE_TID_RDMA_WRITE_REQ] = &hfi1_rc_rcv_tid_rdma_write_req,
	[IB_OPCODE_TID_RDMA_WRITE_RESP] = &hfi1_rc_rcv_tid_rdma_write_resp,
	[IB_OPCODE_TID_RDMA_WRITE_DATA] = &hfi1_rc_rcv_tid_rdma_write_data,
	[IB_OPCODE_TID_RDMA_WRITE_DATA_LAST] = &hfi1_rc_rcv_tid_rdma_write_data,
	[IB_OPCODE_TID_RDMA_READ_REQ] = &hfi1_rc_rcv_tid_rdma_read_req,
	[IB_OPCODE_TID_RDMA_READ_RESP] = &hfi1_rc_rcv_tid_rdma_read_resp,
	[IB_OPCODE_TID_RDMA_RESYNC] = &hfi1_rc_rcv_tid_rdma_resync,
	[IB_OPCODE_TID_RDMA_ACK] = &hfi1_rc_rcv_tid_rdma_ack,

	/* UC */
	[IB_OPCODE_UC_SEND_FIRST] = &hfi1_uc_rcv,
	[IB_OPCODE_UC_SEND_MIDDLE] = &hfi1_uc_rcv,
	[IB_OPCODE_UC_SEND_LAST] = &hfi1_uc_rcv,
	[IB_OPCODE_UC_SEND_LAST_WITH_IMMEDIATE] = &hfi1_uc_rcv,
	[IB_OPCODE_UC_SEND_ONLY] = &hfi1_uc_rcv,
	[IB_OPCODE_UC_SEND_ONLY_WITH_IMMEDIATE] = &hfi1_uc_rcv,
	[IB_OPCODE_UC_RDMA_WRITE_FIRST] = &hfi1_uc_rcv,
	[IB_OPCODE_UC_RDMA_WRITE_MIDDLE] = &hfi1_uc_rcv,
	[IB_OPCODE_UC_RDMA_WRITE_LAST] = &hfi1_uc_rcv,
	[IB_OPCODE_UC_RDMA_WRITE_LAST_WITH_IMMEDIATE] = &hfi1_uc_rcv,
	[IB_OPCODE_UC_RDMA_WRITE_ONLY] = &hfi1_uc_rcv,
	[IB_OPCODE_UC_RDMA_WRITE_ONLY_WITH_IMMEDIATE] = &hfi1_uc_rcv,
	/* UD */
	[IB_OPCODE_UD_SEND_ONLY] = &hfi1_ud_rcv,
	[IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE] = &hfi1_ud_rcv,
	/* CNP */
	[IB_OPCODE_CNP] = &hfi1_cnp_rcv
};

#define OPMASK 0x1f

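/*
 * A set bit below marks a "single packet" opcode that may go out over
 * PIO.  IB opcodes are 8 bits: the top 3 bits select the transport
 * (hence the BIT(3)-sized array indexed by opcode >> 5) and the low
 * 5 bits (OPMASK) select the operation within it.
 */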
static const u32 pio_opmask[BIT(3)] = {
	/* RC */
	[IB_OPCODE_RC >> 5] =
		BIT(RC_OP(SEND_ONLY) & OPMASK) |
		BIT(RC_OP(SEND_ONLY_WITH_IMMEDIATE) & OPMASK) |
		BIT(RC_OP(RDMA_WRITE_ONLY) & OPMASK) |
		BIT(RC_OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE) & OPMASK) |
		BIT(RC_OP(RDMA_READ_REQUEST) & OPMASK) |
		BIT(RC_OP(ACKNOWLEDGE) & OPMASK) |
		BIT(RC_OP(ATOMIC_ACKNOWLEDGE) & OPMASK) |
		BIT(RC_OP(COMPARE_SWAP) & OPMASK) |
		BIT(RC_OP(FETCH_ADD) & OPMASK),
	/* UC */
	[IB_OPCODE_UC >> 5] =
		BIT(UC_OP(SEND_ONLY) & OPMASK) |
		BIT(UC_OP(SEND_ONLY_WITH_IMMEDIATE) & OPMASK) |
		BIT(UC_OP(RDMA_WRITE_ONLY) & OPMASK) |
		BIT(UC_OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE) & OPMASK),
};

/*
 * System image GUID.
 */
__be64 ib_hfi1_sys_image_guid;

/*
 * Make sure the QP is ready and able to accept the given opcode.
 */
static inline opcode_handler qp_ok(struct hfi1_packet *packet)
{
	if (!(ib_rvt_state_ops[packet->qp->state] & RVT_PROCESS_RECV_OK))
		return NULL;
	if (((packet->opcode & RVT_OPCODE_QP_MASK) ==
	     packet->qp->allowed_ops) ||
	    (packet->opcode == IB_OPCODE_CNP))
		return opcode_handler_tbl[packet->opcode];

	return NULL;
}

static u64 hfi1_fault_tx(struct rvt_qp *qp, u8 opcode, u64 pbc)
{
#ifdef CONFIG_FAULT_INJECTION
	if ((opcode & IB_OPCODE_MSP) == IB_OPCODE_MSP) {
		/*
		 * In order to drop non-IB traffic we
		 * set PbcInsertHrc to NONE (0x2).
		 * The packet will still be delivered
		 * to the receiving node but a
		 * KHdrHCRCErr (KDETH packet with a bad
		 * HCRC) will be triggered and the
		 * packet will not be delivered to the
		 * correct context.
		 */
		pbc &= ~PBC_INSERT_HCRC_SMASK;
		pbc |= (u64)PBC_IHCRC_NONE << PBC_INSERT_HCRC_SHIFT;
	} else {
		/*
		 * In order to drop regular verbs
		 * traffic we set the PbcTestEbp
		 * flag. The packet will still be
		 * delivered to the receiving node but
		 * a 'late ebp error' will be
		 * triggered and will be dropped.
		 */
		pbc |= PBC_TEST_EBP;
	}
#endif
	return pbc;
}

static opcode_handler tid_qp_ok(int opcode, struct hfi1_packet *packet)
{
	if (packet->qp->ibqp.qp_type != IB_QPT_RC ||
	    !(ib_rvt_state_ops[packet->qp->state] & RVT_PROCESS_RECV_OK))
		return NULL;
	if ((opcode & RVT_OPCODE_QP_MASK) == IB_OPCODE_TID_RDMA)
		return opcode_handler_tbl[opcode];
	return NULL;
}

void hfi1_kdeth_eager_rcv(struct hfi1_packet *packet)
{
	struct hfi1_ctxtdata *rcd = packet->rcd;
	struct ib_header *hdr = packet->hdr;
	u32 tlen = packet->tlen;
	struct hfi1_pportdata *ppd = rcd->ppd;
	struct hfi1_ibport *ibp = &ppd->ibport_data;
	struct rvt_dev_info *rdi = &ppd->dd->verbs_dev.rdi;
	opcode_handler opcode_handler;
	unsigned long flags;
	u32 qp_num;
	int lnh;
	u8 opcode;

	/* DW == LRH (2) + BTH (3) + KDETH (9) + CRC (1) */
	if (unlikely(tlen < 15 * sizeof(u32)))
		goto drop;

	lnh = be16_to_cpu(hdr->lrh[0]) & 3;
	if (lnh != HFI1_LRH_BTH)
		goto drop;

	packet->ohdr = &hdr->u.oth;
	trace_input_ibhdr(rcd->dd, packet, !!(rhf_dc_info(packet->rhf)));

	opcode = (be32_to_cpu(packet->ohdr->bth[0]) >> 24);
	inc_opstats(tlen, &rcd->opstats->stats[opcode]);

	/* verbs_qp can be picked up from any tid_rdma header struct */
	qp_num = be32_to_cpu(packet->ohdr->u.tid_rdma.r_req.verbs_qp) &
		RVT_QPN_MASK;

	rcu_read_lock();
	packet->qp = rvt_lookup_qpn(rdi, &ibp->rvp, qp_num);
	if (!packet->qp)
		goto drop_rcu;
	spin_lock_irqsave(&packet->qp->r_lock, flags);
	opcode_handler = tid_qp_ok(opcode, packet);
	if (likely(opcode_handler))
		opcode_handler(packet);
	else
		goto drop_unlock;
	spin_unlock_irqrestore(&packet->qp->r_lock, flags);
	rcu_read_unlock();

	return;
drop_unlock:
	spin_unlock_irqrestore(&packet->qp->r_lock, flags);
drop_rcu:
	rcu_read_unlock();
drop:
	ibp->rvp.n_pkt_drops++;
}

void hfi1_kdeth_expected_rcv(struct hfi1_packet *packet)
{
	struct hfi1_ctxtdata *rcd = packet->rcd;
	struct ib_header *hdr = packet->hdr;
	u32 tlen = packet->tlen;
	struct hfi1_pportdata *ppd = rcd->ppd;
	struct hfi1_ibport *ibp = &ppd->ibport_data;
	struct rvt_dev_info *rdi = &ppd->dd->verbs_dev.rdi;
	opcode_handler opcode_handler;
	unsigned long flags;
	u32 qp_num;
	int lnh;
	u8 opcode;

	/* DW == LRH (2) + BTH (3) + KDETH (9) + CRC (1) */
	if (unlikely(tlen < 15 * sizeof(u32)))
		goto drop;

	lnh = be16_to_cpu(hdr->lrh[0]) & 3;
	if (lnh != HFI1_LRH_BTH)
		goto drop;

	packet->ohdr = &hdr->u.oth;
	trace_input_ibhdr(rcd->dd, packet, !!(rhf_dc_info(packet->rhf)));

	opcode = (be32_to_cpu(packet->ohdr->bth[0]) >> 24);
	inc_opstats(tlen, &rcd->opstats->stats[opcode]);

	/* verbs_qp can be picked up from any tid_rdma header struct */
	qp_num = be32_to_cpu(packet->ohdr->u.tid_rdma.r_rsp.verbs_qp) &
		RVT_QPN_MASK;

	rcu_read_lock();
	packet->qp = rvt_lookup_qpn(rdi, &ibp->rvp, qp_num);
	if (!packet->qp)
		goto drop_rcu;
	spin_lock_irqsave(&packet->qp->r_lock, flags);
	opcode_handler = tid_qp_ok(opcode, packet);
	if (likely(opcode_handler))
		opcode_handler(packet);
	else
		goto drop_unlock;
	spin_unlock_irqrestore(&packet->qp->r_lock, flags);
	rcu_read_unlock();

	return;
drop_unlock:
	spin_unlock_irqrestore(&packet->qp->r_lock, flags);
drop_rcu:
	rcu_read_unlock();
drop:
	ibp->rvp.n_pkt_drops++;
}

static int hfi1_do_pkey_check(struct hfi1_packet *packet)
{
	struct hfi1_ctxtdata *rcd = packet->rcd;
	struct hfi1_pportdata *ppd = rcd->ppd;
	struct hfi1_16b_header *hdr = packet->hdr;
	u16 pkey;

	/* Pkey check needed only for bypass packets */
	if (packet->etype != RHF_RCV_TYPE_BYPASS)
		return 0;

	/* Perform pkey check */
	pkey = hfi1_16B_get_pkey(hdr);
	return ingress_pkey_check(ppd, pkey, packet->sc,
				  packet->qp->s_pkey_index,
				  packet->slid, true);
}

static inline void hfi1_handle_packet(struct hfi1_packet *packet,
				      bool is_mcast)
{
	u32 qp_num;
	struct hfi1_ctxtdata *rcd = packet->rcd;
	struct hfi1_pportdata *ppd = rcd->ppd;
	struct hfi1_ibport *ibp = rcd_to_iport(rcd);
	struct rvt_dev_info *rdi = &ppd->dd->verbs_dev.rdi;
	opcode_handler packet_handler;
	unsigned long flags;

	inc_opstats(packet->tlen, &rcd->opstats->stats[packet->opcode]);

	if (unlikely(is_mcast)) {
		struct rvt_mcast *mcast;
		struct rvt_mcast_qp *p;

		if (!packet->grh)
			goto drop;
		mcast = rvt_mcast_find(&ibp->rvp,
				       &packet->grh->dgid,
				       opa_get_lid(packet->dlid, 9B));
		if (!mcast)
			goto drop;
		rcu_read_lock();
		list_for_each_entry_rcu(p, &mcast->qp_list, list) {
			packet->qp = p->qp;
			if (hfi1_do_pkey_check(packet))
				goto unlock_drop;
			spin_lock_irqsave(&packet->qp->r_lock, flags);
			packet_handler = qp_ok(packet);
			if (likely(packet_handler))
				packet_handler(packet);
			else
				ibp->rvp.n_pkt_drops++;
			spin_unlock_irqrestore(&packet->qp->r_lock, flags);
		}
		rcu_read_unlock();
		/*
		 * Notify rvt_multicast_detach() if it is waiting for us
		 * to finish.
		 */
		if (atomic_dec_return(&mcast->refcount) <= 1)
			wake_up(&mcast->wait);
	} else {
		/* Get the destination QP number. */
		if (packet->etype == RHF_RCV_TYPE_BYPASS &&
		    hfi1_16B_get_l4(packet->hdr) == OPA_16B_L4_FM)
			qp_num = hfi1_16B_get_dest_qpn(packet->mgmt);
		else
			qp_num = ib_bth_get_qpn(packet->ohdr);

		rcu_read_lock();
		packet->qp = rvt_lookup_qpn(rdi, &ibp->rvp, qp_num);
		if (!packet->qp)
			goto unlock_drop;

		if (hfi1_do_pkey_check(packet))
			goto unlock_drop;

		spin_lock_irqsave(&packet->qp->r_lock, flags);
		packet_handler = qp_ok(packet);
		if (likely(packet_handler))
			packet_handler(packet);
		else
			ibp->rvp.n_pkt_drops++;
		spin_unlock_irqrestore(&packet->qp->r_lock, flags);
		rcu_read_unlock();
	}
	return;
unlock_drop:
	rcu_read_unlock();
drop:
	ibp->rvp.n_pkt_drops++;
}

/**
 * hfi1_ib_rcv - process an incoming packet
 * @packet: data packet information
 *
 * This is called to process an incoming packet at interrupt level.
 */
void hfi1_ib_rcv(struct hfi1_packet *packet)
{
	struct hfi1_ctxtdata *rcd = packet->rcd;

	trace_input_ibhdr(rcd->dd, packet, !!(rhf_dc_info(packet->rhf)));
	hfi1_handle_packet(packet, hfi1_check_mcast(packet->dlid));
}

void hfi1_16B_rcv(struct hfi1_packet *packet)
{
	struct hfi1_ctxtdata *rcd = packet->rcd;

	trace_input_ibhdr(rcd->dd, packet, false);
	hfi1_handle_packet(packet, hfi1_check_mcast(packet->dlid));
}

/*
 * This is called from a timer to check for QPs
 * which need kernel memory in order to send a packet.
 */
static void mem_timer(struct timer_list *t)
{
	struct hfi1_ibdev *dev = from_timer(dev, t, mem_timer);
	struct list_head *list = &dev->memwait;
	struct rvt_qp *qp = NULL;
	struct iowait *wait;
	unsigned long flags;
	struct hfi1_qp_priv *priv;

	write_seqlock_irqsave(&dev->iowait_lock, flags);
	if (!list_empty(list)) {
		wait = list_first_entry(list, struct iowait, list);
		qp = iowait_to_qp(wait);
		priv = qp->priv;
		list_del_init(&priv->s_iowait.list);
		priv->s_iowait.lock = NULL;
		/* refcount held until actual wake up */
		if (!list_empty(list))
			mod_timer(&dev->mem_timer, jiffies + 1);
	}
	write_sequnlock_irqrestore(&dev->iowait_lock, flags);

	if (qp)
		hfi1_qp_wakeup(qp, RVT_S_WAIT_KMEM);
}

/*
 * This is called with progress side lock held.
 */
/* New API */
static void verbs_sdma_complete(
	struct sdma_txreq *cookie,
	int status)
{
	struct verbs_txreq *tx =
		container_of(cookie, struct verbs_txreq, txreq);
	struct rvt_qp *qp = tx->qp;

	spin_lock(&qp->s_lock);
	if (tx->wqe) {
		rvt_send_complete(qp, tx->wqe, IB_WC_SUCCESS);
	} else if (qp->ibqp.qp_type == IB_QPT_RC) {
		struct hfi1_opa_header *hdr;

		hdr = &tx->phdr.hdr;
		if (unlikely(status == SDMA_TXREQ_S_ABORTED))
			hfi1_rc_verbs_aborted(qp, hdr);
		hfi1_rc_send_complete(qp, hdr);
	}
	spin_unlock(&qp->s_lock);

	hfi1_put_txreq(tx);
}

void hfi1_wait_kmem(struct rvt_qp *qp)
{
	struct hfi1_qp_priv *priv = qp->priv;
	struct ib_qp *ibqp = &qp->ibqp;
	struct ib_device *ibdev = ibqp->device;
	struct hfi1_ibdev *dev = to_idev(ibdev);

	if (list_empty(&priv->s_iowait.list)) {
		if (list_empty(&dev->memwait))
			mod_timer(&dev->mem_timer, jiffies + 1);
		qp->s_flags |= RVT_S_WAIT_KMEM;
		list_add_tail(&priv->s_iowait.list, &dev->memwait);
		priv->s_iowait.lock = &dev->iowait_lock;
		trace_hfi1_qpsleep(qp, RVT_S_WAIT_KMEM);
		rvt_get_qp(qp);
	}
}

static int wait_kmem(struct hfi1_ibdev *dev,
		     struct rvt_qp *qp,
		     struct hfi1_pkt_state *ps)
{
	unsigned long flags;
	int ret = 0;

	spin_lock_irqsave(&qp->s_lock, flags);
	if (ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK) {
		write_seqlock(&dev->iowait_lock);
		list_add_tail(&ps->s_txreq->txreq.list,
			      &ps->wait->tx_head);
		hfi1_wait_kmem(qp);
		write_sequnlock(&dev->iowait_lock);
		hfi1_qp_unbusy(qp, ps->wait);
		ret = -EBUSY;
	}
	spin_unlock_irqrestore(&qp->s_lock, flags);

	return ret;
}

/*
 * This routine calls txadds for each sg entry.
 *
 * Add failures will revert the sge cursor
 */
static noinline int build_verbs_ulp_payload(
	struct sdma_engine *sde,
	u32 length,
	struct verbs_txreq *tx)
{
	struct rvt_sge_state *ss = tx->ss;
	struct rvt_sge *sg_list = ss->sg_list;
	struct rvt_sge sge = ss->sge;
	u8 num_sge = ss->num_sge;
	u32 len;
	int ret = 0;

	while (length) {
		len = rvt_get_sge_length(&ss->sge, length);
		WARN_ON_ONCE(len == 0);
		ret = sdma_txadd_kvaddr(
			sde->dd,
			&tx->txreq,
			ss->sge.vaddr,
			len);
		if (ret)
			goto bail_txadd;
		rvt_update_sge(ss, len, false);
		length -= len;
	}
	return ret;
bail_txadd:
	/* unwind cursor */
	ss->sge = sge;
	ss->num_sge = num_sge;
	ss->sg_list = sg_list;
	return ret;
}

/**
 * update_tx_opstats - record stats by opcode
 * @qp: the qp
 * @ps: transmit packet state
 * @plen: the plen in dwords
 *
 * This is a routine to record the tx opstats after a
 * packet has been presented to the egress mechanism.
 */
static void update_tx_opstats(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
			      u32 plen)
{
#ifdef CONFIG_DEBUG_FS
	struct hfi1_devdata *dd = dd_from_ibdev(qp->ibqp.device);
	struct hfi1_opcode_stats_perctx *s = get_cpu_ptr(dd->tx_opstats);

	inc_opstats(plen * 4, &s->stats[ps->opcode]);
	put_cpu_ptr(s);
#endif
}

/*
 * Build the number of DMA descriptors needed to send length bytes of data.
 *
 * NOTE: DMA mapping is held in the tx until completed in the ring or
 *       the tx desc is freed without having been submitted to the ring
 *
 * This routine ensures all the helper routine calls succeed.
 */
/* New API */
static int build_verbs_tx_desc(
	struct sdma_engine *sde,
	u32 length,
	struct verbs_txreq *tx,
	struct hfi1_ahg_info *ahg_info,
	u64 pbc)
{
	int ret = 0;
	struct hfi1_sdma_header *phdr = &tx->phdr;
	u16 hdrbytes = (tx->hdr_dwords + sizeof(pbc) / 4) << 2;
	u8 extra_bytes = 0;

	if (tx->phdr.hdr.hdr_type) {
		/*
		 * hdrbytes accounts for PBC. Need to subtract 8 bytes
		 * before calculating padding.
		 */
		extra_bytes = hfi1_get_16b_padding(hdrbytes - 8, length) +
			      (SIZE_OF_CRC << 2) + SIZE_OF_LT;
	}
	if (!ahg_info->ahgcount) {
		ret = sdma_txinit_ahg(
			&tx->txreq,
			ahg_info->tx_flags,
			hdrbytes + length +
			extra_bytes,
			ahg_info->ahgidx,
			0,
			NULL,
			0,
			verbs_sdma_complete);
		if (ret)
			goto bail_txadd;
		phdr->pbc = cpu_to_le64(pbc);
		ret = sdma_txadd_kvaddr(
			sde->dd,
			&tx->txreq,
			phdr,
			hdrbytes);
		if (ret)
			goto bail_txadd;
	} else {
		ret = sdma_txinit_ahg(
			&tx->txreq,
			ahg_info->tx_flags,
			length,
			ahg_info->ahgidx,
			ahg_info->ahgcount,
			ahg_info->ahgdesc,
			hdrbytes,
			verbs_sdma_complete);
		if (ret)
			goto bail_txadd;
	}
	/* add the ulp payload - if any. tx->ss can be NULL for acks */
	if (tx->ss) {
		ret = build_verbs_ulp_payload(sde, length, tx);
		if (ret)
			goto bail_txadd;
	}

	/* add icrc, lt byte, and padding to flit */
	if (extra_bytes)
		ret = sdma_txadd_daddr(sde->dd, &tx->txreq, sde->dd->sdma_pad_phys,
				       extra_bytes);

bail_txadd:
	return ret;
}

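/*
 * TID RDMA packets carry a KDETH header; tell the send hardware (via
 * the PBC) to insert the KDETH HCRC so the receiver will accept them.
 */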
static u64 update_hcrc(u8 opcode, u64 pbc)
{
	if ((opcode & IB_OPCODE_TID_RDMA) == IB_OPCODE_TID_RDMA) {
		pbc &= ~PBC_INSERT_HCRC_SMASK;
		pbc |= (u64)PBC_IHCRC_LKDETH << PBC_INSERT_HCRC_SHIFT;
	}
	return pbc;
}

int hfi1_verbs_send_dma(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
			u64 pbc)
{
	struct hfi1_qp_priv *priv = qp->priv;
	struct hfi1_ahg_info *ahg_info = priv->s_ahg;
	u32 hdrwords = ps->s_txreq->hdr_dwords;
	u32 len = ps->s_txreq->s_cur_size;
	u32 plen;
	struct hfi1_ibdev *dev = ps->dev;
	struct hfi1_pportdata *ppd = ps->ppd;
	struct verbs_txreq *tx;
	u8 sc5 = priv->s_sc;
	int ret;
	u32 dwords;

	if (ps->s_txreq->phdr.hdr.hdr_type) {
		u8 extra_bytes = hfi1_get_16b_padding((hdrwords << 2), len);

		dwords = (len + extra_bytes + (SIZE_OF_CRC << 2) +
			  SIZE_OF_LT) >> 2;
	} else {
		dwords = (len + 3) >> 2;
	}
	plen = hdrwords + dwords + sizeof(pbc) / 4;

	tx = ps->s_txreq;
	if (!sdma_txreq_built(&tx->txreq)) {
		if (likely(pbc == 0)) {
			u32 vl = sc_to_vlt(dd_from_ibdev(qp->ibqp.device), sc5);

			/* No vl15 here */
			/* set PBC_DC_INFO bit (aka SC[4]) in pbc */
			if (ps->s_txreq->phdr.hdr.hdr_type)
				pbc |= PBC_PACKET_BYPASS |
				       PBC_INSERT_BYPASS_ICRC;
			else
				pbc |= (ib_is_sc5(sc5) << PBC_DC_INFO_SHIFT);

			pbc = create_pbc(ppd,
					 pbc,
					 qp->srate_mbps,
					 vl,
					 plen);

			if (unlikely(hfi1_dbg_should_fault_tx(qp, ps->opcode)))
				pbc = hfi1_fault_tx(qp, ps->opcode, pbc);
			else
				/* Update HCRC based on packet opcode */
				pbc = update_hcrc(ps->opcode, pbc);
		}
		tx->wqe = qp->s_wqe;
		ret = build_verbs_tx_desc(tx->sde, len, tx, ahg_info, pbc);
		if (unlikely(ret))
			goto bail_build;
	}
	ret = sdma_send_txreq(tx->sde, ps->wait, &tx->txreq, ps->pkts_sent);
	if (unlikely(ret < 0)) {
		if (ret == -ECOMM)
			goto bail_ecomm;
		return ret;
	}

	update_tx_opstats(qp, ps, plen);
	trace_sdma_output_ibhdr(dd_from_ibdev(qp->ibqp.device),
				&ps->s_txreq->phdr.hdr, ib_is_sc5(sc5));
	return ret;

bail_ecomm:
	/* The current one got "sent" */
	return 0;
bail_build:
	ret = wait_kmem(dev, qp, ps);
	if (!ret) {
		/* free txreq - bad state */
		hfi1_put_txreq(ps->s_txreq);
		ps->s_txreq = NULL;
	}
	return ret;
}

/*
 * If we are now in the error state, return zero to flush the
 * send work request.
 */
static int pio_wait(struct rvt_qp *qp,
		    struct send_context *sc,
		    struct hfi1_pkt_state *ps,
		    u32 flag)
{
	struct hfi1_qp_priv *priv = qp->priv;
	struct hfi1_devdata *dd = sc->dd;
	unsigned long flags;
	int ret = 0;

	/*
	 * Note that as soon as want_buffer() is called and
	 * possibly before it returns, sc_piobufavail()
	 * could be called. Therefore, put QP on the I/O wait list before
	 * enabling the PIO avail interrupt.
	 */
	spin_lock_irqsave(&qp->s_lock, flags);
	if (ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK) {
		write_seqlock(&sc->waitlock);
		list_add_tail(&ps->s_txreq->txreq.list,
			      &ps->wait->tx_head);
		if (list_empty(&priv->s_iowait.list)) {
			struct hfi1_ibdev *dev = &dd->verbs_dev;
			int was_empty;

			dev->n_piowait += !!(flag & RVT_S_WAIT_PIO);
			dev->n_piodrain += !!(flag & HFI1_S_WAIT_PIO_DRAIN);
			qp->s_flags |= flag;
			was_empty = list_empty(&sc->piowait);
			iowait_get_priority(&priv->s_iowait);
			iowait_queue(ps->pkts_sent, &priv->s_iowait,
				     &sc->piowait);
			priv->s_iowait.lock = &sc->waitlock;
			trace_hfi1_qpsleep(qp, RVT_S_WAIT_PIO);
			rvt_get_qp(qp);
			/* counting: only call wantpiobuf_intr if first user */
			if (was_empty)
				hfi1_sc_wantpiobuf_intr(sc, 1);
		}
		write_sequnlock(&sc->waitlock);
		hfi1_qp_unbusy(qp, ps->wait);
		ret = -EBUSY;
	}
	spin_unlock_irqrestore(&qp->s_lock, flags);
	return ret;
}

static void verbs_pio_complete(void *arg, int code)
{
	struct rvt_qp *qp = (struct rvt_qp *)arg;
	struct hfi1_qp_priv *priv = qp->priv;

	if (iowait_pio_dec(&priv->s_iowait))
		iowait_drain_wakeup(&priv->s_iowait);
}

int hfi1_verbs_send_pio(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
			u64 pbc)
{
	struct hfi1_qp_priv *priv = qp->priv;
	u32 hdrwords = ps->s_txreq->hdr_dwords;
	struct rvt_sge_state *ss = ps->s_txreq->ss;
	u32 len = ps->s_txreq->s_cur_size;
	u32 dwords;
	u32 plen;
	struct hfi1_pportdata *ppd = ps->ppd;
	u32 *hdr;
	u8 sc5;
	unsigned long flags = 0;
	struct send_context *sc;
	struct pio_buf *pbuf;
	int wc_status = IB_WC_SUCCESS;
	int ret = 0;
	pio_release_cb cb = NULL;
	u8 extra_bytes = 0;

	if (ps->s_txreq->phdr.hdr.hdr_type) {
		u8 pad_size = hfi1_get_16b_padding((hdrwords << 2), len);

		extra_bytes = pad_size + (SIZE_OF_CRC << 2) + SIZE_OF_LT;
		dwords = (len + extra_bytes) >> 2;
		hdr = (u32 *)&ps->s_txreq->phdr.hdr.opah;
	} else {
		dwords = (len + 3) >> 2;
		hdr = (u32 *)&ps->s_txreq->phdr.hdr.ibh;
	}
	plen = hdrwords + dwords + sizeof(pbc) / 4;

	/* only RC/UC use complete */
	switch (qp->ibqp.qp_type) {
	case IB_QPT_RC:
	case IB_QPT_UC:
		cb = verbs_pio_complete;
		break;
	default:
		break;
	}

	/* vl15 special case taken care of in ud.c */
	sc5 = priv->s_sc;
	sc = ps->s_txreq->psc;

	if (likely(pbc == 0)) {
		u8 vl = sc_to_vlt(dd_from_ibdev(qp->ibqp.device), sc5);

		/* set PBC_DC_INFO bit (aka SC[4]) in pbc */
		if (ps->s_txreq->phdr.hdr.hdr_type)
			pbc |= PBC_PACKET_BYPASS | PBC_INSERT_BYPASS_ICRC;
		else
			pbc |= (ib_is_sc5(sc5) << PBC_DC_INFO_SHIFT);

		pbc = create_pbc(ppd, pbc, qp->srate_mbps, vl, plen);
		if (unlikely(hfi1_dbg_should_fault_tx(qp, ps->opcode)))
			pbc = hfi1_fault_tx(qp, ps->opcode, pbc);
		else
			/* Update HCRC based on packet opcode */
			pbc = update_hcrc(ps->opcode, pbc);
	}
	if (cb)
		iowait_pio_inc(&priv->s_iowait);
	pbuf = sc_buffer_alloc(sc, plen, cb, qp);
	if (IS_ERR_OR_NULL(pbuf)) {
		if (cb)
			verbs_pio_complete(qp, 0);
		if (IS_ERR(pbuf)) {
			/*
			 * If we have filled the PIO buffers to capacity and are
			 * not in an active state, this request is not going to
			 * go out, so just complete it with an error; otherwise
			 * a ULP or the core may be stuck waiting.
			 */
			hfi1_cdbg(
				PIO,
				"alloc failed. state not active, completing");
			wc_status = IB_WC_GENERAL_ERR;
			goto pio_bail;
		} else {
			/*
			 * This is a normal occurrence: the PIO buffers are
			 * full, but we are still happily sending, so keep
			 * queuing the request.
			 */
			hfi1_cdbg(PIO, "alloc failed. state active, queuing");
			ret = pio_wait(qp, sc, ps, RVT_S_WAIT_PIO);
			if (!ret)
				/* txreq not queued - free */
				goto bail;
			/* tx consumed in wait */
			return ret;
		}
	}

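	/* header-only packets (dwords == 0) can use the single-shot copy */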
	if (dwords == 0) {
		pio_copy(ppd->dd, pbuf, pbc, hdr, hdrwords);
	} else {
		seg_pio_copy_start(pbuf, pbc,
				   hdr, hdrwords * 4);
		if (ss) {
			while (len) {
				void *addr = ss->sge.vaddr;
				u32 slen = rvt_get_sge_length(&ss->sge, len);

				rvt_update_sge(ss, slen, false);
				seg_pio_copy_mid(pbuf, addr, slen);
				len -= slen;
			}
		}
		/* add icrc, lt byte, and padding to flit */
		if (extra_bytes)
			seg_pio_copy_mid(pbuf, ppd->dd->sdma_pad_dma,
					 extra_bytes);

		seg_pio_copy_end(pbuf);
	}

	update_tx_opstats(qp, ps, plen);
	trace_pio_output_ibhdr(dd_from_ibdev(qp->ibqp.device),
			       &ps->s_txreq->phdr.hdr, ib_is_sc5(sc5));

pio_bail:
	spin_lock_irqsave(&qp->s_lock, flags);
	if (qp->s_wqe) {
		rvt_send_complete(qp, qp->s_wqe, wc_status);
	} else if (qp->ibqp.qp_type == IB_QPT_RC) {
		if (unlikely(wc_status == IB_WC_GENERAL_ERR))
			hfi1_rc_verbs_aborted(qp, &ps->s_txreq->phdr.hdr);
		hfi1_rc_send_complete(qp, &ps->s_txreq->phdr.hdr);
	}
	spin_unlock_irqrestore(&qp->s_lock, flags);

	ret = 0;

bail:
	hfi1_put_txreq(ps->s_txreq);
	return ret;
}

/*
 * egress_pkey_matches_entry - return 1 if the pkey matches ent (ent
 * being an entry from the partition key table), return 0
 * otherwise. Use the matching criteria for egress partition keys
 * specified in the OPAv1 spec., section 9.11.7.
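 *
 * Example: a limited-member pkey 0x0001 matches table entries 0x0001
 * and 0x8001, while the full-member pkey 0x8001 matches only 0x8001.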
 */
static inline int egress_pkey_matches_entry(u16 pkey, u16 ent)
{
	u16 mkey = pkey & PKEY_LOW_15_MASK;
	u16 mentry = ent & PKEY_LOW_15_MASK;

	if (mkey == mentry) {
		/*
		 * If pkey[15] is set (full partition member),
		 * is bit 15 in the corresponding table element
		 * clear (limited member)?
		 */
		if (pkey & PKEY_MEMBER_MASK)
			return !!(ent & PKEY_MEMBER_MASK);
		return 1;
	}
	return 0;
}

/**
 * egress_pkey_check - check P_KEY of a packet
 * @ppd: Physical IB port data
 * @slid: SLID for packet
 * @pkey: PKEY for header
 * @sc5: SC for packet
 * @s_pkey_index: used as a lookup optimization for kernel contexts
 * only. A negative value means a user context is calling this
 * function.
 *
 * It checks if hdr's pkey is valid.
 *
 * Return: 0 on success, otherwise, 1
 */
int egress_pkey_check(struct hfi1_pportdata *ppd, u32 slid, u16 pkey,
		      u8 sc5, int8_t s_pkey_index)
{
	struct hfi1_devdata *dd;
	int i;
	int is_user_ctxt_mechanism = (s_pkey_index < 0);

	if (!(ppd->part_enforce & HFI1_PART_ENFORCE_OUT))
		return 0;

	/* If SC15, pkey[0:14] must be 0x7fff */
	if ((sc5 == 0xf) && ((pkey & PKEY_LOW_15_MASK) != PKEY_LOW_15_MASK))
		goto bad;

	/* Is the pkey = 0x0, or 0x8000? */
	if ((pkey & PKEY_LOW_15_MASK) == 0)
		goto bad;

	/*
	 * For the kernel contexts only, if a qp is passed into the function,
	 * the most likely matching pkey has index qp->s_pkey_index
	 */
	if (!is_user_ctxt_mechanism &&
	    egress_pkey_matches_entry(pkey, ppd->pkeys[s_pkey_index])) {
		return 0;
	}

	for (i = 0; i < MAX_PKEY_VALUES; i++) {
		if (egress_pkey_matches_entry(pkey, ppd->pkeys[i]))
			return 0;
	}
bad:
	/*
	 * For the user-context mechanism, the P_KEY check would only happen
	 * once per SDMA request, not once per packet. Therefore, there's no
	 * need to increment the counter for the user-context mechanism.
	 */
	if (!is_user_ctxt_mechanism) {
		incr_cntr64(&ppd->port_xmit_constraint_errors);
		dd = ppd->dd;
		if (!(dd->err_info_xmit_constraint.status &
		      OPA_EI_STATUS_SMASK)) {
			dd->err_info_xmit_constraint.status |=
				OPA_EI_STATUS_SMASK;
			dd->err_info_xmit_constraint.slid = slid;
			dd->err_info_xmit_constraint.pkey = pkey;
		}
	}
	return 1;
}

/*
 * get_send_routine - choose an egress routine
 *
 * Choose an egress routine based on QP type
 * and size
 */
static inline send_routine get_send_routine(struct rvt_qp *qp,
					    struct hfi1_pkt_state *ps)
{
	struct hfi1_devdata *dd = dd_from_ibdev(qp->ibqp.device);
	struct hfi1_qp_priv *priv = qp->priv;
	struct verbs_txreq *tx = ps->s_txreq;

	if (unlikely(!(dd->flags & HFI1_HAS_SEND_DMA)))
		return dd->process_pio_send;
	switch (qp->ibqp.qp_type) {
	case IB_QPT_SMI:
		return dd->process_pio_send;
	case IB_QPT_GSI:
	case IB_QPT_UD:
		break;
	case IB_QPT_UC:
	case IB_QPT_RC:
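		/* running average (EMA) of the payload size */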
		priv->s_running_pkt_size =
			(tx->s_cur_size + priv->s_running_pkt_size) / 2;
		if (piothreshold &&
		    priv->s_running_pkt_size <= min(piothreshold, qp->pmtu) &&
		    (BIT(ps->opcode & OPMASK) & pio_opmask[ps->opcode >> 5]) &&
		    iowait_sdma_pending(&priv->s_iowait) == 0 &&
		    !sdma_txreq_built(&tx->txreq))
			return dd->process_pio_send;
		break;
	default:
		break;
	}
	return dd->process_dma_send;
}

/**
 * hfi1_verbs_send - send a packet
 * @qp: the QP to send on
 * @ps: the state of the packet to send
 *
 * Return zero if packet is sent or queued OK.
 * Return non-zero and clear qp->s_flags RVT_S_BUSY otherwise.
 */
int hfi1_verbs_send(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
{
	struct hfi1_devdata *dd = dd_from_ibdev(qp->ibqp.device);
	struct hfi1_qp_priv *priv = qp->priv;
	struct ib_other_headers *ohdr = NULL;
	send_routine sr;
	int ret;
	u16 pkey;
	u32 slid;
	u8 l4 = 0;

	/* locate the pkey within the headers */
	if (ps->s_txreq->phdr.hdr.hdr_type) {
		struct hfi1_16b_header *hdr = &ps->s_txreq->phdr.hdr.opah;

		l4 = hfi1_16B_get_l4(hdr);
		if (l4 == OPA_16B_L4_IB_LOCAL)
			ohdr = &hdr->u.oth;
		else if (l4 == OPA_16B_L4_IB_GLOBAL)
			ohdr = &hdr->u.l.oth;

		slid = hfi1_16B_get_slid(hdr);
		pkey = hfi1_16B_get_pkey(hdr);
	} else {
		struct ib_header *hdr = &ps->s_txreq->phdr.hdr.ibh;
		u8 lnh = ib_get_lnh(hdr);

		if (lnh == HFI1_LRH_GRH)
			ohdr = &hdr->u.l.oth;
		else
			ohdr = &hdr->u.oth;
		slid = ib_get_slid(hdr);
		pkey = ib_bth_get_pkey(ohdr);
	}

	if (likely(l4 != OPA_16B_L4_FM))
		ps->opcode = ib_bth_get_opcode(ohdr);
	else
		ps->opcode = IB_OPCODE_UD_SEND_ONLY;

	sr = get_send_routine(qp, ps);
	ret = egress_pkey_check(dd->pport, slid, pkey,
				priv->s_sc, qp->s_pkey_index);
	if (unlikely(ret)) {
		/*
		 * The value we are returning here does not get propagated to
		 * the verbs caller. Thus we need to complete the request with
		 * error otherwise the caller could be sitting waiting on the
		 * completion event. Only do this for PIO. SDMA has its own
		 * mechanism for handling the errors. So for SDMA we can just
		 * return.
		 */
		if (sr == dd->process_pio_send) {
			unsigned long flags;

			hfi1_cdbg(PIO, "%s() Failed. Completing with err",
				  __func__);
			spin_lock_irqsave(&qp->s_lock, flags);
			rvt_send_complete(qp, qp->s_wqe, IB_WC_GENERAL_ERR);
			spin_unlock_irqrestore(&qp->s_lock, flags);
		}
		return -EINVAL;
	}
	if (sr == dd->process_dma_send && iowait_pio_pending(&priv->s_iowait))
		return pio_wait(qp,
				ps->s_txreq->psc,
				ps,
				HFI1_S_WAIT_PIO_DRAIN);
	return sr(qp, ps, 0);
}

/**
 * hfi1_fill_device_attr - Fill in rvt dev info device attributes.
 * @dd: the device data structure
 */
static void hfi1_fill_device_attr(struct hfi1_devdata *dd)
{
	struct rvt_dev_info *rdi = &dd->verbs_dev.rdi;
	u32 ver = dd->dc8051_ver;

	memset(&rdi->dparms.props, 0, sizeof(rdi->dparms.props));

	rdi->dparms.props.fw_ver = ((u64)(dc8051_ver_maj(ver)) << 32) |
		((u64)(dc8051_ver_min(ver)) << 16) |
		(u64)dc8051_ver_patch(ver);

	rdi->dparms.props.device_cap_flags = IB_DEVICE_BAD_PKEY_CNTR |
		IB_DEVICE_BAD_QKEY_CNTR | IB_DEVICE_SHUTDOWN_PORT |
		IB_DEVICE_SYS_IMAGE_GUID | IB_DEVICE_RC_RNR_NAK_GEN |
		IB_DEVICE_PORT_ACTIVE_EVENT | IB_DEVICE_SRQ_RESIZE |
		IB_DEVICE_MEM_MGT_EXTENSIONS;
	rdi->dparms.props.kernel_cap_flags = IBK_RDMA_NETDEV_OPA;
	rdi->dparms.props.page_size_cap = PAGE_SIZE;
	rdi->dparms.props.vendor_id = dd->oui1 << 16 | dd->oui2 << 8 | dd->oui3;
	rdi->dparms.props.vendor_part_id = dd->pcidev->device;
	rdi->dparms.props.hw_ver = dd->minrev;
	rdi->dparms.props.sys_image_guid = ib_hfi1_sys_image_guid;
	rdi->dparms.props.max_mr_size = U64_MAX;
	rdi->dparms.props.max_fast_reg_page_list_len = UINT_MAX;
	rdi->dparms.props.max_qp = hfi1_max_qps;
	rdi->dparms.props.max_qp_wr =
		(hfi1_max_qp_wrs >= HFI1_QP_WQE_INVALID ?
		 HFI1_QP_WQE_INVALID - 1 : hfi1_max_qp_wrs);
	rdi->dparms.props.max_send_sge = hfi1_max_sges;
	rdi->dparms.props.max_recv_sge = hfi1_max_sges;
	rdi->dparms.props.max_sge_rd = hfi1_max_sges;
	rdi->dparms.props.max_cq = hfi1_max_cqs;
	rdi->dparms.props.max_ah = hfi1_max_ahs;
	rdi->dparms.props.max_cqe = hfi1_max_cqes;
	rdi->dparms.props.max_pd = hfi1_max_pds;
	rdi->dparms.props.max_qp_rd_atom = HFI1_MAX_RDMA_ATOMIC;
	rdi->dparms.props.max_qp_init_rd_atom = 255;
	rdi->dparms.props.max_srq = hfi1_max_srqs;
	rdi->dparms.props.max_srq_wr = hfi1_max_srq_wrs;
	rdi->dparms.props.max_srq_sge = hfi1_max_srq_sges;
	rdi->dparms.props.atomic_cap = IB_ATOMIC_GLOB;
	rdi->dparms.props.max_pkeys = hfi1_get_npkeys(dd);
	rdi->dparms.props.max_mcast_grp = hfi1_max_mcast_grps;
	rdi->dparms.props.max_mcast_qp_attach = hfi1_max_mcast_qp_attached;
	rdi->dparms.props.max_total_mcast_qp_attach =
		rdi->dparms.props.max_mcast_qp_attach *
		rdi->dparms.props.max_mcast_grp;
}

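/*
 * Map OPA per-lane speeds onto the closest IB encodings: 25 Gb/s is
 * reported as EDR and 12.5 Gb/s as FDR.
 */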
static inline u16 opa_speed_to_ib(u16 in)
{
	u16 out = 0;

	if (in & OPA_LINK_SPEED_25G)
		out |= IB_SPEED_EDR;
	if (in & OPA_LINK_SPEED_12_5G)
		out |= IB_SPEED_FDR;

	return out;
}

/*
 * Convert a single OPA link width (no multiple flags) to an IB value.
 * A zero OPA link width means link down, which means the IB width value
 * is a don't care.
 */
static inline u16 opa_width_to_ib(u16 in)
{
	switch (in) {
	case OPA_LINK_WIDTH_1X:
	/* map 2x and 3x to 1x as they don't exist in IB */
	case OPA_LINK_WIDTH_2X:
	case OPA_LINK_WIDTH_3X:
		return IB_WIDTH_1X;
	default: /* link down or unknown, return our largest width */
	case OPA_LINK_WIDTH_4X:
		return IB_WIDTH_4X;
	}
}

static int query_port(struct rvt_dev_info *rdi, u32 port_num,
		      struct ib_port_attr *props)
{
	struct hfi1_ibdev *verbs_dev = dev_from_rdi(rdi);
	struct hfi1_devdata *dd = dd_from_dev(verbs_dev);
	struct hfi1_pportdata *ppd = &dd->pport[port_num - 1];
	u32 lid = ppd->lid;

	/* props being zeroed by the caller, avoid zeroing it here */
	props->lid = lid ? lid : 0;
	props->lmc = ppd->lmc;
	/* OPA logical states match IB logical states */
	props->state = driver_lstate(ppd);
	props->phys_state = driver_pstate(ppd);
	props->gid_tbl_len = HFI1_GUIDS_PER_PORT;
	props->active_width = (u8)opa_width_to_ib(ppd->link_width_active);
	/* see rate_show() in ib core/sysfs.c */
	props->active_speed = opa_speed_to_ib(ppd->link_speed_active);
	props->max_vl_num = ppd->vls_supported;

	/* Once we are a "first class" citizen and have added the OPA MTUs to
	 * the core we can advertise the larger MTU enum to the ULPs, for now
	 * advertise only 4K.
	 *
	 * Those applications which are either OPA aware or pass the MTU enum
	 * from the Path Records to us will get the new 8k MTU. Those that
	 * attempt to process the MTU enum may fail in various ways.
	 */
	props->max_mtu = mtu_to_enum((!valid_ib_mtu(hfi1_max_mtu) ?
				      4096 : hfi1_max_mtu), IB_MTU_4096);
	props->active_mtu = !valid_ib_mtu(ppd->ibmtu) ? props->max_mtu :
		mtu_to_enum(ppd->ibmtu, IB_MTU_4096);
	props->phys_mtu = hfi1_max_mtu;

	return 0;
}

static int modify_device(struct ib_device *device,
			 int device_modify_mask,
			 struct ib_device_modify *device_modify)
{
	struct hfi1_devdata *dd = dd_from_ibdev(device);
	unsigned i;
	int ret;

	if (device_modify_mask & ~(IB_DEVICE_MODIFY_SYS_IMAGE_GUID |
				   IB_DEVICE_MODIFY_NODE_DESC)) {
		ret = -EOPNOTSUPP;
		goto bail;
	}

	if (device_modify_mask & IB_DEVICE_MODIFY_NODE_DESC) {
		memcpy(device->node_desc, device_modify->node_desc,
		       IB_DEVICE_NODE_DESC_MAX);
		for (i = 0; i < dd->num_pports; i++) {
			struct hfi1_ibport *ibp = &dd->pport[i].ibport_data;

			hfi1_node_desc_chg(ibp);
		}
	}

	if (device_modify_mask & IB_DEVICE_MODIFY_SYS_IMAGE_GUID) {
		ib_hfi1_sys_image_guid =
			cpu_to_be64(device_modify->sys_image_guid);
		for (i = 0; i < dd->num_pports; i++) {
			struct hfi1_ibport *ibp = &dd->pport[i].ibport_data;

			hfi1_sys_guid_chg(ibp);
		}
	}

	ret = 0;

bail:
	return ret;
}

static int shut_down_port(struct rvt_dev_info *rdi, u32 port_num)
{
	struct hfi1_ibdev *verbs_dev = dev_from_rdi(rdi);
	struct hfi1_devdata *dd = dd_from_dev(verbs_dev);
	struct hfi1_pportdata *ppd = &dd->pport[port_num - 1];

	set_link_down_reason(ppd, OPA_LINKDOWN_REASON_UNKNOWN, 0,
			     OPA_LINKDOWN_REASON_UNKNOWN);
	return set_link_state(ppd, HLS_DN_DOWNDEF);
}

static int hfi1_get_guid_be(struct rvt_dev_info *rdi, struct rvt_ibport *rvp,
			    int guid_index, __be64 *guid)
{
	struct hfi1_ibport *ibp = container_of(rvp, struct hfi1_ibport, rvp);

	if (guid_index >= HFI1_GUIDS_PER_PORT)
		return -EINVAL;

	*guid = get_sguid(ibp, guid_index);
	return 0;
}

/*
 * convert ah port,sl to sc
 */
u8 ah_to_sc(struct ib_device *ibdev, struct rdma_ah_attr *ah)
{
	struct hfi1_ibport *ibp = to_iport(ibdev, rdma_ah_get_port_num(ah));

	return ibp->sl_to_sc[rdma_ah_get_sl(ah)];
}

static int hfi1_check_ah(struct ib_device *ibdev, struct rdma_ah_attr *ah_attr)
{
	struct hfi1_ibport *ibp;
	struct hfi1_pportdata *ppd;
	struct hfi1_devdata *dd;
	u8 sc5;
	u8 sl;

	if (hfi1_check_mcast(rdma_ah_get_dlid(ah_attr)) &&
	    !(rdma_ah_get_ah_flags(ah_attr) & IB_AH_GRH))
		return -EINVAL;

	/* test the mapping for validity */
	ibp = to_iport(ibdev, rdma_ah_get_port_num(ah_attr));
	ppd = ppd_from_ibp(ibp);
	dd = dd_from_ppd(ppd);

	sl = rdma_ah_get_sl(ah_attr);
	if (sl >= ARRAY_SIZE(ibp->sl_to_sc))
		return -EINVAL;
	sl = array_index_nospec(sl, ARRAY_SIZE(ibp->sl_to_sc));

	sc5 = ibp->sl_to_sc[sl];
	if (sc_to_vlt(dd, sc5) > num_vls && sc_to_vlt(dd, sc5) != 0xf)
		return -EINVAL;
	return 0;
}

static void hfi1_notify_new_ah(struct ib_device *ibdev,
			       struct rdma_ah_attr *ah_attr,
			       struct rvt_ah *ah)
{
	struct hfi1_ibport *ibp;
	struct hfi1_pportdata *ppd;
	struct hfi1_devdata *dd;
	u8 sc5;
	struct rdma_ah_attr *attr = &ah->attr;

	/*
	 * Do not trust reading anything from rvt_ah at this point as it is not
	 * done being setup. We can however modify things which we need to set.
	 */

	ibp = to_iport(ibdev, rdma_ah_get_port_num(ah_attr));
	ppd = ppd_from_ibp(ibp);
	sc5 = ibp->sl_to_sc[rdma_ah_get_sl(&ah->attr)];
	hfi1_update_ah_attr(ibdev, attr);
	hfi1_make_opa_lid(attr);
	dd = dd_from_ppd(ppd);
	ah->vl = sc_to_vlt(dd, sc5);
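	/* VL15 is the management VL; it is always considered valid here */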
1528 | if (ah->vl < num_vls || ah->vl == 15) |
1529 | ah->log_pmtu = ilog2(dd->vld[ah->vl].mtu); |
1530 | } |
1531 | |
1532 | /** |
1533 | * hfi1_get_npkeys - return the size of the PKEY table for context 0 |
1534 | * @dd: the hfi1_ib device |
1535 | */ |
1536 | unsigned hfi1_get_npkeys(struct hfi1_devdata *dd) |
1537 | { |
1538 | return ARRAY_SIZE(dd->pport[0].pkeys); |
1539 | } |
1540 | |
1541 | static void init_ibport(struct hfi1_pportdata *ppd) |
1542 | { |
1543 | struct hfi1_ibport *ibp = &ppd->ibport_data; |
1544 | size_t sz = ARRAY_SIZE(ibp->sl_to_sc); |
1545 | int i; |
1546 | |
1547 | for (i = 0; i < sz; i++) { |
1548 | ibp->sl_to_sc[i] = i; |
1549 | ibp->sc_to_sl[i] = i; |
1550 | } |
1551 | |
1552 | for (i = 0; i < RVT_MAX_TRAP_LISTS ; i++) |
1553 | INIT_LIST_HEAD(list: &ibp->rvp.trap_lists[i].list); |
1554 | timer_setup(&ibp->rvp.trap_timer, hfi1_handle_trap_timer, 0); |
1555 | |
1556 | spin_lock_init(&ibp->rvp.lock); |
1557 | /* Set the prefix to the default value (see ch. 4.1.1) */ |
1558 | ibp->rvp.gid_prefix = IB_DEFAULT_GID_PREFIX; |
1559 | ibp->rvp.sm_lid = 0; |
1560 | /* |
1561 | * Below should only set bits defined in OPA PortInfo.CapabilityMask |
1562 | * and PortInfo.CapabilityMask3 |
1563 | */ |
1564 | ibp->rvp.port_cap_flags = IB_PORT_AUTO_MIGR_SUP | |
1565 | IB_PORT_CAP_MASK_NOTICE_SUP; |
1566 | ibp->rvp.port_cap3_flags = OPA_CAP_MASK3_IsSharedSpaceSupported; |
1567 | ibp->rvp.pma_counter_select[0] = IB_PMA_PORT_XMIT_DATA; |
1568 | ibp->rvp.pma_counter_select[1] = IB_PMA_PORT_RCV_DATA; |
1569 | ibp->rvp.pma_counter_select[2] = IB_PMA_PORT_XMIT_PKTS; |
1570 | ibp->rvp.pma_counter_select[3] = IB_PMA_PORT_RCV_PKTS; |
1571 | ibp->rvp.pma_counter_select[4] = IB_PMA_PORT_XMIT_WAIT; |
1572 | |
1573 | RCU_INIT_POINTER(ibp->rvp.qp[0], NULL); |
1574 | RCU_INIT_POINTER(ibp->rvp.qp[1], NULL); |
1575 | } |
1576 | |
1577 | static void hfi1_get_dev_fw_str(struct ib_device *ibdev, char *str) |
1578 | { |
1579 | struct rvt_dev_info *rdi = ib_to_rvt(ibdev); |
1580 | struct hfi1_ibdev *dev = dev_from_rdi(rdi); |
1581 | u32 ver = dd_from_dev(dev)->dc8051_ver; |
1582 | |
	snprintf(str, IB_FW_VERSION_NAME_MAX, "%u.%u.%u", dc8051_ver_maj(ver),
		 dc8051_ver_min(ver), dc8051_ver_patch(ver));
1585 | } |
1586 | |
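/*
 * Software counters maintained by the driver itself; their values are
 * appended after the hardware device counters in the hw_stats array.
 */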
1587 | static const char * const driver_cntr_names[] = { |
	/* must be element 0 */
	"DRIVER_KernIntr",
	"DRIVER_ErrorIntr",
	"DRIVER_Tx_Errs",
	"DRIVER_Rcv_Errs",
	"DRIVER_HW_Errs",
	"DRIVER_NoPIOBufs",
	"DRIVER_CtxtsOpen",
	"DRIVER_RcvLen_Errs",
	"DRIVER_EgrBufFull",
	"DRIVER_EgrHdrFull"
1599 | }; |
1600 | |
1601 | static struct rdma_stat_desc *dev_cntr_descs; |
1602 | static struct rdma_stat_desc *port_cntr_descs; |
1603 | int num_driver_cntrs = ARRAY_SIZE(driver_cntr_names); |
1604 | static int num_dev_cntrs; |
1605 | static int num_port_cntrs; |
1606 | |
1607 | /* |
1608 | * Convert a list of names separated by '\n' into an array of NULL terminated |
1609 | * strings. Optionally some entries can be reserved in the array to hold extra |
1610 | * external strings. |
1611 | */ |
1612 | static int init_cntr_names(const char *names_in, const size_t names_len, |
			   int num_extra_names, int *num_cntrs,
1614 | struct rdma_stat_desc **cntr_descs) |
1615 | { |
1616 | struct rdma_stat_desc *names_out; |
1617 | char *p; |
1618 | int i, n; |
1619 | |
1620 | n = 0; |
1621 | for (i = 0; i < names_len; i++) |
1622 | if (names_in[i] == '\n') |
1623 | n++; |
1624 | |
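	/*
	 * Single allocation: the array of descriptors is followed by a
	 * writable copy of the name strings, which is then split in
	 * place at each '\n'.
	 */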
	names_out = kzalloc((n + num_extra_names) * sizeof(*names_out)
			    + names_len,
			    GFP_KERNEL);
1628 | if (!names_out) { |
1629 | *num_cntrs = 0; |
1630 | *cntr_descs = NULL; |
1631 | return -ENOMEM; |
1632 | } |
1633 | |
1634 | p = (char *)&names_out[n + num_extra_names]; |
1635 | memcpy(p, names_in, names_len); |
1636 | |
1637 | for (i = 0; i < n; i++) { |
1638 | names_out[i].name = p; |
1639 | p = strchr(p, '\n'); |
1640 | *p++ = '\0'; |
1641 | } |
1642 | |
1643 | *num_cntrs = n; |
1644 | *cntr_descs = names_out; |
1645 | return 0; |
1646 | } |
1647 | |
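/*
 * The descriptor arrays are built once, on first use, from the counter
 * names of the first device to ask for them; they are shared by all
 * devices and freed in hfi1_unregister_ib_device().
 */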
1648 | static struct rdma_hw_stats *hfi1_alloc_hw_device_stats(struct ib_device *ibdev) |
1649 | { |
1650 | if (!dev_cntr_descs) { |
1651 | struct hfi1_devdata *dd = dd_from_ibdev(ibdev); |
1652 | int i, err; |
1653 | |
		err = init_cntr_names(dd->cntrnames, dd->cntrnameslen,
				      num_driver_cntrs,
				      &num_dev_cntrs, &dev_cntr_descs);
1657 | if (err) |
1658 | return NULL; |
1659 | |
1660 | for (i = 0; i < num_driver_cntrs; i++) |
1661 | dev_cntr_descs[num_dev_cntrs + i].name = |
1662 | driver_cntr_names[i]; |
1663 | } |
	return rdma_alloc_hw_stats_struct(dev_cntr_descs,
					  num_dev_cntrs + num_driver_cntrs,
					  RDMA_HW_STATS_DEFAULT_LIFESPAN);
1667 | } |
1668 | |
1669 | static struct rdma_hw_stats *hfi_alloc_hw_port_stats(struct ib_device *ibdev, |
1670 | u32 port_num) |
1671 | { |
1672 | if (!port_cntr_descs) { |
1673 | struct hfi1_devdata *dd = dd_from_ibdev(ibdev); |
1674 | int err; |
1675 | |
		err = init_cntr_names(dd->portcntrnames, dd->portcntrnameslen,
				      0,
				      &num_port_cntrs, &port_cntr_descs);
1679 | if (err) |
1680 | return NULL; |
1681 | } |
	return rdma_alloc_hw_stats_struct(port_cntr_descs, num_port_cntrs,
					  RDMA_HW_STATS_DEFAULT_LIFESPAN);
1684 | } |
1685 | |
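/* Sum the interrupt counters of every hfi1 device in the system. */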
1686 | static u64 hfi1_sps_ints(void) |
1687 | { |
1688 | unsigned long index, flags; |
1689 | struct hfi1_devdata *dd; |
1690 | u64 sps_ints = 0; |
1691 | |
1692 | xa_lock_irqsave(&hfi1_dev_table, flags); |
1693 | xa_for_each(&hfi1_dev_table, index, dd) { |
		sps_ints += get_all_cpu_total(dd->int_counter);
1695 | } |
1696 | xa_unlock_irqrestore(&hfi1_dev_table, flags); |
1697 | return sps_ints; |
1698 | } |
1699 | |
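/*
 * get_hw_stats - rdmavt callback to fill in a hw_stats array. A port
 * number of 0 selects the device-wide counters; any other value selects
 * the counters of that port.
 */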
1700 | static int get_hw_stats(struct ib_device *ibdev, struct rdma_hw_stats *stats, |
1701 | u32 port, int index) |
1702 | { |
1703 | u64 *values; |
1704 | int count; |
1705 | |
1706 | if (!port) { |
1707 | u64 *stats = (u64 *)&hfi1_stats; |
1708 | int i; |
1709 | |
		hfi1_read_cntrs(dd_from_ibdev(ibdev), NULL, &values);
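		/*
		 * Driver counter 0 (DRIVER_KernIntr) is the interrupt
		 * total across all devices; the remaining driver
		 * counters come straight from hfi1_stats.
		 */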
1711 | values[num_dev_cntrs] = hfi1_sps_ints(); |
1712 | for (i = 1; i < num_driver_cntrs; i++) |
1713 | values[num_dev_cntrs + i] = stats[i]; |
1714 | count = num_dev_cntrs + num_driver_cntrs; |
1715 | } else { |
1716 | struct hfi1_ibport *ibp = to_iport(ibdev, port); |
1717 | |
		hfi1_read_portcntrs(ppd_from_ibp(ibp), NULL, &values);
1719 | count = num_port_cntrs; |
1720 | } |
1721 | |
1722 | memcpy(stats->value, values, count * sizeof(u64)); |
1723 | return count; |
1724 | } |
1725 | |
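/*
 * Driver-specific ib_device callbacks; the remaining callbacks are
 * filled in by rdmavt when the device is registered.
 */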
1726 | static const struct ib_device_ops hfi1_dev_ops = { |
1727 | .owner = THIS_MODULE, |
1728 | .driver_id = RDMA_DRIVER_HFI1, |
1729 | |
1730 | .alloc_hw_device_stats = hfi1_alloc_hw_device_stats, |
1731 | .alloc_hw_port_stats = hfi_alloc_hw_port_stats, |
1732 | .alloc_rdma_netdev = hfi1_vnic_alloc_rn, |
1733 | .device_group = &ib_hfi1_attr_group, |
1734 | .get_dev_fw_str = hfi1_get_dev_fw_str, |
1735 | .get_hw_stats = get_hw_stats, |
1736 | .modify_device = modify_device, |
1737 | .port_groups = hfi1_attr_port_groups, |
1738 | /* keep process mad in the driver */ |
1739 | .process_mad = hfi1_process_mad, |
1740 | .rdma_netdev_get_params = hfi1_ipoib_rn_get_params, |
1741 | }; |
1742 | |
1743 | /** |
1744 | * hfi1_register_ib_device - register our device with the infiniband core |
1745 | * @dd: the device data structure |
1746 | * Return 0 if successful, errno if unsuccessful. |
1747 | */ |
1748 | int hfi1_register_ib_device(struct hfi1_devdata *dd) |
1749 | { |
1750 | struct hfi1_ibdev *dev = &dd->verbs_dev; |
1751 | struct ib_device *ibdev = &dev->rdi.ibdev; |
1752 | struct hfi1_pportdata *ppd = dd->pport; |
1753 | struct hfi1_ibport *ibp = &ppd->ibport_data; |
1754 | unsigned i; |
1755 | int ret; |
1756 | |
1757 | for (i = 0; i < dd->num_pports; i++) |
		init_ibport(ppd + i);
1759 | |
1760 | /* Only need to initialize non-zero fields. */ |
1761 | |
1762 | timer_setup(&dev->mem_timer, mem_timer, 0); |
1763 | |
1764 | seqlock_init(&dev->iowait_lock); |
1765 | seqlock_init(&dev->txwait_lock); |
	INIT_LIST_HEAD(&dev->txwait);
	INIT_LIST_HEAD(&dev->memwait);
1768 | |
1769 | ret = verbs_txreq_init(dev); |
1770 | if (ret) |
1771 | goto err_verbs_txreq; |
1772 | |
1773 | /* Use first-port GUID as node guid */ |
1774 | ibdev->node_guid = get_sguid(ibp, HFI1_PORT_GUID_INDEX); |
1775 | |
1776 | /* |
1777 | * The system image GUID is supposed to be the same for all |
1778 | * HFIs in a single system but since there can be other |
1779 | * device types in the system, we can't be sure this is unique. |
1780 | */ |
1781 | if (!ib_hfi1_sys_image_guid) |
1782 | ib_hfi1_sys_image_guid = ibdev->node_guid; |
1783 | ibdev->phys_port_cnt = dd->num_pports; |
1784 | ibdev->dev.parent = &dd->pcidev->dev; |
1785 | |
	ib_set_device_ops(ibdev, &hfi1_dev_ops);
1787 | |
1788 | strscpy(ibdev->node_desc, init_utsname()->nodename, |
1789 | sizeof(ibdev->node_desc)); |
1790 | |
1791 | /* |
1792 | * Fill in rvt info object. |
1793 | */ |
1794 | dd->verbs_dev.rdi.driver_f.get_pci_dev = get_pci_dev; |
1795 | dd->verbs_dev.rdi.driver_f.check_ah = hfi1_check_ah; |
1796 | dd->verbs_dev.rdi.driver_f.notify_new_ah = hfi1_notify_new_ah; |
1797 | dd->verbs_dev.rdi.driver_f.get_guid_be = hfi1_get_guid_be; |
1798 | dd->verbs_dev.rdi.driver_f.query_port_state = query_port; |
1799 | dd->verbs_dev.rdi.driver_f.shut_down_port = shut_down_port; |
1800 | dd->verbs_dev.rdi.driver_f.cap_mask_chg = hfi1_cap_mask_chg; |
1801 | /* |
1802 | * Fill in rvt info device attributes. |
1803 | */ |
1804 | hfi1_fill_device_attr(dd); |
1805 | |
1806 | /* queue pair */ |
1807 | dd->verbs_dev.rdi.dparms.qp_table_size = hfi1_qp_table_size; |
1808 | dd->verbs_dev.rdi.dparms.qpn_start = 0; |
1809 | dd->verbs_dev.rdi.dparms.qpn_inc = 1; |
1810 | dd->verbs_dev.rdi.dparms.qos_shift = dd->qos_shift; |
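	/* reserve the KDETH and AIP QPN range from general allocation */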
1811 | dd->verbs_dev.rdi.dparms.qpn_res_start = RVT_KDETH_QP_BASE; |
1812 | dd->verbs_dev.rdi.dparms.qpn_res_end = RVT_AIP_QP_MAX; |
1813 | dd->verbs_dev.rdi.dparms.max_rdma_atomic = HFI1_MAX_RDMA_ATOMIC; |
1814 | dd->verbs_dev.rdi.dparms.psn_mask = PSN_MASK; |
1815 | dd->verbs_dev.rdi.dparms.psn_shift = PSN_SHIFT; |
1816 | dd->verbs_dev.rdi.dparms.psn_modify_mask = PSN_MODIFY_MASK; |
1817 | dd->verbs_dev.rdi.dparms.core_cap_flags = RDMA_CORE_PORT_INTEL_OPA | |
1818 | RDMA_CORE_CAP_OPA_AH; |
1819 | dd->verbs_dev.rdi.dparms.max_mad_size = OPA_MGMT_MAD_SIZE; |
1820 | |
1821 | dd->verbs_dev.rdi.driver_f.qp_priv_alloc = qp_priv_alloc; |
1822 | dd->verbs_dev.rdi.driver_f.qp_priv_init = hfi1_qp_priv_init; |
1823 | dd->verbs_dev.rdi.driver_f.qp_priv_free = qp_priv_free; |
1824 | dd->verbs_dev.rdi.driver_f.free_all_qps = free_all_qps; |
1825 | dd->verbs_dev.rdi.driver_f.notify_qp_reset = notify_qp_reset; |
1826 | dd->verbs_dev.rdi.driver_f.do_send = hfi1_do_send_from_rvt; |
1827 | dd->verbs_dev.rdi.driver_f.schedule_send = hfi1_schedule_send; |
1828 | dd->verbs_dev.rdi.driver_f.schedule_send_no_lock = _hfi1_schedule_send; |
1829 | dd->verbs_dev.rdi.driver_f.get_pmtu_from_attr = get_pmtu_from_attr; |
1830 | dd->verbs_dev.rdi.driver_f.notify_error_qp = notify_error_qp; |
1831 | dd->verbs_dev.rdi.driver_f.flush_qp_waiters = flush_qp_waiters; |
1832 | dd->verbs_dev.rdi.driver_f.stop_send_queue = stop_send_queue; |
1833 | dd->verbs_dev.rdi.driver_f.quiesce_qp = quiesce_qp; |
1835 | dd->verbs_dev.rdi.driver_f.mtu_from_qp = mtu_from_qp; |
1836 | dd->verbs_dev.rdi.driver_f.mtu_to_path_mtu = mtu_to_path_mtu; |
1837 | dd->verbs_dev.rdi.driver_f.check_modify_qp = hfi1_check_modify_qp; |
1838 | dd->verbs_dev.rdi.driver_f.modify_qp = hfi1_modify_qp; |
1839 | dd->verbs_dev.rdi.driver_f.notify_restart_rc = hfi1_restart_rc; |
1840 | dd->verbs_dev.rdi.driver_f.setup_wqe = hfi1_setup_wqe; |
1841 | dd->verbs_dev.rdi.driver_f.comp_vect_cpu_lookup = |
1842 | hfi1_comp_vect_mappings_lookup; |
1843 | |
	/* completion queue */
1845 | dd->verbs_dev.rdi.ibdev.num_comp_vectors = dd->comp_vect_possible_cpus; |
1846 | dd->verbs_dev.rdi.dparms.node = dd->node; |
1847 | |
1848 | /* misc settings */ |
1849 | dd->verbs_dev.rdi.flags = 0; /* Let rdmavt handle it all */ |
1850 | dd->verbs_dev.rdi.dparms.lkey_table_size = hfi1_lkey_table_size; |
1851 | dd->verbs_dev.rdi.dparms.nports = dd->num_pports; |
1852 | dd->verbs_dev.rdi.dparms.npkeys = hfi1_get_npkeys(dd); |
1853 | dd->verbs_dev.rdi.dparms.sge_copy_mode = sge_copy_mode; |
1854 | dd->verbs_dev.rdi.dparms.wss_threshold = wss_threshold; |
1855 | dd->verbs_dev.rdi.dparms.wss_clean_period = wss_clean_period; |
1856 | dd->verbs_dev.rdi.dparms.reserved_operations = 1; |
1857 | dd->verbs_dev.rdi.dparms.extra_rdma_atomic = HFI1_TID_RDMA_WRITE_CNT; |
1858 | |
1859 | /* post send table */ |
1860 | dd->verbs_dev.rdi.post_parms = hfi1_post_parms; |
1861 | |
1862 | /* opcode translation table */ |
1863 | dd->verbs_dev.rdi.wc_opcode = ib_hfi1_wc_opcode; |
1864 | |
1865 | ppd = dd->pport; |
1866 | for (i = 0; i < dd->num_pports; i++, ppd++) |
		rvt_init_port(&dd->verbs_dev.rdi,
			      &ppd->ibport_data.rvp,
			      i,
			      ppd->pkeys);
1871 | |
	ret = rvt_register_device(&dd->verbs_dev.rdi);
1873 | if (ret) |
1874 | goto err_verbs_txreq; |
1875 | |
1876 | ret = hfi1_verbs_register_sysfs(dd); |
1877 | if (ret) |
1878 | goto err_class; |
1879 | |
1880 | return ret; |
1881 | |
1882 | err_class: |
	rvt_unregister_device(&dd->verbs_dev.rdi);
1884 | err_verbs_txreq: |
1885 | verbs_txreq_exit(dev); |
	dd_dev_err(dd, "cannot register verbs: %d!\n", -ret);
1887 | return ret; |
1888 | } |
1889 | |
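/**
 * hfi1_unregister_ib_device - unregister our device from the infiniband core
 * @dd: the device data structure
 */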
1890 | void hfi1_unregister_ib_device(struct hfi1_devdata *dd) |
1891 | { |
1892 | struct hfi1_ibdev *dev = &dd->verbs_dev; |
1893 | |
1894 | hfi1_verbs_unregister_sysfs(dd); |
1895 | |
	rvt_unregister_device(&dd->verbs_dev.rdi);
1897 | |
	if (!list_empty(&dev->txwait))
		dd_dev_err(dd, "txwait list not empty!\n");
	if (!list_empty(&dev->memwait))
		dd_dev_err(dd, "memwait list not empty!\n");
1902 | |
	del_timer_sync(&dev->mem_timer);
1904 | verbs_txreq_exit(dev); |
1905 | |
	kfree(dev_cntr_descs);
	kfree(port_cntr_descs);
1908 | dev_cntr_descs = NULL; |
1909 | port_cntr_descs = NULL; |
1910 | } |
1911 | |
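/*
 * hfi1_cnp_rcv - handle a received Congestion Notification Packet by
 * extracting the flow identifiers (SL, LIDs, QPNs, service type) and
 * passing them to process_becn().
 */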
1912 | void hfi1_cnp_rcv(struct hfi1_packet *packet) |
1913 | { |
	struct hfi1_ibport *ibp = rcd_to_iport(packet->rcd);
1915 | struct hfi1_pportdata *ppd = ppd_from_ibp(ibp); |
1916 | struct ib_header *hdr = packet->hdr; |
1917 | struct rvt_qp *qp = packet->qp; |
1918 | u32 lqpn, rqpn = 0; |
1919 | u16 rlid = 0; |
1920 | u8 sl, sc5, svc_type; |
1921 | |
1922 | switch (packet->qp->ibqp.qp_type) { |
1923 | case IB_QPT_UC: |
		rlid = rdma_ah_get_dlid(&qp->remote_ah_attr);
1925 | rqpn = qp->remote_qpn; |
1926 | svc_type = IB_CC_SVCTYPE_UC; |
1927 | break; |
1928 | case IB_QPT_RC: |
		rlid = rdma_ah_get_dlid(&qp->remote_ah_attr);
1930 | rqpn = qp->remote_qpn; |
1931 | svc_type = IB_CC_SVCTYPE_RC; |
1932 | break; |
1933 | case IB_QPT_SMI: |
1934 | case IB_QPT_GSI: |
1935 | case IB_QPT_UD: |
1936 | svc_type = IB_CC_SVCTYPE_UD; |
1937 | break; |
1938 | default: |
1939 | ibp->rvp.n_pkt_drops++; |
1940 | return; |
1941 | } |
1942 | |
	sc5 = hfi1_9B_get_sc5(hdr, packet->rhf);
1944 | sl = ibp->sc_to_sl[sc5]; |
1945 | lqpn = qp->ibqp.qp_num; |
1946 | |
1947 | process_becn(ppd, sl, rlid, lqpn, rqpn, svc_type); |
1948 | } |
1949 | |