// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/*
 * Copyright(c) 2015-2020 Intel Corporation.
 * Copyright(c) 2021 Cornelis Networks.
 */

#include <linux/spinlock.h>
#include <linux/pci.h>
#include <linux/io.h>
#include <linux/delay.h>
#include <linux/netdevice.h>
#include <linux/vmalloc.h>
#include <linux/module.h>
#include <linux/prefetch.h>
#include <rdma/ib_verbs.h>
#include <linux/etherdevice.h>

#include "hfi.h"
#include "trace.h"
#include "qp.h"
#include "sdma.h"
#include "debugfs.h"
#include "vnic.h"
#include "fault.h"

#include "ipoib.h"
#include "netdev.h"

#undef pr_fmt
#define pr_fmt(fmt) DRIVER_NAME ": " fmt

DEFINE_MUTEX(hfi1_mutex);	/* general driver use */

unsigned int hfi1_max_mtu = HFI1_DEFAULT_MAX_MTU;
module_param_named(max_mtu, hfi1_max_mtu, uint, S_IRUGO);
MODULE_PARM_DESC(max_mtu, "Set max MTU bytes, default is " __stringify(
		 HFI1_DEFAULT_MAX_MTU));

unsigned int hfi1_cu = 1;
module_param_named(cu, hfi1_cu, uint, S_IRUGO);
MODULE_PARM_DESC(cu, "Credit return units");

unsigned long hfi1_cap_mask = HFI1_CAP_MASK_DEFAULT;
static int hfi1_caps_set(const char *val, const struct kernel_param *kp);
static int hfi1_caps_get(char *buffer, const struct kernel_param *kp);
static const struct kernel_param_ops cap_ops = {
        .set = hfi1_caps_set,
        .get = hfi1_caps_get
};
module_param_cb(cap_mask, &cap_ops, &hfi1_cap_mask, S_IWUSR | S_IRUGO);
MODULE_PARM_DESC(cap_mask, "Bit mask of enabled/disabled HW features");

MODULE_LICENSE("Dual BSD/GPL");
MODULE_DESCRIPTION("Cornelis Omni-Path Express driver");
/*
 * MAX_PKT_RECV is the max # of packets processed per receive interrupt.
 */
#define MAX_PKT_RECV 64
/*
 * MAX_PKT_RECV_THREAD is the max # of packets processed before
 * the qp_wait_list queue is flushed.
 */
#define MAX_PKT_RECV_THREAD (MAX_PKT_RECV * 4)
#define EGR_HEAD_UPDATE_THRESHOLD 16

struct hfi1_ib_stats hfi1_stats;

static int hfi1_caps_set(const char *val, const struct kernel_param *kp)
{
        int ret = 0;
        unsigned long *cap_mask_ptr = (unsigned long *)kp->arg,
                cap_mask = *cap_mask_ptr, value, diff,
                write_mask = ((HFI1_CAP_WRITABLE_MASK << HFI1_CAP_USER_SHIFT) |
                              HFI1_CAP_WRITABLE_MASK);

        ret = kstrtoul(val, 0, &value);
        if (ret) {
                pr_warn("Invalid module parameter value for 'cap_mask'\n");
                goto done;
        }
        /* Get the changed bits (except the locked bit) */
        diff = value ^ (cap_mask & ~HFI1_CAP_LOCKED_SMASK);

        /* Remove any bits that are not allowed to change after driver load */
        if (HFI1_CAP_LOCKED() && (diff & ~write_mask)) {
                pr_warn("Ignoring non-writable capability bits %#lx\n",
                        diff & ~write_mask);
                diff &= write_mask;
        }

        /* Mask off any reserved bits */
        diff &= ~HFI1_CAP_RESERVED_MASK;
        /* Clear any previously set and changing bits */
        cap_mask &= ~diff;
        /* Update the bits with the new capability */
        cap_mask |= (value & diff);
        /* Check for any kernel/user restrictions */
        diff = (cap_mask & (HFI1_CAP_MUST_HAVE_KERN << HFI1_CAP_USER_SHIFT)) ^
               ((cap_mask & HFI1_CAP_MUST_HAVE_KERN) << HFI1_CAP_USER_SHIFT);
        cap_mask &= ~diff;
        /* Set the bitmask to the final set */
        *cap_mask_ptr = cap_mask;
done:
        return ret;
}
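
/*
 * Illustrative walk-through (editor's sketch, not part of the original
 * driver; the bit positions are hypothetical): suppose the current
 * cap_mask is 0b0110 and the user writes 0b0011.  Then
 * diff = 0b0011 ^ 0b0110 = 0b0101, the set of bits that would change.
 * If only bit 0 is writable (write_mask = 0b0001) and the mask is
 * locked, diff is trimmed to 0b0001, so the final value becomes
 * (0b0110 & ~0b0001) | (0b0011 & 0b0001) = 0b0111: only the writable
 * bit is taken from the user's value, everything else is preserved.
 */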

static int hfi1_caps_get(char *buffer, const struct kernel_param *kp)
{
        unsigned long cap_mask = *(unsigned long *)kp->arg;

        cap_mask &= ~HFI1_CAP_LOCKED_SMASK;
        cap_mask |= ((cap_mask & HFI1_CAP_K2U) << HFI1_CAP_USER_SHIFT);

        return sysfs_emit(buffer, "0x%lx\n", cap_mask);
}

struct pci_dev *get_pci_dev(struct rvt_dev_info *rdi)
{
        struct hfi1_ibdev *ibdev = container_of(rdi, struct hfi1_ibdev, rdi);
        struct hfi1_devdata *dd = container_of(ibdev,
                                               struct hfi1_devdata, verbs_dev);
        return dd->pcidev;
}

/*
 * Return count of units with at least one port ACTIVE.
 */
int hfi1_count_active_units(void)
{
        struct hfi1_devdata *dd;
        struct hfi1_pportdata *ppd;
        unsigned long index, flags;
        int pidx, nunits_active = 0;

        xa_lock_irqsave(&hfi1_dev_table, flags);
        xa_for_each(&hfi1_dev_table, index, dd) {
                if (!(dd->flags & HFI1_PRESENT) || !dd->kregbase1)
                        continue;
                for (pidx = 0; pidx < dd->num_pports; ++pidx) {
                        ppd = dd->pport + pidx;
                        if (ppd->lid && ppd->linkup) {
                                nunits_active++;
                                break;
                        }
                }
        }
        xa_unlock_irqrestore(&hfi1_dev_table, flags);
        return nunits_active;
}

/*
 * Get address of eager buffer from its index (allocated in chunks, not
 * contiguous).
 */
static inline void *get_egrbuf(const struct hfi1_ctxtdata *rcd, u64 rhf,
                               u8 *update)
{
        u32 idx = rhf_egr_index(rhf), offset = rhf_egr_buf_offset(rhf);

        *update |= !(idx & (rcd->egrbufs.threshold - 1)) && !offset;
        return (void *)(((u64)(rcd->egrbufs.rcvtids[idx].addr)) +
                        (offset * RCV_BUF_BLOCK_SIZE));
}
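
/*
 * Worked example (editor's illustration, not in the original source;
 * the numbers are hypothetical): with RCV_BUF_BLOCK_SIZE = 4096 and an
 * RHF carrying egr_index = 5 and egr_buf_offset = 3, the payload lives
 * at rcvtids[5].addr + 3 * 4096.  If egrbufs.threshold = 8, *update is
 * set only when idx is a multiple of 8 and offset is 0, i.e. once per
 * threshold's worth of consumed eager entries.
 */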

static inline void *hfi1_get_header(struct hfi1_ctxtdata *rcd,
                                    __le32 *rhf_addr)
{
        u32 offset = rhf_hdrq_offset(rhf_to_cpu(rhf_addr));

        return (void *)(rhf_addr - rcd->rhf_offset + offset);
}

static inline struct ib_header *hfi1_get_msgheader(struct hfi1_ctxtdata *rcd,
                                                   __le32 *rhf_addr)
{
        return (struct ib_header *)hfi1_get_header(rcd, rhf_addr);
}

static inline struct hfi1_16b_header
        *hfi1_get_16B_header(struct hfi1_ctxtdata *rcd,
                             __le32 *rhf_addr)
{
        return (struct hfi1_16b_header *)hfi1_get_header(rcd, rhf_addr);
}

/*
 * Validate and encode a given RcvArray Buffer size.
 * The function will check whether the given size falls within
 * allowed size ranges for the respective type and, optionally,
 * return the proper encoding.
 */
int hfi1_rcvbuf_validate(u32 size, u8 type, u16 *encoded)
{
        if (unlikely(!PAGE_ALIGNED(size)))
                return 0;
        if (unlikely(size < MIN_EAGER_BUFFER))
                return 0;
        if (size >
            (type == PT_EAGER ? MAX_EAGER_BUFFER : MAX_EXPECTED_BUFFER))
                return 0;
        if (encoded)
                *encoded = ilog2(size / PAGE_SIZE) + 1;
        return 1;
}
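
/*
 * Worked encoding example (editor's sketch, assuming PAGE_SIZE = 4096):
 * an 8 KiB eager buffer yields ilog2(8192 / 4096) + 1 = 2, and a 64 KiB
 * buffer yields ilog2(16) + 1 = 5.  The "+ 1" shifts the scale so that
 * the smallest legal buffer (one page) encodes as 1 rather than 0.
 */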

static void rcv_hdrerr(struct hfi1_ctxtdata *rcd, struct hfi1_pportdata *ppd,
                       struct hfi1_packet *packet)
{
        struct ib_header *rhdr = packet->hdr;
        u32 rte = rhf_rcv_type_err(packet->rhf);
        u32 mlid_base;
        struct hfi1_ibport *ibp = rcd_to_iport(rcd);
        struct hfi1_devdata *dd = ppd->dd;
        struct hfi1_ibdev *verbs_dev = &dd->verbs_dev;
        struct rvt_dev_info *rdi = &verbs_dev->rdi;

        if ((packet->rhf & RHF_DC_ERR) &&
            hfi1_dbg_fault_suppress_err(verbs_dev))
                return;

        if (packet->rhf & RHF_ICRC_ERR)
                return;

        if (packet->etype == RHF_RCV_TYPE_BYPASS) {
                goto drop;
        } else {
                u8 lnh = ib_get_lnh(rhdr);

                mlid_base = be16_to_cpu(IB_MULTICAST_LID_BASE);
                if (lnh == HFI1_LRH_BTH) {
                        packet->ohdr = &rhdr->u.oth;
                } else if (lnh == HFI1_LRH_GRH) {
                        packet->ohdr = &rhdr->u.l.oth;
                        packet->grh = &rhdr->u.l.grh;
                } else {
                        goto drop;
                }
        }

        if (packet->rhf & RHF_TID_ERR) {
                /* For TIDERR and RC QPs preemptively schedule a NAK */
                u32 tlen = rhf_pkt_len(packet->rhf); /* in bytes */
                u32 dlid = ib_get_dlid(rhdr);
                u32 qp_num;

                /* Sanity check packet */
                if (tlen < 24)
                        goto drop;

                /* Check for GRH */
                if (packet->grh) {
                        u32 vtf;
                        struct ib_grh *grh = packet->grh;

                        if (grh->next_hdr != IB_GRH_NEXT_HDR)
                                goto drop;
                        vtf = be32_to_cpu(grh->version_tclass_flow);
                        if ((vtf >> IB_GRH_VERSION_SHIFT) != IB_GRH_VERSION)
                                goto drop;
                }

                /* Get the destination QP number. */
                qp_num = ib_bth_get_qpn(packet->ohdr);
                if (dlid < mlid_base) {
                        struct rvt_qp *qp;
                        unsigned long flags;

                        rcu_read_lock();
                        qp = rvt_lookup_qpn(rdi, &ibp->rvp, qp_num);
                        if (!qp) {
                                rcu_read_unlock();
                                goto drop;
                        }

                        /*
                         * Handle only RC QPs - for other QP types drop error
                         * packet.
                         */
                        spin_lock_irqsave(&qp->r_lock, flags);

                        /* Check for valid receive state. */
                        if (!(ib_rvt_state_ops[qp->state] &
                              RVT_PROCESS_RECV_OK)) {
                                ibp->rvp.n_pkt_drops++;
                        }

                        switch (qp->ibqp.qp_type) {
                        case IB_QPT_RC:
                                hfi1_rc_hdrerr(rcd, packet, qp);
                                break;
                        default:
                                /* For now don't handle any other QP types */
                                break;
                        }

                        spin_unlock_irqrestore(&qp->r_lock, flags);
                        rcu_read_unlock();
                } /* Unicast QP */
        } /* Valid packet with TIDErr */

        /* handle "RcvTypeErr" flags */
        switch (rte) {
        case RHF_RTE_ERROR_OP_CODE_ERR:
        {
                void *ebuf = NULL;
                u8 opcode;

                if (rhf_use_egr_bfr(packet->rhf))
                        ebuf = packet->ebuf;

                if (!ebuf)
                        goto drop; /* this should never happen */

                opcode = ib_bth_get_opcode(packet->ohdr);
                if (opcode == IB_OPCODE_CNP) {
                        /*
                         * Only in pre-B0 h/w is the CNP_OPCODE handled
                         * via this code path.
                         */
                        struct rvt_qp *qp = NULL;
                        u32 lqpn, rqpn;
                        u16 rlid;
                        u8 svc_type, sl, sc5;

                        sc5 = hfi1_9B_get_sc5(rhdr, packet->rhf);
                        sl = ibp->sc_to_sl[sc5];

                        lqpn = ib_bth_get_qpn(packet->ohdr);
                        rcu_read_lock();
                        qp = rvt_lookup_qpn(rdi, &ibp->rvp, lqpn);
                        if (!qp) {
                                rcu_read_unlock();
                                goto drop;
                        }

                        switch (qp->ibqp.qp_type) {
                        case IB_QPT_UD:
                                rlid = 0;
                                rqpn = 0;
                                svc_type = IB_CC_SVCTYPE_UD;
                                break;
                        case IB_QPT_UC:
                                rlid = ib_get_slid(rhdr);
                                rqpn = qp->remote_qpn;
                                svc_type = IB_CC_SVCTYPE_UC;
                                break;
                        default:
                                rcu_read_unlock();
                                goto drop;
                        }

                        process_becn(ppd, sl, rlid, lqpn, rqpn, svc_type);
                        rcu_read_unlock();
                }

                packet->rhf &= ~RHF_RCV_TYPE_ERR_SMASK;
                break;
        }
        default:
                break;
        }

drop:
        return;
}

static inline void init_packet(struct hfi1_ctxtdata *rcd,
                               struct hfi1_packet *packet)
{
        packet->rsize = get_hdrqentsize(rcd); /* words */
        packet->maxcnt = get_hdrq_cnt(rcd) * packet->rsize; /* words */
        packet->rcd = rcd;
        packet->updegr = 0;
        packet->etail = -1;
        packet->rhf_addr = get_rhf_addr(rcd);
        packet->rhf = rhf_to_cpu(packet->rhf_addr);
        packet->rhqoff = hfi1_rcd_head(rcd);
        packet->numpkt = 0;
}

/* We support only two types - 9B and 16B for now */
static const hfi1_handle_cnp hfi1_handle_cnp_tbl[2] = {
        [HFI1_PKT_TYPE_9B] = &return_cnp,
        [HFI1_PKT_TYPE_16B] = &return_cnp_16B
};

/**
 * hfi1_process_ecn_slowpath - Process FECN or BECN bits
 * @qp: The packet's destination QP
 * @pkt: The packet itself.
 * @prescan: Is the caller the RXQ prescan
 *
 * Process the packet's FECN or BECN bits. By now, it has already been
 * determined that processing of those bits is warranted.
 * The significance of the @prescan argument is that if the caller
 * is the RXQ prescan, a CNP will be sent out instead of waiting for the
 * normal packet processing to send an ACK with BECN set (or a CNP).
 */
bool hfi1_process_ecn_slowpath(struct rvt_qp *qp, struct hfi1_packet *pkt,
                               bool prescan)
{
        struct hfi1_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
        struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
        struct ib_other_headers *ohdr = pkt->ohdr;
        struct ib_grh *grh = pkt->grh;
        u32 rqpn = 0;
        u16 pkey;
        u32 rlid, slid, dlid = 0;
        u8 hdr_type, sc, svc_type, opcode;
        bool is_mcast = false, ignore_fecn = false, do_cnp = false,
                fecn, becn;

        /* can be called from prescan */
        if (pkt->etype == RHF_RCV_TYPE_BYPASS) {
                pkey = hfi1_16B_get_pkey(pkt->hdr);
                sc = hfi1_16B_get_sc(pkt->hdr);
                dlid = hfi1_16B_get_dlid(pkt->hdr);
                slid = hfi1_16B_get_slid(pkt->hdr);
                is_mcast = hfi1_is_16B_mcast(dlid);
                opcode = ib_bth_get_opcode(ohdr);
                hdr_type = HFI1_PKT_TYPE_16B;
                fecn = hfi1_16B_get_fecn(pkt->hdr);
                becn = hfi1_16B_get_becn(pkt->hdr);
        } else {
                pkey = ib_bth_get_pkey(ohdr);
                sc = hfi1_9B_get_sc5(pkt->hdr, pkt->rhf);
                dlid = qp->ibqp.qp_type != IB_QPT_UD ? ib_get_dlid(pkt->hdr) :
                        ppd->lid;
                slid = ib_get_slid(pkt->hdr);
                is_mcast = (dlid > be16_to_cpu(IB_MULTICAST_LID_BASE)) &&
                           (dlid != be16_to_cpu(IB_LID_PERMISSIVE));
                opcode = ib_bth_get_opcode(ohdr);
                hdr_type = HFI1_PKT_TYPE_9B;
                fecn = ib_bth_get_fecn(ohdr);
                becn = ib_bth_get_becn(ohdr);
        }

        switch (qp->ibqp.qp_type) {
        case IB_QPT_UD:
                rlid = slid;
                rqpn = ib_get_sqpn(pkt->ohdr);
                svc_type = IB_CC_SVCTYPE_UD;
                break;
        case IB_QPT_SMI:
        case IB_QPT_GSI:
                rlid = slid;
                rqpn = ib_get_sqpn(pkt->ohdr);
                svc_type = IB_CC_SVCTYPE_UD;
                break;
        case IB_QPT_UC:
                rlid = rdma_ah_get_dlid(&qp->remote_ah_attr);
                rqpn = qp->remote_qpn;
                svc_type = IB_CC_SVCTYPE_UC;
                break;
        case IB_QPT_RC:
                rlid = rdma_ah_get_dlid(&qp->remote_ah_attr);
                rqpn = qp->remote_qpn;
                svc_type = IB_CC_SVCTYPE_RC;
                break;
        default:
                return false;
        }

        ignore_fecn = is_mcast || (opcode == IB_OPCODE_CNP) ||
                (opcode == IB_OPCODE_RC_ACKNOWLEDGE);
        /*
         * ACKNOWLEDGE packets do not get a CNP but this will be
         * guarded by ignore_fecn above.
         */
        do_cnp = prescan ||
                (opcode >= IB_OPCODE_RC_RDMA_READ_RESPONSE_FIRST &&
                 opcode <= IB_OPCODE_RC_ATOMIC_ACKNOWLEDGE) ||
                opcode == TID_OP(READ_RESP) ||
                opcode == TID_OP(ACK);

        /* Call appropriate CNP handler */
        if (!ignore_fecn && do_cnp && fecn)
                hfi1_handle_cnp_tbl[hdr_type](ibp, qp, rqpn, pkey,
                                              dlid, rlid, sc, grh);

        if (becn) {
                u32 lqpn = be32_to_cpu(ohdr->bth[1]) & RVT_QPN_MASK;
                u8 sl = ibp->sc_to_sl[sc];

                process_becn(ppd, sl, rlid, lqpn, rqpn, svc_type);
        }
        return !ignore_fecn && fecn;
}

struct ps_mdata {
        struct hfi1_ctxtdata *rcd;
        u32 rsize;
        u32 maxcnt;
        u32 ps_head;
        u32 ps_tail;
        u32 ps_seq;
};

static inline void init_ps_mdata(struct ps_mdata *mdata,
                                 struct hfi1_packet *packet)
{
        struct hfi1_ctxtdata *rcd = packet->rcd;

        mdata->rcd = rcd;
        mdata->rsize = packet->rsize;
        mdata->maxcnt = packet->maxcnt;
        mdata->ps_head = packet->rhqoff;

        if (get_dma_rtail_setting(rcd)) {
                mdata->ps_tail = get_rcvhdrtail(rcd);
                if (rcd->ctxt == HFI1_CTRL_CTXT)
                        mdata->ps_seq = hfi1_seq_cnt(rcd);
                else
                        mdata->ps_seq = 0; /* not used with DMA_RTAIL */
        } else {
                mdata->ps_tail = 0; /* used only with DMA_RTAIL */
                mdata->ps_seq = hfi1_seq_cnt(rcd);
        }
}

static inline int ps_done(struct ps_mdata *mdata, u64 rhf,
                          struct hfi1_ctxtdata *rcd)
{
        if (get_dma_rtail_setting(rcd))
                return mdata->ps_head == mdata->ps_tail;
        return mdata->ps_seq != rhf_rcv_seq(rhf);
}

static inline int ps_skip(struct ps_mdata *mdata, u64 rhf,
                          struct hfi1_ctxtdata *rcd)
{
        /*
         * Control context can potentially receive an invalid rhf.
         * Drop such packets.
         */
        if ((rcd->ctxt == HFI1_CTRL_CTXT) && (mdata->ps_head != mdata->ps_tail))
                return mdata->ps_seq != rhf_rcv_seq(rhf);

        return 0;
}

static inline void update_ps_mdata(struct ps_mdata *mdata,
                                   struct hfi1_ctxtdata *rcd)
{
        mdata->ps_head += mdata->rsize;
        if (mdata->ps_head >= mdata->maxcnt)
                mdata->ps_head = 0;

        /* Control context must do seq counting */
        if (!get_dma_rtail_setting(rcd) ||
            rcd->ctxt == HFI1_CTRL_CTXT)
                mdata->ps_seq = hfi1_seq_incr_wrap(mdata->ps_seq);
}
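
/*
 * Editor's note on the two completion modes (illustrative, not from the
 * original source): with DMA_RTAIL the hardware DMAs a tail index to
 * host memory, so the walk is done when head catches tail.  Without it,
 * the driver relies on the rolling sequence number the hardware writes
 * into each RHF; assuming hfi1_seq_incr_wrap() advances the expected
 * value by one per entry and wraps, a stale entry still carries the
 * previous pass's sequence value, so ps_seq != rhf_rcv_seq(rhf) marks
 * the end of the valid region without any extra DMA traffic.
 */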

/*
 * prescan_rxq - search through the receive queue looking for packets
 * containing Explicit Congestion Notifications (FECNs or BECNs).
 * When an ECN is found, process the Congestion Notification, and toggle
 * it off.
 * This is declared as a macro to allow quick checking of the port to avoid
 * the overhead of a function call if not enabled.
 */
#define prescan_rxq(rcd, packet) \
        do { \
                if (rcd->ppd->cc_prescan) \
                        __prescan_rxq(packet); \
        } while (0)
static void __prescan_rxq(struct hfi1_packet *packet)
{
        struct hfi1_ctxtdata *rcd = packet->rcd;
        struct ps_mdata mdata;

        init_ps_mdata(&mdata, packet);

        while (1) {
                struct hfi1_ibport *ibp = rcd_to_iport(rcd);
                __le32 *rhf_addr = (__le32 *)rcd->rcvhdrq + mdata.ps_head +
                        packet->rcd->rhf_offset;
                struct rvt_qp *qp;
                struct ib_header *hdr;
                struct rvt_dev_info *rdi = &rcd->dd->verbs_dev.rdi;
                u64 rhf = rhf_to_cpu(rhf_addr);
                u32 etype = rhf_rcv_type(rhf), qpn, bth1;
                u8 lnh;

                if (ps_done(&mdata, rhf, rcd))
                        break;

                if (ps_skip(&mdata, rhf, rcd))
                        goto next;

                if (etype != RHF_RCV_TYPE_IB)
                        goto next;

                packet->hdr = hfi1_get_msgheader(packet->rcd, rhf_addr);
                hdr = packet->hdr;
                lnh = ib_get_lnh(hdr);

                if (lnh == HFI1_LRH_BTH) {
                        packet->ohdr = &hdr->u.oth;
                        packet->grh = NULL;
                } else if (lnh == HFI1_LRH_GRH) {
                        packet->ohdr = &hdr->u.l.oth;
                        packet->grh = &hdr->u.l.grh;
                } else {
                        goto next; /* just in case */
                }

                if (!hfi1_may_ecn(packet))
                        goto next;

                bth1 = be32_to_cpu(packet->ohdr->bth[1]);
                qpn = bth1 & RVT_QPN_MASK;
                rcu_read_lock();
                qp = rvt_lookup_qpn(rdi, &ibp->rvp, qpn);

                if (!qp) {
                        rcu_read_unlock();
                        goto next;
                }

                hfi1_process_ecn_slowpath(qp, packet, true);
                rcu_read_unlock();

                /* turn off BECN, FECN */
                bth1 &= ~(IB_FECN_SMASK | IB_BECN_SMASK);
                packet->ohdr->bth[1] = cpu_to_be32(bth1);
next:
                update_ps_mdata(&mdata, rcd);
        }
}

static void process_rcv_qp_work(struct hfi1_packet *packet)
{
        struct rvt_qp *qp, *nqp;
        struct hfi1_ctxtdata *rcd = packet->rcd;

        /*
         * Iterate over all QPs waiting to respond.
         * The list won't change since the IRQ is only run on one CPU.
         */
        list_for_each_entry_safe(qp, nqp, &rcd->qp_wait_list, rspwait) {
                list_del_init(&qp->rspwait);
                if (qp->r_flags & RVT_R_RSP_NAK) {
                        qp->r_flags &= ~RVT_R_RSP_NAK;
                        packet->qp = qp;
                        hfi1_send_rc_ack(packet, 0);
                }
                if (qp->r_flags & RVT_R_RSP_SEND) {
                        unsigned long flags;

                        qp->r_flags &= ~RVT_R_RSP_SEND;
                        spin_lock_irqsave(&qp->s_lock, flags);
                        if (ib_rvt_state_ops[qp->state] &
                                        RVT_PROCESS_OR_FLUSH_SEND)
                                hfi1_schedule_send(qp);
                        spin_unlock_irqrestore(&qp->s_lock, flags);
                }
                rvt_put_qp(qp);
        }
}

static noinline int max_packet_exceeded(struct hfi1_packet *packet, int thread)
{
        if (thread) {
                if ((packet->numpkt & (MAX_PKT_RECV_THREAD - 1)) == 0)
                        /* allow deferred processing */
                        process_rcv_qp_work(packet);
                cond_resched();
                return RCV_PKT_OK;
        } else {
                this_cpu_inc(*packet->rcd->dd->rcv_limit);
                return RCV_PKT_LIMIT;
        }
}

static inline int check_max_packet(struct hfi1_packet *packet, int thread)
{
        int ret = RCV_PKT_OK;

        if (unlikely((packet->numpkt & (MAX_PKT_RECV - 1)) == 0))
                ret = max_packet_exceeded(packet, thread);
        return ret;
}
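
/*
 * Editor's illustration (not part of the original source): because
 * MAX_PKT_RECV (64) and MAX_PKT_RECV_THREAD (256) are powers of two,
 * "numpkt & (N - 1)" is a cheap replacement for "numpkt % N".  E.g. at
 * numpkt = 128: 128 & 63 == 0, so the budget check fires; in the
 * threaded case 128 & 255 != 0, so the qp_wait_list flush is deferred
 * until numpkt reaches 256.
 */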

static noinline int skip_rcv_packet(struct hfi1_packet *packet, int thread)
{
        int ret;

        packet->rcd->dd->ctx0_seq_drop++;
        /* Set up for the next packet */
        packet->rhqoff += packet->rsize;
        if (packet->rhqoff >= packet->maxcnt)
                packet->rhqoff = 0;

        packet->numpkt++;
        ret = check_max_packet(packet, thread);

        packet->rhf_addr = (__le32 *)packet->rcd->rcvhdrq + packet->rhqoff +
                packet->rcd->rhf_offset;
        packet->rhf = rhf_to_cpu(packet->rhf_addr);

        return ret;
}

static void process_rcv_packet_napi(struct hfi1_packet *packet)
{
        packet->etype = rhf_rcv_type(packet->rhf);

        /* total length */
        packet->tlen = rhf_pkt_len(packet->rhf); /* in bytes */
        /* retrieve eager buffer details */
        packet->etail = rhf_egr_index(packet->rhf);
        packet->ebuf = get_egrbuf(packet->rcd, packet->rhf,
                                  &packet->updegr);
        /*
         * Prefetch the contents of the eager buffer. It is
         * OK to send a negative length to prefetch_range().
         * The +2 is the size of the RHF.
         */
        prefetch_range(packet->ebuf,
                       packet->tlen - ((packet->rcd->rcvhdrqentsize -
                                       (rhf_hdrq_offset(packet->rhf)
                                        + 2)) * 4));

        packet->rcd->rhf_rcv_function_map[packet->etype](packet);
        packet->numpkt++;

        /* Set up for the next packet */
        packet->rhqoff += packet->rsize;
        if (packet->rhqoff >= packet->maxcnt)
                packet->rhqoff = 0;

        packet->rhf_addr = (__le32 *)packet->rcd->rcvhdrq + packet->rhqoff +
                packet->rcd->rhf_offset;
        packet->rhf = rhf_to_cpu(packet->rhf_addr);
}

static inline int process_rcv_packet(struct hfi1_packet *packet, int thread)
{
        int ret;

        packet->etype = rhf_rcv_type(packet->rhf);

        /* total length */
        packet->tlen = rhf_pkt_len(packet->rhf); /* in bytes */
        /* retrieve eager buffer details */
        packet->ebuf = NULL;
        if (rhf_use_egr_bfr(packet->rhf)) {
                packet->etail = rhf_egr_index(packet->rhf);
                packet->ebuf = get_egrbuf(packet->rcd, packet->rhf,
                                          &packet->updegr);
                /*
                 * Prefetch the contents of the eager buffer. It is
                 * OK to send a negative length to prefetch_range().
                 * The +2 is the size of the RHF.
                 */
                prefetch_range(packet->ebuf,
                               packet->tlen - ((get_hdrqentsize(packet->rcd) -
                                               (rhf_hdrq_offset(packet->rhf)
                                                + 2)) * 4));
        }

        /*
         * Call a type specific handler for the packet. We
         * should be able to trust that etype won't be beyond
         * the range of valid indexes. If so something is really
         * wrong and we can probably just let things come
         * crashing down. There is no need to eat another
         * comparison in this performance critical code.
         */
        packet->rcd->rhf_rcv_function_map[packet->etype](packet);
        packet->numpkt++;

        /* Set up for the next packet */
        packet->rhqoff += packet->rsize;
        if (packet->rhqoff >= packet->maxcnt)
                packet->rhqoff = 0;

        ret = check_max_packet(packet, thread);

        packet->rhf_addr = (__le32 *)packet->rcd->rcvhdrq + packet->rhqoff +
                packet->rcd->rhf_offset;
        packet->rhf = rhf_to_cpu(packet->rhf_addr);

        return ret;
}
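
/*
 * Worked prefetch-length example (editor's sketch; the numbers are
 * hypothetical): with a 32-dword header queue entry and a header that
 * starts 2 dwords into the entry, (32 - (2 + 2)) * 4 = 112 bytes of
 * the packet are already in the header queue (the extra 2 dwords being
 * the RHF itself), so for a 1500-byte packet only 1500 - 112 = 1388
 * bytes are prefetched from the eager buffer.  A short packet can make
 * this negative, which prefetch_range() tolerates.
 */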

static inline void process_rcv_update(int last, struct hfi1_packet *packet)
{
        /*
         * Update head regs etc., every 16 packets, if not last pkt,
         * to help prevent rcvhdrq overflows, when many packets
         * are processed and queue is nearly full.
         * Don't request an interrupt for intermediate updates.
         */
        if (!last && !(packet->numpkt & 0xf)) {
                update_usrhead(packet->rcd, packet->rhqoff, packet->updegr,
                               packet->etail, 0, 0);
                packet->updegr = 0;
        }
        packet->grh = NULL;
}

static inline void finish_packet(struct hfi1_packet *packet)
{
        /*
         * Nothing we need to free for the packet.
         *
         * The only thing we need to do is a final update and call for an
         * interrupt
         */
        update_usrhead(packet->rcd, hfi1_rcd_head(packet->rcd), packet->updegr,
                       packet->etail, rcv_intr_dynamic, packet->numpkt);
}

/*
 * handle_receive_interrupt_napi_fp - receive a packet
 * @rcd: the context
 * @budget: polling budget
 *
 * Called from interrupt handler for receive interrupt.
 * This is the fast path interrupt handler
 * when executing in a NAPI softirq environment.
 */
int handle_receive_interrupt_napi_fp(struct hfi1_ctxtdata *rcd, int budget)
{
        struct hfi1_packet packet;

        init_packet(rcd, &packet);
        if (last_rcv_seq(rcd, rhf_rcv_seq(packet.rhf)))
                goto bail;

        while (packet.numpkt < budget) {
                process_rcv_packet_napi(&packet);
                if (hfi1_seq_incr(rcd, rhf_rcv_seq(packet.rhf)))
                        break;

                process_rcv_update(0, &packet);
        }
        hfi1_set_rcd_head(rcd, packet.rhqoff);
bail:
        finish_packet(&packet);
        return packet.numpkt;
}

/*
 * Handle receive interrupts when using the no dma rtail option.
 */
int handle_receive_interrupt_nodma_rtail(struct hfi1_ctxtdata *rcd, int thread)
{
        int last = RCV_PKT_OK;
        struct hfi1_packet packet;

        init_packet(rcd, &packet);
        if (last_rcv_seq(rcd, rhf_rcv_seq(packet.rhf))) {
                last = RCV_PKT_DONE;
                goto bail;
        }

        prescan_rxq(rcd, &packet);

        while (last == RCV_PKT_OK) {
                last = process_rcv_packet(&packet, thread);
                if (hfi1_seq_incr(rcd, rhf_rcv_seq(packet.rhf)))
                        last = RCV_PKT_DONE;
                process_rcv_update(last, &packet);
        }
        process_rcv_qp_work(&packet);
        hfi1_set_rcd_head(rcd, packet.rhqoff);
bail:
        finish_packet(&packet);
        return last;
}

int handle_receive_interrupt_dma_rtail(struct hfi1_ctxtdata *rcd, int thread)
{
        u32 hdrqtail;
        int last = RCV_PKT_OK;
        struct hfi1_packet packet;

        init_packet(rcd, &packet);
        hdrqtail = get_rcvhdrtail(rcd);
        if (packet.rhqoff == hdrqtail) {
                last = RCV_PKT_DONE;
                goto bail;
        }
        smp_rmb();  /* prevent speculative reads of dma'ed hdrq */

        prescan_rxq(rcd, &packet);

        while (last == RCV_PKT_OK) {
                last = process_rcv_packet(&packet, thread);
                if (packet.rhqoff == hdrqtail)
                        last = RCV_PKT_DONE;
                process_rcv_update(last, &packet);
        }
        process_rcv_qp_work(&packet);
        hfi1_set_rcd_head(rcd, packet.rhqoff);
bail:
        finish_packet(&packet);
        return last;
}

static void set_all_fastpath(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd)
{
        u16 i;

        /*
         * For dynamically allocated kernel contexts (like vnic) switch
         * interrupt handler only for that context. Otherwise, switch
         * interrupt handler for all statically allocated kernel contexts.
         */
        if (rcd->ctxt >= dd->first_dyn_alloc_ctxt && !rcd->is_vnic) {
                hfi1_rcd_get(rcd);
                hfi1_set_fast(rcd);
                hfi1_rcd_put(rcd);
                return;
        }

        for (i = HFI1_CTRL_CTXT + 1; i < dd->num_rcv_contexts; i++) {
                rcd = hfi1_rcd_get_by_index(dd, i);
                if (rcd && (i < dd->first_dyn_alloc_ctxt || rcd->is_vnic))
                        hfi1_set_fast(rcd);
                hfi1_rcd_put(rcd);
        }
}

void set_all_slowpath(struct hfi1_devdata *dd)
{
        struct hfi1_ctxtdata *rcd;
        u16 i;

        /* HFI1_CTRL_CTXT must always use the slow path interrupt handler */
        for (i = HFI1_CTRL_CTXT + 1; i < dd->num_rcv_contexts; i++) {
                rcd = hfi1_rcd_get_by_index(dd, i);
                if (!rcd)
                        continue;
                if (i < dd->first_dyn_alloc_ctxt || rcd->is_vnic)
                        rcd->do_interrupt = rcd->slow_handler;

                hfi1_rcd_put(rcd);
        }
}

static bool __set_armed_to_active(struct hfi1_packet *packet)
{
        u8 etype = rhf_rcv_type(packet->rhf);
        u8 sc = SC15_PACKET;

        if (etype == RHF_RCV_TYPE_IB) {
                struct ib_header *hdr = hfi1_get_msgheader(packet->rcd,
                                                           packet->rhf_addr);
                sc = hfi1_9B_get_sc5(hdr, packet->rhf);
        } else if (etype == RHF_RCV_TYPE_BYPASS) {
                struct hfi1_16b_header *hdr = hfi1_get_16B_header(
                                                packet->rcd,
                                                packet->rhf_addr);
                sc = hfi1_16B_get_sc(hdr);
        }
        if (sc != SC15_PACKET) {
                int hwstate = driver_lstate(packet->rcd->ppd);
                struct work_struct *lsaw =
                                &packet->rcd->ppd->linkstate_active_work;

                if (hwstate != IB_PORT_ACTIVE) {
                        dd_dev_info(packet->rcd->dd,
                                    "Unexpected link state %s\n",
                                    opa_lstate_name(hwstate));
                        return false;
                }

                queue_work(packet->rcd->ppd->link_wq, lsaw);
                return true;
        }
        return false;
}

/**
 * set_armed_to_active - the fast path for armed to active
 * @packet: the packet structure
 *
 * Return true if packet processing needs to bail.
 */
static bool set_armed_to_active(struct hfi1_packet *packet)
{
        if (likely(packet->rcd->ppd->host_link_state != HLS_UP_ARMED))
                return false;
        return __set_armed_to_active(packet);
}

/*
 * handle_receive_interrupt - receive a packet
 * @rcd: the context
 *
 * Called from interrupt handler for errors or receive interrupt.
 * This is the slow path interrupt handler.
 */
int handle_receive_interrupt(struct hfi1_ctxtdata *rcd, int thread)
{
        struct hfi1_devdata *dd = rcd->dd;
        u32 hdrqtail;
        int needset, last = RCV_PKT_OK;
        struct hfi1_packet packet;
        int skip_pkt = 0;

        if (!rcd->rcvhdrq)
                return RCV_PKT_OK;
        /* Control context will always use the slow path interrupt handler */
        needset = (rcd->ctxt == HFI1_CTRL_CTXT) ? 0 : 1;

        init_packet(rcd, &packet);

        if (!get_dma_rtail_setting(rcd)) {
                if (last_rcv_seq(rcd, rhf_rcv_seq(packet.rhf))) {
                        last = RCV_PKT_DONE;
                        goto bail;
                }
                hdrqtail = 0;
        } else {
                hdrqtail = get_rcvhdrtail(rcd);
                if (packet.rhqoff == hdrqtail) {
                        last = RCV_PKT_DONE;
                        goto bail;
                }
                smp_rmb();  /* prevent speculative reads of dma'ed hdrq */

                /*
                 * Control context can potentially receive an invalid
                 * rhf. Drop such packets.
                 */
                if (rcd->ctxt == HFI1_CTRL_CTXT)
                        if (last_rcv_seq(rcd, rhf_rcv_seq(packet.rhf)))
                                skip_pkt = 1;
        }

        prescan_rxq(rcd, &packet);

        while (last == RCV_PKT_OK) {
                if (hfi1_need_drop(dd)) {
                        /* On to the next packet */
                        packet.rhqoff += packet.rsize;
                        packet.rhf_addr = (__le32 *)rcd->rcvhdrq +
                                          packet.rhqoff +
                                          rcd->rhf_offset;
                        packet.rhf = rhf_to_cpu(packet.rhf_addr);

                } else if (skip_pkt) {
                        last = skip_rcv_packet(&packet, thread);
                        skip_pkt = 0;
                } else {
                        if (set_armed_to_active(&packet))
                                goto bail;
                        last = process_rcv_packet(&packet, thread);
                }

                if (!get_dma_rtail_setting(rcd)) {
                        if (hfi1_seq_incr(rcd, rhf_rcv_seq(packet.rhf)))
                                last = RCV_PKT_DONE;
                } else {
                        if (packet.rhqoff == hdrqtail)
                                last = RCV_PKT_DONE;
                        /*
                         * Control context can potentially receive an invalid
                         * rhf. Drop such packets.
                         */
                        if (rcd->ctxt == HFI1_CTRL_CTXT) {
                                bool lseq;

                                lseq = hfi1_seq_incr(rcd,
                                                     rhf_rcv_seq(packet.rhf));
                                if (!last && lseq)
                                        skip_pkt = 1;
                        }
                }

                if (needset) {
                        needset = false;
                        set_all_fastpath(dd, rcd);
                }
                process_rcv_update(last, &packet);
        }

        process_rcv_qp_work(&packet);
        hfi1_set_rcd_head(rcd, packet.rhqoff);

bail:
        /*
         * Always write head at end, and setup rcv interrupt, even
         * if no packets were processed.
         */
        finish_packet(&packet);
        return last;
}

/*
 * handle_receive_interrupt_napi_sp - receive a packet
 * @rcd: the context
 * @budget: polling budget
 *
 * Called from interrupt handler for errors or receive interrupt.
 * This is the slow path interrupt handler
 * when executing in a NAPI softirq environment.
 */
int handle_receive_interrupt_napi_sp(struct hfi1_ctxtdata *rcd, int budget)
{
        struct hfi1_devdata *dd = rcd->dd;
        int last = RCV_PKT_OK;
        bool needset = true;
        struct hfi1_packet packet;

        init_packet(rcd, &packet);
        if (last_rcv_seq(rcd, rhf_rcv_seq(packet.rhf)))
                goto bail;

        while (last != RCV_PKT_DONE && packet.numpkt < budget) {
                if (hfi1_need_drop(dd)) {
                        /* On to the next packet */
                        packet.rhqoff += packet.rsize;
                        packet.rhf_addr = (__le32 *)rcd->rcvhdrq +
                                          packet.rhqoff +
                                          rcd->rhf_offset;
                        packet.rhf = rhf_to_cpu(packet.rhf_addr);

                } else {
                        if (set_armed_to_active(&packet))
                                goto bail;
                        process_rcv_packet_napi(&packet);
                }

                if (hfi1_seq_incr(rcd, rhf_rcv_seq(packet.rhf)))
                        last = RCV_PKT_DONE;

                if (needset) {
                        needset = false;
                        set_all_fastpath(dd, rcd);
                }

                process_rcv_update(last, &packet);
        }

        hfi1_set_rcd_head(rcd, packet.rhqoff);

bail:
        /*
         * Always write head at end, and setup rcv interrupt, even
         * if no packets were processed.
         */
        finish_packet(&packet);
        return packet.numpkt;
}

/*
 * We may discover in the interrupt that the hardware link state has
 * changed from ARMED to ACTIVE (due to the arrival of a non-SC15 packet),
 * and we need to update the driver's notion of the link state. We cannot
 * run set_link_state from interrupt context, so we queue this function on
 * a workqueue.
 *
 * We delay the regular interrupt processing until after the state changes
 * so that the link will be in the correct state by the time any application
 * we wake up attempts to send a reply to any message it received.
 * (Subsequent receive interrupts may possibly force the wakeup before we
 * update the link state.)
 *
 * The rcd is freed in hfi1_free_ctxtdata after hfi1_postinit_cleanup invokes
 * dd->f_cleanup(dd) to disable the interrupt handler and flush workqueues,
 * so we're safe from use-after-free of the rcd.
 */
void receive_interrupt_work(struct work_struct *work)
{
        struct hfi1_pportdata *ppd = container_of(work, struct hfi1_pportdata,
                                                  linkstate_active_work);
        struct hfi1_devdata *dd = ppd->dd;
        struct hfi1_ctxtdata *rcd;
        u16 i;

        /* Received non-SC15 packet implies neighbor_normal */
        ppd->neighbor_normal = 1;
        set_link_state(ppd, HLS_UP_ACTIVE);

        /*
         * Interrupt all statically allocated kernel contexts that could
         * have had an interrupt during auto activation.
         */
        for (i = HFI1_CTRL_CTXT; i < dd->first_dyn_alloc_ctxt; i++) {
                rcd = hfi1_rcd_get_by_index(dd, i);
                if (rcd)
                        force_recv_intr(rcd);
                hfi1_rcd_put(rcd);
        }
}

/*
 * Convert a given MTU size to the on-wire MAD packet enumeration.
 * Return @default_if_bad if the size is invalid.
 */
int mtu_to_enum(u32 mtu, int default_if_bad)
{
        switch (mtu) {
        case 0: return OPA_MTU_0;
        case 256: return OPA_MTU_256;
        case 512: return OPA_MTU_512;
        case 1024: return OPA_MTU_1024;
        case 2048: return OPA_MTU_2048;
        case 4096: return OPA_MTU_4096;
        case 8192: return OPA_MTU_8192;
        case 10240: return OPA_MTU_10240;
        }
        return default_if_bad;
}

u16 enum_to_mtu(int mtu)
{
        switch (mtu) {
        case OPA_MTU_0: return 0;
        case OPA_MTU_256: return 256;
        case OPA_MTU_512: return 512;
        case OPA_MTU_1024: return 1024;
        case OPA_MTU_2048: return 2048;
        case OPA_MTU_4096: return 4096;
        case OPA_MTU_8192: return 8192;
        case OPA_MTU_10240: return 10240;
        default: return 0xffff;
        }
}
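
/*
 * Round-trip sketch (editor's illustration, not in the original
 * source): mtu_to_enum(4096, OPA_MTU_2048) returns OPA_MTU_4096 and
 * enum_to_mtu(OPA_MTU_4096) returns 4096, while an unsupported size
 * such as 3000 falls through to the supplied default:
 *
 *	int e = mtu_to_enum(3000, OPA_MTU_2048);  // e == OPA_MTU_2048
 *	u16 m = enum_to_mtu(e);                   // m == 2048
 */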

/*
 * set_mtu - set the MTU
 * @ppd: the per port data
 *
 * We can handle "any" incoming size, the issue here is whether we
 * need to restrict our outgoing size. We do not deal with what happens
 * to programs that are already running when the size changes.
 */
int set_mtu(struct hfi1_pportdata *ppd)
{
        struct hfi1_devdata *dd = ppd->dd;
        int i, drain, ret = 0, is_up = 0;

        ppd->ibmtu = 0;
        for (i = 0; i < ppd->vls_supported; i++)
                if (ppd->ibmtu < dd->vld[i].mtu)
                        ppd->ibmtu = dd->vld[i].mtu;
        ppd->ibmaxlen = ppd->ibmtu + lrh_max_header_bytes(ppd->dd);

        mutex_lock(&ppd->hls_lock);
        if (ppd->host_link_state == HLS_UP_INIT ||
            ppd->host_link_state == HLS_UP_ARMED ||
            ppd->host_link_state == HLS_UP_ACTIVE)
                is_up = 1;

        drain = !is_ax(dd) && is_up;

        if (drain)
                /*
                 * MTU is specified per-VL. To ensure that no packet gets
                 * stuck (due, e.g., to the MTU for the packet's VL being
                 * reduced), empty the per-VL FIFOs before adjusting MTU.
                 */
                ret = stop_drain_data_vls(dd);

        if (ret) {
                dd_dev_err(dd, "%s: cannot stop/drain VLs - refusing to change per-VL MTUs\n",
                           __func__);
                goto err;
        }

        hfi1_set_ib_cfg(ppd, HFI1_IB_CFG_MTU, 0);

        if (drain)
                open_fill_data_vls(dd); /* reopen all VLs */

err:
        mutex_unlock(&ppd->hls_lock);

        return ret;
}

int hfi1_set_lid(struct hfi1_pportdata *ppd, u32 lid, u8 lmc)
{
        struct hfi1_devdata *dd = ppd->dd;

        ppd->lid = lid;
        ppd->lmc = lmc;
        hfi1_set_ib_cfg(ppd, HFI1_IB_CFG_LIDLMC, 0);

        dd_dev_info(dd, "port %u: got a lid: 0x%x\n", ppd->port, lid);

        return 0;
}

void shutdown_led_override(struct hfi1_pportdata *ppd)
{
        struct hfi1_devdata *dd = ppd->dd;

        /*
         * This pairs with the memory barrier in hfi1_start_led_override to
         * ensure that we read the correct state of LED beaconing represented
         * by led_override_timer_active
         */
        smp_rmb();
        if (atomic_read(&ppd->led_override_timer_active)) {
                del_timer_sync(&ppd->led_override_timer);
                atomic_set(&ppd->led_override_timer_active, 0);
                /* Ensure the atomic_set is visible to all CPUs */
                smp_wmb();
        }

        /* Hand control of the LED to the DC for normal operation */
        write_csr(dd, DCC_CFG_LED_CNTRL, 0);
}

static void run_led_override(struct timer_list *t)
{
        struct hfi1_pportdata *ppd = from_timer(ppd, t, led_override_timer);
        struct hfi1_devdata *dd = ppd->dd;
        unsigned long timeout;
        int phase_idx;

        if (!(dd->flags & HFI1_INITTED))
                return;

        phase_idx = ppd->led_override_phase & 1;

        setextled(dd, phase_idx);

        timeout = ppd->led_override_vals[phase_idx];

        /* Set up for next phase */
        ppd->led_override_phase = !ppd->led_override_phase;

        mod_timer(&ppd->led_override_timer, jiffies + timeout);
}

/*
 * To have the LED blink in a particular pattern, provide timeon and timeoff
 * in milliseconds.
 * To turn off custom blinking and return to normal operation, use
 * shutdown_led_override()
 */
void hfi1_start_led_override(struct hfi1_pportdata *ppd, unsigned int timeon,
                             unsigned int timeoff)
{
        if (!(ppd->dd->flags & HFI1_INITTED))
                return;

        /* Convert to jiffies for direct use in timer */
        ppd->led_override_vals[0] = msecs_to_jiffies(timeoff);
        ppd->led_override_vals[1] = msecs_to_jiffies(timeon);

        /* Arbitrarily start from LED on phase */
        ppd->led_override_phase = 1;

        /*
         * If the timer has not already been started, do so. Use a "quick"
         * timeout so the handler will be called soon to look at our request.
         */
        if (!timer_pending(&ppd->led_override_timer)) {
                timer_setup(&ppd->led_override_timer, run_led_override, 0);
                ppd->led_override_timer.expires = jiffies + 1;
                add_timer(&ppd->led_override_timer);
                atomic_set(&ppd->led_override_timer_active, 1);
                /* Ensure the atomic_set is visible to all CPUs */
                smp_wmb();
        }
}
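
/*
 * Usage sketch (editor's illustration, not in the original source):
 * blink 500 ms on / 500 ms off until explicitly stopped:
 *
 *	hfi1_start_led_override(ppd, 500, 500);
 *	...
 *	shutdown_led_override(ppd);
 *
 * run_led_override() alternates the phases itself, so the caller only
 * arms the timer once and tears it down when finished.
 */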

/**
 * hfi1_reset_device - reset the chip if possible
 * @unit: the device to reset
 *
 * Whether or not reset is successful, we attempt to re-initialize the chip
 * (that is, much like a driver unload/reload). We clear the INITTED flag
 * so that the various entry points will fail until we reinitialize. For
 * now, we only allow this if no user contexts are open that use chip resources
 */
int hfi1_reset_device(int unit)
{
        int ret;
        struct hfi1_devdata *dd = hfi1_lookup(unit);
        struct hfi1_pportdata *ppd;
        int pidx;

        if (!dd) {
                ret = -ENODEV;
                goto bail;
        }

        dd_dev_info(dd, "Reset on unit %u requested\n", unit);

        if (!dd->kregbase1 || !(dd->flags & HFI1_PRESENT)) {
                dd_dev_info(dd,
                            "Invalid unit number %u or not initialized or not present\n",
                            unit);
                ret = -ENXIO;
                goto bail;
        }

        /* If there are any user/vnic contexts, we cannot reset */
        mutex_lock(&hfi1_mutex);
        if (dd->rcd)
                if (hfi1_stats.sps_ctxts) {
                        mutex_unlock(&hfi1_mutex);
                        ret = -EBUSY;
                        goto bail;
                }
        mutex_unlock(&hfi1_mutex);

        for (pidx = 0; pidx < dd->num_pports; ++pidx) {
                ppd = dd->pport + pidx;

                shutdown_led_override(ppd);
        }
        if (dd->flags & HFI1_HAS_SEND_DMA)
                sdma_exit(dd);

        hfi1_reset_cpu_counters(dd);

        ret = hfi1_init(dd, 1);

        if (ret)
                dd_dev_err(dd,
                           "Reinitialize unit %u after reset failed with %d\n",
                           unit, ret);
        else
                dd_dev_info(dd, "Reinitialized unit %u after resetting\n",
                            unit);

bail:
        return ret;
}

static inline void hfi1_setup_ib_header(struct hfi1_packet *packet)
{
        packet->hdr = (struct hfi1_ib_message_header *)
                        hfi1_get_msgheader(packet->rcd,
                                           packet->rhf_addr);
        packet->hlen = (u8 *)packet->rhf_addr - (u8 *)packet->hdr;
}

static int hfi1_bypass_ingress_pkt_check(struct hfi1_packet *packet)
{
        struct hfi1_pportdata *ppd = packet->rcd->ppd;

        /* slid and dlid cannot be 0 */
        if ((!packet->slid) || (!packet->dlid))
                return -EINVAL;

        /* Compare port lid with incoming packet dlid */
        if ((!(hfi1_is_16B_mcast(packet->dlid))) &&
            (packet->dlid !=
             opa_get_lid(be32_to_cpu(OPA_LID_PERMISSIVE), 16B))) {
                if ((packet->dlid & ~((1 << ppd->lmc) - 1)) != ppd->lid)
                        return -EINVAL;
        }

        /* No multicast packets with SC15 */
        if ((hfi1_is_16B_mcast(packet->dlid)) && (packet->sc == 0xF))
                return -EINVAL;

        /* Packets with permissive DLID always on SC15 */
        if ((packet->dlid == opa_get_lid(be32_to_cpu(OPA_LID_PERMISSIVE),
                                         16B)) &&
            (packet->sc != 0xF))
                return -EINVAL;

        return 0;
}
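
/*
 * Worked LMC example (editor's illustration; the values are
 * hypothetical): with ppd->lid = 0x20 and ppd->lmc = 2, the port
 * answers to the four LIDs 0x20-0x23.  The check masks off the low
 * lmc bits of the incoming DLID -- 0x22 & ~0x3 = 0x20 -- and compares
 * the result against the base LID, so any DLID in the range passes.
 */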

static int hfi1_setup_9B_packet(struct hfi1_packet *packet)
{
        struct hfi1_ibport *ibp = rcd_to_iport(packet->rcd);
        struct ib_header *hdr;
        u8 lnh;

        hfi1_setup_ib_header(packet);
        hdr = packet->hdr;

        lnh = ib_get_lnh(hdr);
        if (lnh == HFI1_LRH_BTH) {
                packet->ohdr = &hdr->u.oth;
                packet->grh = NULL;
        } else if (lnh == HFI1_LRH_GRH) {
                u32 vtf;

                packet->ohdr = &hdr->u.l.oth;
                packet->grh = &hdr->u.l.grh;
                if (packet->grh->next_hdr != IB_GRH_NEXT_HDR)
                        goto drop;
                vtf = be32_to_cpu(packet->grh->version_tclass_flow);
                if ((vtf >> IB_GRH_VERSION_SHIFT) != IB_GRH_VERSION)
                        goto drop;
        } else {
                goto drop;
        }

        /* Query commonly used fields from packet header */
        packet->payload = packet->ebuf;
        packet->opcode = ib_bth_get_opcode(packet->ohdr);
        packet->slid = ib_get_slid(hdr);
        packet->dlid = ib_get_dlid(hdr);
        if (unlikely((packet->dlid >= be16_to_cpu(IB_MULTICAST_LID_BASE)) &&
                     (packet->dlid != be16_to_cpu(IB_LID_PERMISSIVE))))
                packet->dlid += opa_get_mcast_base(OPA_MCAST_NR) -
                        be16_to_cpu(IB_MULTICAST_LID_BASE);
        packet->sl = ib_get_sl(hdr);
        packet->sc = hfi1_9B_get_sc5(hdr, packet->rhf);
        packet->pad = ib_bth_get_pad(packet->ohdr);
        packet->extra_byte = 0;
        packet->pkey = ib_bth_get_pkey(packet->ohdr);
        packet->migrated = ib_bth_is_migration(packet->ohdr);

        return 0;
drop:
        ibp->rvp.n_pkt_drops++;
        return -EINVAL;
}

static int hfi1_setup_bypass_packet(struct hfi1_packet *packet)
{
        /*
         * Bypass packets have a different header/payload split
         * compared to an IB packet.
         * Current split is set such that 16 bytes of the actual
         * header is in the header buffer and the remaining is in
         * the eager buffer. We chose 16 since hfi1 driver only
         * supports 16B bypass packets and we will be able to
         * receive the entire LRH with such a split.
         */

        struct hfi1_ctxtdata *rcd = packet->rcd;
        struct hfi1_pportdata *ppd = rcd->ppd;
        struct hfi1_ibport *ibp = &ppd->ibport_data;
        u8 l4;

        packet->hdr = (struct hfi1_16b_header *)
                        hfi1_get_16B_header(packet->rcd,
                                            packet->rhf_addr);
        l4 = hfi1_16B_get_l4(packet->hdr);
        if (l4 == OPA_16B_L4_IB_LOCAL) {
                packet->ohdr = packet->ebuf;
                packet->grh = NULL;
                packet->opcode = ib_bth_get_opcode(packet->ohdr);
                packet->pad = hfi1_16B_bth_get_pad(packet->ohdr);
                /* hdr_len_by_opcode already has an IB LRH factored in */
                packet->hlen = hdr_len_by_opcode[packet->opcode] +
                        (LRH_16B_BYTES - LRH_9B_BYTES);
                packet->migrated = opa_bth_is_migration(packet->ohdr);
        } else if (l4 == OPA_16B_L4_IB_GLOBAL) {
                u32 vtf;
                u8 grh_len = sizeof(struct ib_grh);

                packet->ohdr = packet->ebuf + grh_len;
                packet->grh = packet->ebuf;
                packet->opcode = ib_bth_get_opcode(packet->ohdr);
                packet->pad = hfi1_16B_bth_get_pad(packet->ohdr);
                /* hdr_len_by_opcode already has an IB LRH factored in */
                packet->hlen = hdr_len_by_opcode[packet->opcode] +
                        (LRH_16B_BYTES - LRH_9B_BYTES) + grh_len;
                packet->migrated = opa_bth_is_migration(packet->ohdr);

                if (packet->grh->next_hdr != IB_GRH_NEXT_HDR)
                        goto drop;
                vtf = be32_to_cpu(packet->grh->version_tclass_flow);
                if ((vtf >> IB_GRH_VERSION_SHIFT) != IB_GRH_VERSION)
                        goto drop;
        } else if (l4 == OPA_16B_L4_FM) {
                packet->mgmt = packet->ebuf;
                packet->ohdr = NULL;
                packet->grh = NULL;
                packet->opcode = IB_OPCODE_UD_SEND_ONLY;
                packet->pad = OPA_16B_L4_FM_PAD;
                packet->hlen = OPA_16B_L4_FM_HLEN;
                packet->migrated = false;
        } else {
                goto drop;
        }

        /* Query commonly used fields from packet header */
        packet->payload = packet->ebuf + packet->hlen - LRH_16B_BYTES;
        packet->slid = hfi1_16B_get_slid(packet->hdr);
        packet->dlid = hfi1_16B_get_dlid(packet->hdr);
        if (unlikely(hfi1_is_16B_mcast(packet->dlid)))
                packet->dlid += opa_get_mcast_base(OPA_MCAST_NR) -
                        opa_get_lid(opa_get_mcast_base(OPA_MCAST_NR),
                                    16B);
        packet->sc = hfi1_16B_get_sc(packet->hdr);
        packet->sl = ibp->sc_to_sl[packet->sc];
        packet->extra_byte = SIZE_OF_LT;
        packet->pkey = hfi1_16B_get_pkey(packet->hdr);

        if (hfi1_bypass_ingress_pkt_check(packet))
                goto drop;

        return 0;
drop:
        hfi1_cdbg(PKT, "%s: packet dropped", __func__);
        ibp->rvp.n_pkt_drops++;
        return -EINVAL;
}
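
/*
 * Split illustration (editor's sketch; the 16-byte split comes from the
 * comment above, the opcode length is hypothetical): for a 16B packet
 * the first 16 bytes (the LRH) land in the header queue and everything
 * from the BTH onward sits in the eager buffer, so packet->ohdr points
 * into ebuf.  If hdr_len_by_opcode[] says a given opcode needs, e.g.,
 * 28 bytes of 9B-style headers, hlen becomes 28 + (LRH_16B_BYTES -
 * LRH_9B_BYTES), and the payload starts at ebuf + hlen - LRH_16B_BYTES,
 * i.e. hlen minus the LRH bytes that never entered the eager buffer.
 */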

static void show_eflags_errs(struct hfi1_packet *packet)
{
        struct hfi1_ctxtdata *rcd = packet->rcd;
        u32 rte = rhf_rcv_type_err(packet->rhf);

        dd_dev_err(rcd->dd,
                   "receive context %d: rhf 0x%016llx, errs [ %s%s%s%s%s%s%s] rte 0x%x\n",
                   rcd->ctxt, packet->rhf,
                   packet->rhf & RHF_K_HDR_LEN_ERR ? "k_hdr_len " : "",
                   packet->rhf & RHF_DC_UNC_ERR ? "dc_unc " : "",
                   packet->rhf & RHF_DC_ERR ? "dc " : "",
                   packet->rhf & RHF_TID_ERR ? "tid " : "",
                   packet->rhf & RHF_LEN_ERR ? "len " : "",
                   packet->rhf & RHF_ECC_ERR ? "ecc " : "",
                   packet->rhf & RHF_ICRC_ERR ? "icrc " : "",
                   rte);
}

void handle_eflags(struct hfi1_packet *packet)
{
        struct hfi1_ctxtdata *rcd = packet->rcd;

        rcv_hdrerr(rcd, rcd->ppd, packet);
        if (rhf_err_flags(packet->rhf))
                show_eflags_errs(packet);
}
1631 | |
1632 | static void hfi1_ipoib_ib_rcv(struct hfi1_packet *packet) |
1633 | { |
1634 | struct hfi1_ibport *ibp; |
1635 | struct net_device *netdev; |
1636 | struct hfi1_ctxtdata *rcd = packet->rcd; |
1637 | struct napi_struct *napi = rcd->napi; |
1638 | struct sk_buff *skb; |
1639 | struct hfi1_netdev_rxq *rxq = container_of(napi, |
1640 | struct hfi1_netdev_rxq, napi); |
1641 | u32 ; |
1642 | u32 tlen, qpnum; |
1643 | bool do_work, do_cnp; |
1644 | |
1645 | trace_hfi1_rcvhdr(packet); |
1646 | |
1647 | hfi1_setup_ib_header(packet); |
1648 | |
1649 | packet->ohdr = &((struct ib_header *)packet->hdr)->u.oth; |
1650 | packet->grh = NULL; |
1651 | |
1652 | if (unlikely(rhf_err_flags(packet->rhf))) { |
1653 | handle_eflags(packet); |
1654 | return; |
1655 | } |
1656 | |
1657 | qpnum = ib_bth_get_qpn(ohdr: packet->ohdr); |
1658 | netdev = hfi1_netdev_get_data(dd: rcd->dd, id: qpnum); |
1659 | if (!netdev) |
1660 | goto drop_no_nd; |
1661 | |
1662 | trace_input_ibhdr(dd: rcd->dd, packet, sc5: !!(rhf_dc_info(rhf: packet->rhf))); |
1663 | trace_ctxt_rsm_hist(ctxt: rcd->ctxt); |
1664 | |
1665 | /* handle congestion notifications */ |
1666 | do_work = hfi1_may_ecn(pkt: packet); |
1667 | if (unlikely(do_work)) { |
1668 | do_cnp = (packet->opcode != IB_OPCODE_CNP); |
1669 | (void)hfi1_process_ecn_slowpath(qp: hfi1_ipoib_priv(dev: netdev)->qp, |
1670 | pkt: packet, prescan: do_cnp); |
1671 | } |
1672 | |
1673 | /* |
1674 | * We have split point after last byte of DETH |
1675 | * lets strip padding and CRC and ICRC. |
1676 | * tlen is whole packet len so we need to |
1677 | * subtract header size as well. |
1678 | */ |
1679 | tlen = packet->tlen; |
1680 | extra_bytes = ib_bth_get_pad(ohdr: packet->ohdr) + (SIZE_OF_CRC << 2) + |
1681 | packet->hlen; |
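	/*
	 * Worked example with hypothetical numbers: for tlen = 128,
	 * hlen = 28 (LRH + BTH + DETH), no padding and a 4-byte ICRC
	 * (SIZE_OF_CRC << 2), the skb payload is 128 - (0 + 4 + 28) = 96
	 * bytes.
	 */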
1682 | if (unlikely(tlen < extra_bytes)) |
1683 | goto drop; |
1684 | |
1685 | tlen -= extra_bytes; |
1686 | |
	skb = hfi1_ipoib_prepare_skb(rxq, tlen, packet->ebuf);
1688 | if (unlikely(!skb)) |
1689 | goto drop; |
1690 | |
	dev_sw_netstats_rx_add(netdev, skb->len);
1692 | |
1693 | skb->dev = netdev; |
1694 | skb->pkt_type = PACKET_HOST; |
1695 | netif_receive_skb(skb); |
1696 | |
1697 | return; |
1698 | |
1699 | drop: |
1700 | ++netdev->stats.rx_dropped; |
1701 | drop_no_nd: |
	ibp = rcd_to_iport(packet->rcd);
1703 | ++ibp->rvp.n_pkt_drops; |
1704 | } |
1705 | |
1706 | /* |
1707 | * The following functions are called by the interrupt handler. They are type |
1708 | * specific handlers for each packet type. |
1709 | */ |
1710 | static void process_receive_ib(struct hfi1_packet *packet) |
1711 | { |
1712 | if (hfi1_setup_9B_packet(packet)) |
1713 | return; |
1714 | |
1715 | if (unlikely(hfi1_dbg_should_fault_rx(packet))) |
1716 | return; |
1717 | |
1718 | trace_hfi1_rcvhdr(packet); |
1719 | |
1720 | if (unlikely(rhf_err_flags(packet->rhf))) { |
1721 | handle_eflags(packet); |
1722 | return; |
1723 | } |
1724 | |
1725 | hfi1_ib_rcv(packet); |
1726 | } |
1727 | |
1728 | static void process_receive_bypass(struct hfi1_packet *packet) |
1729 | { |
1730 | struct hfi1_devdata *dd = packet->rcd->dd; |
1731 | |
1732 | if (hfi1_setup_bypass_packet(packet)) |
1733 | return; |
1734 | |
1735 | trace_hfi1_rcvhdr(packet); |
1736 | |
1737 | if (unlikely(rhf_err_flags(packet->rhf))) { |
1738 | handle_eflags(packet); |
1739 | return; |
1740 | } |
1741 | |
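	/* L2 type 0x2 identifies a 16B header; any other bypass L2 is bad */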
	if (hfi1_16B_get_l2(packet->hdr) == 0x2) {
1743 | hfi1_16B_rcv(packet); |
1744 | } else { |
1745 | dd_dev_err(dd, |
1746 | "Bypass packets other than 16B are not supported in normal operation. Dropping\n" ); |
1747 | incr_cntr64(cntr: &dd->sw_rcv_bypass_packet_errors); |
1748 | if (!(dd->err_info_rcvport.status_and_code & |
1749 | OPA_EI_STATUS_SMASK)) { |
1750 | u64 *flits = packet->ebuf; |
1751 | |
1752 | if (flits && !(packet->rhf & RHF_LEN_ERR)) { |
1753 | dd->err_info_rcvport.packet_flit1 = flits[0]; |
1754 | dd->err_info_rcvport.packet_flit2 = |
1755 | packet->tlen > sizeof(flits[0]) ? |
1756 | flits[1] : 0; |
1757 | } |
1758 | dd->err_info_rcvport.status_and_code |= |
1759 | (OPA_EI_STATUS_SMASK | BAD_L2_ERR); |
1760 | } |
1761 | } |
1762 | } |
1763 | |
1764 | static void process_receive_error(struct hfi1_packet *packet) |
1765 | { |
1766 | /* KHdrHCRCErr -- KDETH packet with a bad HCRC */ |
1767 | if (unlikely( |
1768 | hfi1_dbg_fault_suppress_err(&packet->rcd->dd->verbs_dev) && |
1769 | (rhf_rcv_type_err(packet->rhf) == RHF_RCV_TYPE_ERROR || |
1770 | packet->rhf & RHF_DC_ERR))) |
1771 | return; |
1772 | |
1773 | hfi1_setup_ib_header(packet); |
1774 | handle_eflags(packet); |
1775 | |
1776 | if (unlikely(rhf_err_flags(packet->rhf))) |
1777 | dd_dev_err(packet->rcd->dd, |
1778 | "Unhandled error packet received. Dropping.\n" ); |
1779 | } |
1780 | |
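/*
 * KDETH receive handlers.  For expected (TID) receives the payload is
 * assumed to have been placed directly into pre-registered buffers, so
 * only the KDETH header needs processing here; eager receives carry
 * their payload in the eager buffer.
 */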
1781 | static void kdeth_process_expected(struct hfi1_packet *packet) |
1782 | { |
1783 | hfi1_setup_9B_packet(packet); |
1784 | if (unlikely(hfi1_dbg_should_fault_rx(packet))) |
1785 | return; |
1786 | |
1787 | if (unlikely(rhf_err_flags(packet->rhf))) { |
1788 | struct hfi1_ctxtdata *rcd = packet->rcd; |
1789 | |
		if (hfi1_handle_kdeth_eflags(rcd, rcd->ppd, packet))
1791 | return; |
1792 | } |
1793 | |
1794 | hfi1_kdeth_expected_rcv(packet); |
1795 | } |
1796 | |
1797 | static void kdeth_process_eager(struct hfi1_packet *packet) |
1798 | { |
1799 | hfi1_setup_9B_packet(packet); |
1800 | if (unlikely(hfi1_dbg_should_fault_rx(packet))) |
1801 | return; |
1802 | |
1803 | trace_hfi1_rcvhdr(packet); |
1804 | if (unlikely(rhf_err_flags(packet->rhf))) { |
1805 | struct hfi1_ctxtdata *rcd = packet->rcd; |
1806 | |
1807 | show_eflags_errs(packet); |
		if (hfi1_handle_kdeth_eflags(rcd, rcd->ppd, packet))
1809 | return; |
1810 | } |
1811 | |
1812 | hfi1_kdeth_eager_rcv(packet); |
1813 | } |
1814 | |
1815 | static void process_receive_invalid(struct hfi1_packet *packet) |
1816 | { |
	dd_dev_err(packet->rcd->dd, "Invalid packet type %d. Dropping\n",
1818 | rhf_rcv_type(packet->rhf)); |
1819 | } |
1820 | |
1821 | #define HFI1_RCVHDR_DUMP_MAX 5 |
1822 | |
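/*
 * debugfs helper: print a receive context's CSR state, then walk up to
 * HFI1_RCVHDR_DUMP_MAX header queue entries and decode the opcode, QPN
 * and PSN of each 9B entry.
 */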
1823 | void seqfile_dump_rcd(struct seq_file *s, struct hfi1_ctxtdata *rcd) |
1824 | { |
1825 | struct hfi1_packet packet; |
1826 | struct ps_mdata mdata; |
1827 | int i; |
1828 | |
	seq_printf(s, "Rcd %u: RcvHdr cnt %u entsize %u %s ctrl 0x%08llx status 0x%08llx, head %llu tail %llu sw head %u\n",
		   rcd->ctxt, get_hdrq_cnt(rcd), get_hdrqentsize(rcd),
		   get_dma_rtail_setting(rcd) ?
		   "dma_rtail" : "nodma_rtail",
		   read_kctxt_csr(rcd->dd, rcd->ctxt, RCV_CTXT_CTRL),
		   read_kctxt_csr(rcd->dd, rcd->ctxt, RCV_CTXT_STATUS),
		   read_uctxt_csr(rcd->dd, rcd->ctxt, RCV_HDR_HEAD) &
		   RCV_HDR_HEAD_HEAD_MASK,
		   read_uctxt_csr(rcd->dd, rcd->ctxt, RCV_HDR_TAIL),
		   rcd->head);
1839 | |
	init_packet(rcd, &packet);
	init_ps_mdata(&mdata, &packet);
1842 | |
1843 | for (i = 0; i < HFI1_RCVHDR_DUMP_MAX; i++) { |
1844 | __le32 *rhf_addr = (__le32 *)rcd->rcvhdrq + mdata.ps_head + |
1845 | rcd->rhf_offset; |
1846 | struct ib_header *hdr; |
		u64 rhf = rhf_to_cpu(rhf_addr);
1848 | u32 etype = rhf_rcv_type(rhf), qpn; |
1849 | u8 opcode; |
1850 | u32 psn; |
1851 | u8 lnh; |
1852 | |
		if (ps_done(&mdata, rhf, rcd))
1854 | break; |
1855 | |
		if (ps_skip(&mdata, rhf, rcd))
1857 | goto next; |
1858 | |
1859 | if (etype > RHF_RCV_TYPE_IB) |
1860 | goto next; |
1861 | |
		packet.hdr = hfi1_get_msgheader(rcd, rhf_addr);
1863 | hdr = packet.hdr; |
1864 | |
1865 | lnh = be16_to_cpu(hdr->lrh[0]) & 3; |
1866 | |
1867 | if (lnh == HFI1_LRH_BTH) |
1868 | packet.ohdr = &hdr->u.oth; |
1869 | else if (lnh == HFI1_LRH_GRH) |
1870 | packet.ohdr = &hdr->u.l.oth; |
1871 | else |
1872 | goto next; /* just in case */ |
1873 | |
1874 | opcode = (be32_to_cpu(packet.ohdr->bth[0]) >> 24); |
1875 | qpn = be32_to_cpu(packet.ohdr->bth[1]) & RVT_QPN_MASK; |
1876 | psn = mask_psn(be32_to_cpu(packet.ohdr->bth[2])); |
1877 | |
		seq_printf(s, "\tEnt %u: opcode 0x%x, qpn 0x%x, psn 0x%x\n",
1879 | mdata.ps_head, opcode, qpn, psn); |
1880 | next: |
		update_ps_mdata(&mdata, rcd);
1882 | } |
1883 | } |
1884 | |
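/*
 * Per-packet dispatch tables, indexed by rhf_rcv_type().  The normal
 * table serves verbs contexts; the netdev table serves IPoIB/VNIC
 * contexts, where expected/eager KDETH types are treated as invalid.
 */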
1885 | const rhf_rcv_function_ptr normal_rhf_rcv_functions[] = { |
1886 | [RHF_RCV_TYPE_EXPECTED] = kdeth_process_expected, |
1887 | [RHF_RCV_TYPE_EAGER] = kdeth_process_eager, |
1888 | [RHF_RCV_TYPE_IB] = process_receive_ib, |
1889 | [RHF_RCV_TYPE_ERROR] = process_receive_error, |
1890 | [RHF_RCV_TYPE_BYPASS] = process_receive_bypass, |
1891 | [RHF_RCV_TYPE_INVALID5] = process_receive_invalid, |
1892 | [RHF_RCV_TYPE_INVALID6] = process_receive_invalid, |
1893 | [RHF_RCV_TYPE_INVALID7] = process_receive_invalid, |
1894 | }; |
1895 | |
1896 | const rhf_rcv_function_ptr netdev_rhf_rcv_functions[] = { |
1897 | [RHF_RCV_TYPE_EXPECTED] = process_receive_invalid, |
1898 | [RHF_RCV_TYPE_EAGER] = process_receive_invalid, |
1899 | [RHF_RCV_TYPE_IB] = hfi1_ipoib_ib_rcv, |
1900 | [RHF_RCV_TYPE_ERROR] = process_receive_error, |
1901 | [RHF_RCV_TYPE_BYPASS] = hfi1_vnic_bypass_rcv, |
1902 | [RHF_RCV_TYPE_INVALID5] = process_receive_invalid, |
1903 | [RHF_RCV_TYPE_INVALID6] = process_receive_invalid, |
1904 | [RHF_RCV_TYPE_INVALID7] = process_receive_invalid, |
1905 | }; |
1906 | |