1 | // SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause |
2 | /* |
3 | * Copyright(c) 2020 Cornelis Networks, Inc. |
4 | * Copyright(c) 2015-2020 Intel Corporation. |
5 | */ |
6 | |
7 | #include <linux/poll.h> |
8 | #include <linux/cdev.h> |
9 | #include <linux/vmalloc.h> |
10 | #include <linux/io.h> |
11 | #include <linux/sched/mm.h> |
12 | #include <linux/bitmap.h> |
13 | |
14 | #include <rdma/ib.h> |
15 | |
16 | #include "hfi.h" |
17 | #include "pio.h" |
18 | #include "device.h" |
19 | #include "common.h" |
20 | #include "trace.h" |
21 | #include "mmu_rb.h" |
22 | #include "user_sdma.h" |
23 | #include "user_exp_rcv.h" |
24 | #include "aspm.h" |
25 | |
/*
 * Redefine pr_fmt so that format strings passed through it are prefixed
 * with DRIVER_NAME followed by ": ".
 */
26 | #undef pr_fmt |
27 | #define pr_fmt(fmt) DRIVER_NAME ": " fmt |
28 | |
29 | #define SEND_CTXT_HALT_TIMEOUT 1000 /* msecs */ |
30 | |
31 | /* |
32 | * File operation functions |
33 | */ |
34 | static int hfi1_file_open(struct inode *inode, struct file *fp); |
35 | static int hfi1_file_close(struct inode *inode, struct file *fp); |
36 | static ssize_t hfi1_write_iter(struct kiocb *kiocb, struct iov_iter *from); |
37 | static __poll_t hfi1_poll(struct file *fp, struct poll_table_struct *pt); |
38 | static int hfi1_file_mmap(struct file *fp, struct vm_area_struct *vma); |
39 | |
40 | static u64 kvirt_to_phys(void *addr); |
41 | static int assign_ctxt(struct hfi1_filedata *fd, unsigned long arg, u32 len); |
42 | static void init_subctxts(struct hfi1_ctxtdata *uctxt, |
43 | const struct hfi1_user_info *uinfo); |
44 | static int init_user_ctxt(struct hfi1_filedata *fd, |
45 | struct hfi1_ctxtdata *uctxt); |
46 | static void user_init(struct hfi1_ctxtdata *uctxt); |
47 | static int get_ctxt_info(struct hfi1_filedata *fd, unsigned long arg, u32 len); |
48 | static int get_base_info(struct hfi1_filedata *fd, unsigned long arg, u32 len); |
49 | static int user_exp_rcv_setup(struct hfi1_filedata *fd, unsigned long arg, |
50 | u32 len); |
51 | static int user_exp_rcv_clear(struct hfi1_filedata *fd, unsigned long arg, |
52 | u32 len); |
53 | static int user_exp_rcv_invalid(struct hfi1_filedata *fd, unsigned long arg, |
54 | u32 len); |
55 | static int setup_base_ctxt(struct hfi1_filedata *fd, |
56 | struct hfi1_ctxtdata *uctxt); |
57 | static int setup_subctxt(struct hfi1_ctxtdata *uctxt); |
58 | |
59 | static int find_sub_ctxt(struct hfi1_filedata *fd, |
60 | const struct hfi1_user_info *uinfo); |
61 | static int allocate_ctxt(struct hfi1_filedata *fd, struct hfi1_devdata *dd, |
62 | struct hfi1_user_info *uinfo, |
63 | struct hfi1_ctxtdata **cd); |
64 | static void deallocate_ctxt(struct hfi1_ctxtdata *uctxt); |
65 | static __poll_t poll_urgent(struct file *fp, struct poll_table_struct *pt); |
66 | static __poll_t poll_next(struct file *fp, struct poll_table_struct *pt); |
67 | static int user_event_ack(struct hfi1_ctxtdata *uctxt, u16 subctxt, |
68 | unsigned long arg); |
69 | static int set_ctxt_pkey(struct hfi1_ctxtdata *uctxt, unsigned long arg); |
70 | static int ctxt_reset(struct hfi1_ctxtdata *uctxt); |
71 | static int manage_rcvq(struct hfi1_ctxtdata *uctxt, u16 subctxt, |
72 | unsigned long arg); |
73 | static vm_fault_t vma_fault(struct vm_fault *vmf); |
74 | static long hfi1_file_ioctl(struct file *fp, unsigned int cmd, |
75 | unsigned long arg); |
76 | |
77 | static const struct file_operations hfi1_file_ops = { |
78 | .owner = THIS_MODULE, |
79 | .write_iter = hfi1_write_iter, |
80 | .open = hfi1_file_open, |
81 | .release = hfi1_file_close, |
82 | .unlocked_ioctl = hfi1_file_ioctl, |
83 | .poll = hfi1_poll, |
84 | .mmap = hfi1_file_mmap, |
85 | .llseek = noop_llseek, |
86 | }; |
87 | |
88 | static const struct vm_operations_struct vm_ops = { |
89 | .fault = vma_fault, |
90 | }; |
91 | |
/*
 * Types of memories mapped into user processes' space.
 *
 * The value is carried in the TYPE field of an mmap token, so the numbering
 * is spelled out explicitly.
 */
enum mmap_types {
	PIO_BUFS		= 1,
	PIO_BUFS_SOP		= 2,
	PIO_CRED		= 3,
	RCV_HDRQ		= 4,
	RCV_EGRBUF		= 5,
	UREGS			= 6,
	EVENTS			= 7,
	STATUS			= 8,
	RTAIL			= 9,
	SUBCTXT_UREGS		= 10,
	SUBCTXT_RCV_HDRQ	= 11,
	SUBCTXT_EGRBUF		= 12,
	SDMA_COMP		= 13
};
110 | |
/*
 * Layout of an mmap token, as implied by the masks and shifts below:
 *   bits 63..32  magic (must equal HFI1_MMAP_MAGIC)
 *   bits 27..24  type (an enum mmap_types value)
 *   bits 23..16  context
 *   bits 15..12  sub-context
 *   bits 11..0   offset within a page
 * Bits 31..28 are not covered by any field.
 */
111 | /* |
112 | * Masks and offsets defining the mmap tokens |
113 | */ |
114 | #define HFI1_MMAP_OFFSET_MASK 0xfffULL |
115 | #define HFI1_MMAP_OFFSET_SHIFT 0 |
116 | #define HFI1_MMAP_SUBCTXT_MASK 0xfULL |
117 | #define HFI1_MMAP_SUBCTXT_SHIFT 12 |
118 | #define HFI1_MMAP_CTXT_MASK 0xffULL |
119 | #define HFI1_MMAP_CTXT_SHIFT 16 |
120 | #define HFI1_MMAP_TYPE_MASK 0xfULL |
121 | #define HFI1_MMAP_TYPE_SHIFT 24 |
122 | #define HFI1_MMAP_MAGIC_MASK 0xffffffffULL |
123 | #define HFI1_MMAP_MAGIC_SHIFT 32 |
124 | |
125 | #define HFI1_MMAP_MAGIC 0xdabbad00 |
126 | |
/* Mask @val to the width of @field and shift it into its token position. */
127 | #define HFI1_MMAP_TOKEN_SET(field, val) \ |
128 | (((val) & HFI1_MMAP_##field##_MASK) << HFI1_MMAP_##field##_SHIFT) |
/* Extract @field from @token (shift down, then mask). */
129 | #define HFI1_MMAP_TOKEN_GET(field, token) \ |
130 | (((token) >> HFI1_MMAP_##field##_SHIFT) & HFI1_MMAP_##field##_MASK) |
/*
 * Build a complete token: magic, type, context, sub-context and the
 * in-page offset of @addr (only offset_in_page(addr) is encoded).
 */
131 | #define HFI1_MMAP_TOKEN(type, ctxt, subctxt, addr) \ |
132 | (HFI1_MMAP_TOKEN_SET(MAGIC, HFI1_MMAP_MAGIC) | \ |
133 | HFI1_MMAP_TOKEN_SET(TYPE, type) | \ |
134 | HFI1_MMAP_TOKEN_SET(CTXT, ctxt) | \ |
135 | HFI1_MMAP_TOKEN_SET(SUBCTXT, subctxt) | \ |
136 | HFI1_MMAP_TOKEN_SET(OFFSET, (offset_in_page(addr)))) |
137 | |
/* dbg() forwards its arguments unchanged to pr_info(). */
138 | #define dbg(fmt, ...) \ |
139 | pr_info(fmt, ##__VA_ARGS__) |
140 | |
141 | static inline int is_valid_mmap(u64 token) |
142 | { |
143 | return (HFI1_MMAP_TOKEN_GET(MAGIC, token) == HFI1_MMAP_MAGIC); |
144 | } |
145 | |
146 | static int hfi1_file_open(struct inode *inode, struct file *fp) |
147 | { |
148 | struct hfi1_filedata *fd; |
149 | struct hfi1_devdata *dd = container_of(inode->i_cdev, |
150 | struct hfi1_devdata, |
151 | user_cdev); |
152 | |
153 | if (!((dd->flags & HFI1_PRESENT) && dd->kregbase1)) |
154 | return -EINVAL; |
155 | |
156 | if (!refcount_inc_not_zero(r: &dd->user_refcount)) |
157 | return -ENXIO; |
158 | |
159 | /* The real work is performed later in assign_ctxt() */ |
160 | |
161 | fd = kzalloc(size: sizeof(*fd), GFP_KERNEL); |
162 | |
163 | if (!fd || init_srcu_struct(&fd->pq_srcu)) |
164 | goto nomem; |
165 | spin_lock_init(&fd->pq_rcu_lock); |
166 | spin_lock_init(&fd->tid_lock); |
167 | spin_lock_init(&fd->invalid_lock); |
168 | fd->rec_cpu_num = -1; /* no cpu affinity by default */ |
169 | fd->dd = dd; |
170 | fp->private_data = fd; |
171 | return 0; |
172 | nomem: |
173 | kfree(objp: fd); |
174 | fp->private_data = NULL; |
175 | if (refcount_dec_and_test(r: &dd->user_refcount)) |
176 | complete(&dd->user_comp); |
177 | return -ENOMEM; |
178 | } |
179 | |
180 | static long hfi1_file_ioctl(struct file *fp, unsigned int cmd, |
181 | unsigned long arg) |
182 | { |
183 | struct hfi1_filedata *fd = fp->private_data; |
184 | struct hfi1_ctxtdata *uctxt = fd->uctxt; |
185 | int ret = 0; |
186 | int uval = 0; |
187 | |
188 | hfi1_cdbg(IOCTL, "IOCTL recv: 0x%x" , cmd); |
189 | if (cmd != HFI1_IOCTL_ASSIGN_CTXT && |
190 | cmd != HFI1_IOCTL_GET_VERS && |
191 | !uctxt) |
192 | return -EINVAL; |
193 | |
194 | switch (cmd) { |
195 | case HFI1_IOCTL_ASSIGN_CTXT: |
196 | ret = assign_ctxt(fd, arg, _IOC_SIZE(cmd)); |
197 | break; |
198 | |
199 | case HFI1_IOCTL_CTXT_INFO: |
200 | ret = get_ctxt_info(fd, arg, _IOC_SIZE(cmd)); |
201 | break; |
202 | |
203 | case HFI1_IOCTL_USER_INFO: |
204 | ret = get_base_info(fd, arg, _IOC_SIZE(cmd)); |
205 | break; |
206 | |
207 | case HFI1_IOCTL_CREDIT_UPD: |
208 | if (uctxt) |
209 | sc_return_credits(sc: uctxt->sc); |
210 | break; |
211 | |
212 | case HFI1_IOCTL_TID_UPDATE: |
213 | ret = user_exp_rcv_setup(fd, arg, _IOC_SIZE(cmd)); |
214 | break; |
215 | |
216 | case HFI1_IOCTL_TID_FREE: |
217 | ret = user_exp_rcv_clear(fd, arg, _IOC_SIZE(cmd)); |
218 | break; |
219 | |
220 | case HFI1_IOCTL_TID_INVAL_READ: |
221 | ret = user_exp_rcv_invalid(fd, arg, _IOC_SIZE(cmd)); |
222 | break; |
223 | |
224 | case HFI1_IOCTL_RECV_CTRL: |
225 | ret = manage_rcvq(uctxt, subctxt: fd->subctxt, arg); |
226 | break; |
227 | |
228 | case HFI1_IOCTL_POLL_TYPE: |
229 | if (get_user(uval, (int __user *)arg)) |
230 | return -EFAULT; |
231 | uctxt->poll_type = (typeof(uctxt->poll_type))uval; |
232 | break; |
233 | |
234 | case HFI1_IOCTL_ACK_EVENT: |
235 | ret = user_event_ack(uctxt, subctxt: fd->subctxt, arg); |
236 | break; |
237 | |
238 | case HFI1_IOCTL_SET_PKEY: |
239 | ret = set_ctxt_pkey(uctxt, arg); |
240 | break; |
241 | |
242 | case HFI1_IOCTL_CTXT_RESET: |
243 | ret = ctxt_reset(uctxt); |
244 | break; |
245 | |
246 | case HFI1_IOCTL_GET_VERS: |
247 | uval = HFI1_USER_SWVERSION; |
248 | if (put_user(uval, (int __user *)arg)) |
249 | return -EFAULT; |
250 | break; |
251 | |
252 | default: |
253 | return -EINVAL; |
254 | } |
255 | |
256 | return ret; |
257 | } |
258 | |
259 | static ssize_t hfi1_write_iter(struct kiocb *kiocb, struct iov_iter *from) |
260 | { |
261 | struct hfi1_filedata *fd = kiocb->ki_filp->private_data; |
262 | struct hfi1_user_sdma_pkt_q *pq; |
263 | struct hfi1_user_sdma_comp_q *cq = fd->cq; |
264 | int done = 0, reqs = 0; |
265 | unsigned long dim = from->nr_segs; |
266 | int idx; |
267 | |
268 | if (!HFI1_CAP_IS_KSET(SDMA)) |
269 | return -EINVAL; |
270 | if (!user_backed_iter(i: from)) |
271 | return -EINVAL; |
272 | idx = srcu_read_lock(ssp: &fd->pq_srcu); |
273 | pq = srcu_dereference(fd->pq, &fd->pq_srcu); |
274 | if (!cq || !pq) { |
275 | srcu_read_unlock(ssp: &fd->pq_srcu, idx); |
276 | return -EIO; |
277 | } |
278 | |
279 | trace_hfi1_sdma_request(dd: fd->dd, ctxt: fd->uctxt->ctxt, subctxt: fd->subctxt, dim); |
280 | |
281 | if (atomic_read(v: &pq->n_reqs) == pq->n_max_reqs) { |
282 | srcu_read_unlock(ssp: &fd->pq_srcu, idx); |
283 | return -ENOSPC; |
284 | } |
285 | |
286 | while (dim) { |
287 | const struct iovec *iov = iter_iov(iter: from); |
288 | int ret; |
289 | unsigned long count = 0; |
290 | |
291 | ret = hfi1_user_sdma_process_request( |
292 | fd, iovec: (struct iovec *)(iov + done), |
293 | dim, count: &count); |
294 | if (ret) { |
295 | reqs = ret; |
296 | break; |
297 | } |
298 | dim -= count; |
299 | done += count; |
300 | reqs++; |
301 | } |
302 | |
303 | srcu_read_unlock(ssp: &fd->pq_srcu, idx); |
304 | return reqs; |
305 | } |
306 | |
307 | static inline void mmap_cdbg(u16 ctxt, u8 subctxt, u8 type, u8 mapio, u8 vmf, |
308 | u64 memaddr, void *memvirt, dma_addr_t memdma, |
309 | ssize_t memlen, struct vm_area_struct *vma) |
310 | { |
311 | hfi1_cdbg(PROC, |
312 | "%u:%u type:%u io/vf/dma:%d/%d/%d, addr:0x%llx, len:%lu(%lu), flags:0x%lx" , |
313 | ctxt, subctxt, type, mapio, vmf, !!memdma, |
314 | memaddr ?: (u64)memvirt, memlen, |
315 | vma->vm_end - vma->vm_start, vma->vm_flags); |
316 | } |
317 | |
/*
 * hfi1_file_mmap - map one driver memory region into the calling process
 *
 * The region is selected by a token taken from vma->vm_pgoff (shifted up by
 * PAGE_SHIFT).  The token must carry the mmap magic and name the same
 * context and sub-context as this file, a context must be assigned, and
 * the mapping must be VM_SHARED; otherwise -EINVAL is returned.
 *
 * Each token type fills in a length (memlen) and some of memaddr, memvirt,
 * memdma, mapio and vmf.  After the switch the vma length must equal memlen
 * exactly, the accumulated flags are applied, and the region is installed
 * by the first matching method:
 *   vmf     - vm_ops is set so pages are supplied by the fault handler
 *   memdma  - dma_mmap_coherent()
 *   mapio   - io_remap_pfn_range() on memaddr
 *   memvirt - remap_pfn_range() on __pa(memvirt)
 *   else    - remap_pfn_range() on memaddr
 * RCV_EGRBUF is the exception: it maps each eager buffer separately inside
 * its own case and jumps straight to the exit.
 *
 * Return: 0 on success; -EINVAL, -EPERM or -EFAULT when validation fails;
 * otherwise the result of the mapping helper used.
 */
318 | static int hfi1_file_mmap(struct file *fp, struct vm_area_struct *vma) |
319 | { |
320 | struct hfi1_filedata *fd = fp->private_data; |
321 | struct hfi1_ctxtdata *uctxt = fd->uctxt; |
322 | struct hfi1_devdata *dd; |
323 | unsigned long flags; |
324 | u64 token = vma->vm_pgoff << PAGE_SHIFT, |
325 | memaddr = 0; |
326 | void *memvirt = NULL; |
327 | dma_addr_t memdma = 0; |
328 | u8 subctxt, mapio = 0, vmf = 0, type; |
329 | ssize_t memlen = 0; |
330 | int ret = 0; |
331 | u16 ctxt; |
332 | |
333 | if (!is_valid_mmap(token) || !uctxt || |
334 | !(vma->vm_flags & VM_SHARED)) { |
335 | ret = -EINVAL; |
336 | goto done; |
337 | } |
338 | dd = uctxt->dd; |
339 | ctxt = HFI1_MMAP_TOKEN_GET(CTXT, token); |
340 | subctxt = HFI1_MMAP_TOKEN_GET(SUBCTXT, token); |
341 | type = HFI1_MMAP_TOKEN_GET(TYPE, token); |
/* The token must refer to the context and sub-context owned by this file. */
342 | if (ctxt != uctxt->ctxt || subctxt != fd->subctxt) { |
343 | ret = -EINVAL; |
344 | goto done; |
345 | } |
346 | |
347 | /* |
348 | * vm_pgoff is used as a buffer selector cookie. Always mmap from |
349 | * the beginning. |
350 | */ |
351 | vma->vm_pgoff = 0; |
/* Work on a local copy of the flags; it is applied after the size check. */
352 | flags = vma->vm_flags; |
353 | |
354 | switch (type) { |
355 | case PIO_BUFS: |
356 | case PIO_BUFS_SOP: |
357 | memaddr = ((dd->physaddr + TXE_PIO_SEND) + |
358 | /* chip pio base */ |
359 | (uctxt->sc->hw_context * BIT(16))) + |
360 | /* 64K PIO space / ctxt */ |
361 | (type == PIO_BUFS_SOP ? |
362 | (TXE_PIO_SIZE / 2) : 0); /* sop? */ |
363 | /* |
364 | * Map only the amount allocated to the context, not the |
365 | * entire available context's PIO space. |
366 | */ |
367 | memlen = PAGE_ALIGN(uctxt->sc->credits * PIO_BLOCK_SIZE); |
368 | flags &= ~VM_MAYREAD; |
369 | flags |= VM_DONTCOPY | VM_DONTEXPAND; |
370 | vma->vm_page_prot = pgprot_writecombine(prot: vma->vm_page_prot); |
371 | mapio = 1; |
372 | break; |
373 | case PIO_CRED: { |
374 | u64 cr_page_offset; |
/* The credit return page may not be mapped writable. */
375 | if (flags & VM_WRITE) { |
376 | ret = -EPERM; |
377 | goto done; |
378 | } |
379 | /* |
380 | * The credit return location for this context could be on the |
381 | * second or third page allocated for credit returns (if number |
382 | * of enabled contexts > 64 and 128 respectively). |
383 | */ |
384 | cr_page_offset = ((u64)uctxt->sc->hw_free - |
385 | (u64)dd->cr_base[uctxt->numa_id].va) & |
386 | PAGE_MASK; |
387 | memvirt = dd->cr_base[uctxt->numa_id].va + cr_page_offset; |
388 | memdma = dd->cr_base[uctxt->numa_id].dma + cr_page_offset; |
389 | memlen = PAGE_SIZE; |
390 | flags &= ~VM_MAYWRITE; |
391 | flags |= VM_DONTCOPY | VM_DONTEXPAND; |
392 | /* |
393 | * The driver has already allocated memory for credit |
394 | * returns and programmed it into the chip. Has that |
395 | * memory been flagged as non-cached? |
396 | */ |
397 | /* vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); */ |
398 | break; |
399 | } |
400 | case RCV_HDRQ: |
401 | memlen = rcvhdrq_size(rcd: uctxt); |
402 | memvirt = uctxt->rcvhdrq; |
403 | memdma = uctxt->rcvhdrq_dma; |
404 | break; |
405 | case RCV_EGRBUF: { |
406 | unsigned long vm_start_save; |
407 | unsigned long vm_end_save; |
408 | int i; |
409 | /* |
410 | * The RcvEgr buffer need to be handled differently |
411 | * as multiple non-contiguous pages need to be mapped |
412 | * into the user process. |
413 | */ |
414 | memlen = uctxt->egrbufs.size; |
415 | if ((vma->vm_end - vma->vm_start) != memlen) { |
416 | dd_dev_err(dd, "Eager buffer map size invalid (%lu != %lu)\n" , |
417 | (vma->vm_end - vma->vm_start), memlen); |
418 | ret = -EINVAL; |
419 | goto done; |
420 | } |
421 | if (vma->vm_flags & VM_WRITE) { |
422 | ret = -EPERM; |
423 | goto done; |
424 | } |
/* Flags are changed on the vma directly here; this case never reaches vm_flags_reset(). */
425 | vm_flags_clear(vma, VM_MAYWRITE); |
426 | /* |
427 | * Mmap multiple separate allocations into a single vma. From |
428 | * here, dma_mmap_coherent() calls dma_direct_mmap(), which |
429 | * requires the mmap to exactly fill the vma starting at |
430 | * vma_start. Adjust the vma start and end for each eager |
431 | * buffer segment mapped. Restore the originals when done. |
432 | */ |
433 | vm_start_save = vma->vm_start; |
434 | vm_end_save = vma->vm_end; |
435 | vma->vm_end = vma->vm_start; |
436 | for (i = 0 ; i < uctxt->egrbufs.numbufs; i++) { |
437 | memlen = uctxt->egrbufs.buffers[i].len; |
438 | memvirt = uctxt->egrbufs.buffers[i].addr; |
439 | memdma = uctxt->egrbufs.buffers[i].dma; |
440 | vma->vm_end += memlen; |
441 | mmap_cdbg(ctxt, subctxt, type, mapio, vmf, memaddr, |
442 | memvirt, memdma, memlen, vma); |
443 | ret = dma_mmap_coherent(&dd->pcidev->dev, vma, |
444 | memvirt, memdma, memlen); |
445 | if (ret < 0) { |
446 | vma->vm_start = vm_start_save; |
447 | vma->vm_end = vm_end_save; |
448 | goto done; |
449 | } |
450 | vma->vm_start += memlen; |
451 | } |
452 | vma->vm_start = vm_start_save; |
453 | vma->vm_end = vm_end_save; |
454 | ret = 0; |
455 | goto done; |
456 | } |
457 | case UREGS: |
458 | /* |
459 | * Map only the page that contains this context's user |
460 | * registers. |
461 | */ |
462 | memaddr = (unsigned long) |
463 | (dd->physaddr + RXE_PER_CONTEXT_USER) |
464 | + (uctxt->ctxt * RXE_PER_CONTEXT_SIZE); |
465 | /* |
466 | * TidFlow table is on the same page as the rest of the |
467 | * user registers. |
468 | */ |
469 | memlen = PAGE_SIZE; |
470 | flags |= VM_DONTCOPY | VM_DONTEXPAND; |
471 | vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); |
472 | mapio = 1; |
473 | break; |
474 | case EVENTS: |
475 | /* |
476 | * Use the page where this context's flags are. User level |
477 | * knows where it's own bitmap is within the page. |
478 | */ |
479 | memaddr = (unsigned long) |
480 | (dd->events + uctxt_offset(uctxt)) & PAGE_MASK; |
481 | memlen = PAGE_SIZE; |
482 | /* |
483 | * v3.7 removes VM_RESERVED but the effect is kept by |
484 | * using VM_IO. |
485 | */ |
486 | flags |= VM_IO | VM_DONTEXPAND; |
487 | vmf = 1; |
488 | break; |
489 | case STATUS: |
490 | if (flags & VM_WRITE) { |
491 | ret = -EPERM; |
492 | goto done; |
493 | } |
494 | memaddr = kvirt_to_phys(addr: (void *)dd->status); |
495 | memlen = PAGE_SIZE; |
496 | flags |= VM_IO | VM_DONTEXPAND; |
497 | break; |
498 | case RTAIL: |
499 | if (!HFI1_CAP_IS_USET(DMA_RTAIL)) { |
500 | /* |
501 | * If the memory allocation failed, the context alloc |
502 | * also would have failed, so we would never get here |
503 | */ |
504 | ret = -EINVAL; |
505 | goto done; |
506 | } |
507 | if ((flags & VM_WRITE) || !hfi1_rcvhdrtail_kvaddr(rcd: uctxt)) { |
508 | ret = -EPERM; |
509 | goto done; |
510 | } |
511 | memlen = PAGE_SIZE; |
512 | memvirt = (void *)hfi1_rcvhdrtail_kvaddr(rcd: uctxt); |
513 | memdma = uctxt->rcvhdrqtailaddr_dma; |
514 | flags &= ~VM_MAYWRITE; |
515 | break; |
516 | case SUBCTXT_UREGS: |
517 | memaddr = (u64)uctxt->subctxt_uregbase; |
518 | memlen = PAGE_SIZE; |
519 | flags |= VM_IO | VM_DONTEXPAND; |
520 | vmf = 1; |
521 | break; |
522 | case SUBCTXT_RCV_HDRQ: |
523 | memaddr = (u64)uctxt->subctxt_rcvhdr_base; |
524 | memlen = rcvhdrq_size(rcd: uctxt) * uctxt->subctxt_cnt; |
525 | flags |= VM_IO | VM_DONTEXPAND; |
526 | vmf = 1; |
527 | break; |
528 | case SUBCTXT_EGRBUF: |
529 | memaddr = (u64)uctxt->subctxt_rcvegrbuf; |
530 | memlen = uctxt->egrbufs.size * uctxt->subctxt_cnt; |
531 | flags |= VM_IO | VM_DONTEXPAND; |
532 | flags &= ~VM_MAYWRITE; |
533 | vmf = 1; |
534 | break; |
535 | case SDMA_COMP: { |
536 | struct hfi1_user_sdma_comp_q *cq = fd->cq; |
537 | |
538 | if (!cq) { |
539 | ret = -EFAULT; |
540 | goto done; |
541 | } |
542 | memaddr = (u64)cq->comps; |
543 | memlen = PAGE_ALIGN(sizeof(*cq->comps) * cq->nentries); |
544 | flags |= VM_IO | VM_DONTEXPAND; |
545 | vmf = 1; |
546 | break; |
547 | } |
/* Unknown type: ret is set, then control reaches the size check with memlen still 0. */
548 | default: |
549 | ret = -EINVAL; |
550 | break; |
551 | } |
552 | |
/* The mapping must cover the selected region exactly. */
553 | if ((vma->vm_end - vma->vm_start) != memlen) { |
554 | hfi1_cdbg(PROC, "%u:%u Memory size mismatch %lu:%lu" , |
555 | uctxt->ctxt, fd->subctxt, |
556 | (vma->vm_end - vma->vm_start), memlen); |
557 | ret = -EINVAL; |
558 | goto done; |
559 | } |
560 | |
/* Apply the flag changes accumulated by the selected case. */
561 | vm_flags_reset(vma, flags); |
562 | mmap_cdbg(ctxt, subctxt, type, mapio, vmf, memaddr, memvirt, memdma, |
563 | memlen, vma); |
564 | if (vmf) { |
/*
 * Nothing is mapped now: store the page frame number of memaddr in
 * vm_pgoff and install vm_ops (presumably so the fault handler can
 * recover the address from the fault's page offset - confirm).
 */
565 | vma->vm_pgoff = PFN_DOWN(memaddr); |
566 | vma->vm_ops = &vm_ops; |
567 | ret = 0; |
568 | } else if (memdma) { |
569 | ret = dma_mmap_coherent(&dd->pcidev->dev, vma, |
570 | memvirt, memdma, memlen); |
571 | } else if (mapio) { |
572 | ret = io_remap_pfn_range(vma, addr: vma->vm_start, |
573 | PFN_DOWN(memaddr), |
574 | size: memlen, |
575 | prot: vma->vm_page_prot); |
576 | } else if (memvirt) { |
577 | ret = remap_pfn_range(vma, addr: vma->vm_start, |
578 | PFN_DOWN(__pa(memvirt)), |
579 | size: memlen, |
580 | vma->vm_page_prot); |
581 | } else { |
582 | ret = remap_pfn_range(vma, addr: vma->vm_start, |
583 | PFN_DOWN(memaddr), |
584 | size: memlen, |
585 | vma->vm_page_prot); |
586 | } |
587 | done: |
588 | return ret; |
589 | } |
590 | |
591 | /* |
592 | * Local (non-chip) user memory is not mapped right away but as it is |
593 | * accessed by the user-level code. |
594 | */ |
595 | static vm_fault_t vma_fault(struct vm_fault *vmf) |
596 | { |
597 | struct page *page; |
598 | |
599 | page = vmalloc_to_page(addr: (void *)(vmf->pgoff << PAGE_SHIFT)); |
600 | if (!page) |
601 | return VM_FAULT_SIGBUS; |
602 | |
603 | get_page(page); |
604 | vmf->page = page; |
605 | |
606 | return 0; |
607 | } |
608 | |
609 | static __poll_t hfi1_poll(struct file *fp, struct poll_table_struct *pt) |
610 | { |
611 | struct hfi1_ctxtdata *uctxt; |
612 | __poll_t pollflag; |
613 | |
614 | uctxt = ((struct hfi1_filedata *)fp->private_data)->uctxt; |
615 | if (!uctxt) |
616 | pollflag = EPOLLERR; |
617 | else if (uctxt->poll_type == HFI1_POLL_TYPE_URGENT) |
618 | pollflag = poll_urgent(fp, pt); |
619 | else if (uctxt->poll_type == HFI1_POLL_TYPE_ANYRCV) |
620 | pollflag = poll_next(fp, pt); |
621 | else /* invalid */ |
622 | pollflag = EPOLLERR; |
623 | |
624 | return pollflag; |
625 | } |
626 | |
627 | static int hfi1_file_close(struct inode *inode, struct file *fp) |
628 | { |
629 | struct hfi1_filedata *fdata = fp->private_data; |
630 | struct hfi1_ctxtdata *uctxt = fdata->uctxt; |
631 | struct hfi1_devdata *dd = container_of(inode->i_cdev, |
632 | struct hfi1_devdata, |
633 | user_cdev); |
634 | unsigned long flags, *ev; |
635 | |
636 | fp->private_data = NULL; |
637 | |
638 | if (!uctxt) |
639 | goto done; |
640 | |
641 | hfi1_cdbg(PROC, "closing ctxt %u:%u" , uctxt->ctxt, fdata->subctxt); |
642 | |
643 | flush_wc(); |
644 | /* drain user sdma queue */ |
645 | hfi1_user_sdma_free_queues(fd: fdata, uctxt); |
646 | |
647 | /* release the cpu */ |
648 | hfi1_put_proc_affinity(cpu: fdata->rec_cpu_num); |
649 | |
650 | /* clean up rcv side */ |
651 | hfi1_user_exp_rcv_free(fd: fdata); |
652 | |
653 | /* |
654 | * fdata->uctxt is used in the above cleanup. It is not ready to be |
655 | * removed until here. |
656 | */ |
657 | fdata->uctxt = NULL; |
658 | hfi1_rcd_put(rcd: uctxt); |
659 | |
660 | /* |
661 | * Clear any left over, unhandled events so the next process that |
662 | * gets this context doesn't get confused. |
663 | */ |
664 | ev = dd->events + uctxt_offset(uctxt) + fdata->subctxt; |
665 | *ev = 0; |
666 | |
667 | spin_lock_irqsave(&dd->uctxt_lock, flags); |
668 | __clear_bit(fdata->subctxt, uctxt->in_use_ctxts); |
669 | if (!bitmap_empty(src: uctxt->in_use_ctxts, HFI1_MAX_SHARED_CTXTS)) { |
670 | spin_unlock_irqrestore(lock: &dd->uctxt_lock, flags); |
671 | goto done; |
672 | } |
673 | spin_unlock_irqrestore(lock: &dd->uctxt_lock, flags); |
674 | |
675 | /* |
676 | * Disable receive context and interrupt available, reset all |
677 | * RcvCtxtCtrl bits to default values. |
678 | */ |
679 | hfi1_rcvctrl(dd, HFI1_RCVCTRL_CTXT_DIS | |
680 | HFI1_RCVCTRL_TIDFLOW_DIS | |
681 | HFI1_RCVCTRL_INTRAVAIL_DIS | |
682 | HFI1_RCVCTRL_TAILUPD_DIS | |
683 | HFI1_RCVCTRL_ONE_PKT_EGR_DIS | |
684 | HFI1_RCVCTRL_NO_RHQ_DROP_DIS | |
685 | HFI1_RCVCTRL_NO_EGR_DROP_DIS | |
686 | HFI1_RCVCTRL_URGENT_DIS, rcd: uctxt); |
687 | /* Clear the context's J_KEY */ |
688 | hfi1_clear_ctxt_jkey(dd, ctxt: uctxt); |
689 | /* |
690 | * If a send context is allocated, reset context integrity |
691 | * checks to default and disable the send context. |
692 | */ |
693 | if (uctxt->sc) { |
694 | sc_disable(sc: uctxt->sc); |
695 | set_pio_integrity(uctxt->sc); |
696 | } |
697 | |
698 | hfi1_free_ctxt_rcv_groups(rcd: uctxt); |
699 | hfi1_clear_ctxt_pkey(dd, ctxt: uctxt); |
700 | |
701 | uctxt->event_flags = 0; |
702 | |
703 | deallocate_ctxt(uctxt); |
704 | done: |
705 | |
706 | if (refcount_dec_and_test(r: &dd->user_refcount)) |
707 | complete(&dd->user_comp); |
708 | |
709 | cleanup_srcu_struct(ssp: &fdata->pq_srcu); |
710 | kfree(objp: fdata); |
711 | return 0; |
712 | } |
713 | |
714 | /* |
715 | * Convert kernel *virtual* addresses to physical addresses. |
716 | * This is used to vmalloc'ed addresses. |
717 | */ |
718 | static u64 kvirt_to_phys(void *addr) |
719 | { |
720 | struct page *page; |
721 | u64 paddr = 0; |
722 | |
723 | page = vmalloc_to_page(addr); |
724 | if (page) |
725 | paddr = page_to_pfn(page) << PAGE_SHIFT; |
726 | |
727 | return paddr; |
728 | } |
729 | |
730 | /** |
731 | * complete_subctxt - complete sub-context info |
732 | * @fd: valid filedata pointer |
733 | * |
734 | * Sub-context info can only be set up after the base context |
735 | * has been completed. This is indicated by the clearing of the |
736 | * HFI1_CTXT_BASE_UINIT bit. |
737 | * |
738 | * Wait for the bit to be cleared, and then complete the subcontext |
739 | * initialization. |
740 | * |
741 | */ |
742 | static int complete_subctxt(struct hfi1_filedata *fd) |
743 | { |
744 | int ret; |
745 | unsigned long flags; |
746 | |
747 | /* |
748 | * sub-context info can only be set up after the base context |
749 | * has been completed. |
750 | */ |
751 | ret = wait_event_interruptible( |
752 | fd->uctxt->wait, |
753 | !test_bit(HFI1_CTXT_BASE_UNINIT, &fd->uctxt->event_flags)); |
754 | |
755 | if (test_bit(HFI1_CTXT_BASE_FAILED, &fd->uctxt->event_flags)) |
756 | ret = -ENOMEM; |
757 | |
758 | /* Finish the sub-context init */ |
759 | if (!ret) { |
760 | fd->rec_cpu_num = hfi1_get_proc_affinity(node: fd->uctxt->numa_id); |
761 | ret = init_user_ctxt(fd, uctxt: fd->uctxt); |
762 | } |
763 | |
764 | if (ret) { |
765 | spin_lock_irqsave(&fd->dd->uctxt_lock, flags); |
766 | __clear_bit(fd->subctxt, fd->uctxt->in_use_ctxts); |
767 | spin_unlock_irqrestore(lock: &fd->dd->uctxt_lock, flags); |
768 | hfi1_rcd_put(rcd: fd->uctxt); |
769 | fd->uctxt = NULL; |
770 | } |
771 | |
772 | return ret; |
773 | } |
774 | |
775 | static int assign_ctxt(struct hfi1_filedata *fd, unsigned long arg, u32 len) |
776 | { |
777 | int ret; |
778 | unsigned int swmajor; |
779 | struct hfi1_ctxtdata *uctxt = NULL; |
780 | struct hfi1_user_info uinfo; |
781 | |
782 | if (fd->uctxt) |
783 | return -EINVAL; |
784 | |
785 | if (sizeof(uinfo) != len) |
786 | return -EINVAL; |
787 | |
788 | if (copy_from_user(to: &uinfo, from: (void __user *)arg, n: sizeof(uinfo))) |
789 | return -EFAULT; |
790 | |
791 | swmajor = uinfo.userversion >> 16; |
792 | if (swmajor != HFI1_USER_SWMAJOR) |
793 | return -ENODEV; |
794 | |
795 | if (uinfo.subctxt_cnt > HFI1_MAX_SHARED_CTXTS) |
796 | return -EINVAL; |
797 | |
798 | /* |
799 | * Acquire the mutex to protect against multiple creations of what |
800 | * could be a shared base context. |
801 | */ |
802 | mutex_lock(&hfi1_mutex); |
803 | /* |
804 | * Get a sub context if available (fd->uctxt will be set). |
805 | * ret < 0 error, 0 no context, 1 sub-context found |
806 | */ |
807 | ret = find_sub_ctxt(fd, uinfo: &uinfo); |
808 | |
809 | /* |
810 | * Allocate a base context if context sharing is not required or a |
811 | * sub context wasn't found. |
812 | */ |
813 | if (!ret) |
814 | ret = allocate_ctxt(fd, dd: fd->dd, uinfo: &uinfo, cd: &uctxt); |
815 | |
816 | mutex_unlock(lock: &hfi1_mutex); |
817 | |
818 | /* Depending on the context type, finish the appropriate init */ |
819 | switch (ret) { |
820 | case 0: |
821 | ret = setup_base_ctxt(fd, uctxt); |
822 | if (ret) |
823 | deallocate_ctxt(uctxt); |
824 | break; |
825 | case 1: |
826 | ret = complete_subctxt(fd); |
827 | break; |
828 | default: |
829 | break; |
830 | } |
831 | |
832 | return ret; |
833 | } |
834 | |
835 | /** |
836 | * match_ctxt - match context |
837 | * @fd: valid filedata pointer |
838 | * @uinfo: user info to compare base context with |
839 | * @uctxt: context to compare uinfo to. |
840 | * |
841 | * Compare the given context with the given information to see if it |
842 | * can be used for a sub context. |
843 | */ |
844 | static int match_ctxt(struct hfi1_filedata *fd, |
845 | const struct hfi1_user_info *uinfo, |
846 | struct hfi1_ctxtdata *uctxt) |
847 | { |
848 | struct hfi1_devdata *dd = fd->dd; |
849 | unsigned long flags; |
850 | u16 subctxt; |
851 | |
852 | /* Skip dynamically allocated kernel contexts */ |
853 | if (uctxt->sc && (uctxt->sc->type == SC_KERNEL)) |
854 | return 0; |
855 | |
856 | /* Skip ctxt if it doesn't match the requested one */ |
857 | if (memcmp(p: uctxt->uuid, q: uinfo->uuid, size: sizeof(uctxt->uuid)) || |
858 | uctxt->jkey != generate_jkey(current_uid()) || |
859 | uctxt->subctxt_id != uinfo->subctxt_id || |
860 | uctxt->subctxt_cnt != uinfo->subctxt_cnt) |
861 | return 0; |
862 | |
863 | /* Verify the sharing process matches the base */ |
864 | if (uctxt->userversion != uinfo->userversion) |
865 | return -EINVAL; |
866 | |
867 | /* Find an unused sub context */ |
868 | spin_lock_irqsave(&dd->uctxt_lock, flags); |
869 | if (bitmap_empty(src: uctxt->in_use_ctxts, HFI1_MAX_SHARED_CTXTS)) { |
870 | /* context is being closed, do not use */ |
871 | spin_unlock_irqrestore(lock: &dd->uctxt_lock, flags); |
872 | return 0; |
873 | } |
874 | |
875 | subctxt = find_first_zero_bit(addr: uctxt->in_use_ctxts, |
876 | HFI1_MAX_SHARED_CTXTS); |
877 | if (subctxt >= uctxt->subctxt_cnt) { |
878 | spin_unlock_irqrestore(lock: &dd->uctxt_lock, flags); |
879 | return -EBUSY; |
880 | } |
881 | |
882 | fd->subctxt = subctxt; |
883 | __set_bit(fd->subctxt, uctxt->in_use_ctxts); |
884 | spin_unlock_irqrestore(lock: &dd->uctxt_lock, flags); |
885 | |
886 | fd->uctxt = uctxt; |
887 | hfi1_rcd_get(rcd: uctxt); |
888 | |
889 | return 1; |
890 | } |
891 | |
892 | /** |
893 | * find_sub_ctxt - fund sub-context |
894 | * @fd: valid filedata pointer |
895 | * @uinfo: matching info to use to find a possible context to share. |
896 | * |
897 | * The hfi1_mutex must be held when this function is called. It is |
898 | * necessary to ensure serialized creation of shared contexts. |
899 | * |
900 | * Return: |
901 | * 0 No sub-context found |
902 | * 1 Subcontext found and allocated |
903 | * errno EINVAL (incorrect parameters) |
904 | * EBUSY (all sub contexts in use) |
905 | */ |
906 | static int find_sub_ctxt(struct hfi1_filedata *fd, |
907 | const struct hfi1_user_info *uinfo) |
908 | { |
909 | struct hfi1_ctxtdata *uctxt; |
910 | struct hfi1_devdata *dd = fd->dd; |
911 | u16 i; |
912 | int ret; |
913 | |
914 | if (!uinfo->subctxt_cnt) |
915 | return 0; |
916 | |
917 | for (i = dd->first_dyn_alloc_ctxt; i < dd->num_rcv_contexts; i++) { |
918 | uctxt = hfi1_rcd_get_by_index(dd, ctxt: i); |
919 | if (uctxt) { |
920 | ret = match_ctxt(fd, uinfo, uctxt); |
921 | hfi1_rcd_put(rcd: uctxt); |
922 | /* value of != 0 will return */ |
923 | if (ret) |
924 | return ret; |
925 | } |
926 | } |
927 | |
928 | return 0; |
929 | } |
930 | |
931 | static int allocate_ctxt(struct hfi1_filedata *fd, struct hfi1_devdata *dd, |
932 | struct hfi1_user_info *uinfo, |
933 | struct hfi1_ctxtdata **rcd) |
934 | { |
935 | struct hfi1_ctxtdata *uctxt; |
936 | int ret, numa; |
937 | |
938 | if (dd->flags & HFI1_FROZEN) { |
939 | /* |
940 | * Pick an error that is unique from all other errors |
941 | * that are returned so the user process knows that |
942 | * it tried to allocate while the SPC was frozen. It |
943 | * it should be able to retry with success in a short |
944 | * while. |
945 | */ |
946 | return -EIO; |
947 | } |
948 | |
949 | if (!dd->freectxts) |
950 | return -EBUSY; |
951 | |
952 | /* |
953 | * If we don't have a NUMA node requested, preference is towards |
954 | * device NUMA node. |
955 | */ |
956 | fd->rec_cpu_num = hfi1_get_proc_affinity(node: dd->node); |
957 | if (fd->rec_cpu_num != -1) |
958 | numa = cpu_to_node(cpu: fd->rec_cpu_num); |
959 | else |
960 | numa = numa_node_id(); |
961 | ret = hfi1_create_ctxtdata(ppd: dd->pport, numa, rcd: &uctxt); |
962 | if (ret < 0) { |
963 | dd_dev_err(dd, "user ctxtdata allocation failed\n" ); |
964 | return ret; |
965 | } |
966 | hfi1_cdbg(PROC, "[%u:%u] pid %u assigned to CPU %d (NUMA %u)" , |
967 | uctxt->ctxt, fd->subctxt, current->pid, fd->rec_cpu_num, |
968 | uctxt->numa_id); |
969 | |
970 | /* |
971 | * Allocate and enable a PIO send context. |
972 | */ |
973 | uctxt->sc = sc_alloc(dd, SC_USER, hdrqentsize: uctxt->rcvhdrqentsize, numa: dd->node); |
974 | if (!uctxt->sc) { |
975 | ret = -ENOMEM; |
976 | goto ctxdata_free; |
977 | } |
978 | hfi1_cdbg(PROC, "allocated send context %u(%u)" , uctxt->sc->sw_index, |
979 | uctxt->sc->hw_context); |
980 | ret = sc_enable(sc: uctxt->sc); |
981 | if (ret) |
982 | goto ctxdata_free; |
983 | |
984 | /* |
985 | * Setup sub context information if the user-level has requested |
986 | * sub contexts. |
987 | * This has to be done here so the rest of the sub-contexts find the |
988 | * proper base context. |
989 | * NOTE: _set_bit() can be used here because the context creation is |
990 | * protected by the mutex (rather than the spin_lock), and will be the |
991 | * very first instance of this context. |
992 | */ |
993 | __set_bit(0, uctxt->in_use_ctxts); |
994 | if (uinfo->subctxt_cnt) |
995 | init_subctxts(uctxt, uinfo); |
996 | uctxt->userversion = uinfo->userversion; |
997 | uctxt->flags = hfi1_cap_mask; /* save current flag state */ |
998 | init_waitqueue_head(&uctxt->wait); |
999 | strscpy(uctxt->comm, current->comm, sizeof(uctxt->comm)); |
1000 | memcpy(uctxt->uuid, uinfo->uuid, sizeof(uctxt->uuid)); |
1001 | uctxt->jkey = generate_jkey(current_uid()); |
1002 | hfi1_stats.sps_ctxts++; |
1003 | /* |
1004 | * Disable ASPM when there are open user/PSM contexts to avoid |
1005 | * issues with ASPM L1 exit latency |
1006 | */ |
1007 | if (dd->freectxts-- == dd->num_user_contexts) |
1008 | aspm_disable_all(dd); |
1009 | |
1010 | *rcd = uctxt; |
1011 | |
1012 | return 0; |
1013 | |
1014 | ctxdata_free: |
1015 | hfi1_free_ctxt(rcd: uctxt); |
1016 | return ret; |
1017 | } |
1018 | |
1019 | static void deallocate_ctxt(struct hfi1_ctxtdata *uctxt) |
1020 | { |
1021 | mutex_lock(&hfi1_mutex); |
1022 | hfi1_stats.sps_ctxts--; |
1023 | if (++uctxt->dd->freectxts == uctxt->dd->num_user_contexts) |
1024 | aspm_enable_all(dd: uctxt->dd); |
1025 | mutex_unlock(lock: &hfi1_mutex); |
1026 | |
1027 | hfi1_free_ctxt(rcd: uctxt); |
1028 | } |
1029 | |
1030 | static void init_subctxts(struct hfi1_ctxtdata *uctxt, |
1031 | const struct hfi1_user_info *uinfo) |
1032 | { |
1033 | uctxt->subctxt_cnt = uinfo->subctxt_cnt; |
1034 | uctxt->subctxt_id = uinfo->subctxt_id; |
1035 | set_bit(HFI1_CTXT_BASE_UNINIT, addr: &uctxt->event_flags); |
1036 | } |
1037 | |
1038 | static int setup_subctxt(struct hfi1_ctxtdata *uctxt) |
1039 | { |
1040 | int ret = 0; |
1041 | u16 num_subctxts = uctxt->subctxt_cnt; |
1042 | |
1043 | uctxt->subctxt_uregbase = vmalloc_user(PAGE_SIZE); |
1044 | if (!uctxt->subctxt_uregbase) |
1045 | return -ENOMEM; |
1046 | |
1047 | /* We can take the size of the RcvHdr Queue from the master */ |
1048 | uctxt->subctxt_rcvhdr_base = vmalloc_user(size: rcvhdrq_size(rcd: uctxt) * |
1049 | num_subctxts); |
1050 | if (!uctxt->subctxt_rcvhdr_base) { |
1051 | ret = -ENOMEM; |
1052 | goto bail_ureg; |
1053 | } |
1054 | |
1055 | uctxt->subctxt_rcvegrbuf = vmalloc_user(size: uctxt->egrbufs.size * |
1056 | num_subctxts); |
1057 | if (!uctxt->subctxt_rcvegrbuf) { |
1058 | ret = -ENOMEM; |
1059 | goto bail_rhdr; |
1060 | } |
1061 | |
1062 | return 0; |
1063 | |
1064 | bail_rhdr: |
1065 | vfree(addr: uctxt->subctxt_rcvhdr_base); |
1066 | uctxt->subctxt_rcvhdr_base = NULL; |
1067 | bail_ureg: |
1068 | vfree(addr: uctxt->subctxt_uregbase); |
1069 | uctxt->subctxt_uregbase = NULL; |
1070 | |
1071 | return ret; |
1072 | } |
1073 | |
1074 | static void user_init(struct hfi1_ctxtdata *uctxt) |
1075 | { |
1076 | unsigned int rcvctrl_ops = 0; |
1077 | |
1078 | /* initialize poll variables... */ |
1079 | uctxt->urgent = 0; |
1080 | uctxt->urgent_poll = 0; |
1081 | |
1082 | /* |
1083 | * Now enable the ctxt for receive. |
1084 | * For chips that are set to DMA the tail register to memory |
1085 | * when they change (and when the update bit transitions from |
1086 | * 0 to 1. So for those chips, we turn it off and then back on. |
1087 | * This will (very briefly) affect any other open ctxts, but the |
1088 | * duration is very short, and therefore isn't an issue. We |
1089 | * explicitly set the in-memory tail copy to 0 beforehand, so we |
1090 | * don't have to wait to be sure the DMA update has happened |
1091 | * (chip resets head/tail to 0 on transition to enable). |
1092 | */ |
1093 | if (hfi1_rcvhdrtail_kvaddr(rcd: uctxt)) |
1094 | clear_rcvhdrtail(rcd: uctxt); |
1095 | |
1096 | /* Setup J_KEY before enabling the context */ |
1097 | hfi1_set_ctxt_jkey(dd: uctxt->dd, rcd: uctxt, jkey: uctxt->jkey); |
1098 | |
1099 | rcvctrl_ops = HFI1_RCVCTRL_CTXT_ENB; |
1100 | rcvctrl_ops |= HFI1_RCVCTRL_URGENT_ENB; |
1101 | if (HFI1_CAP_UGET_MASK(uctxt->flags, HDRSUPP)) |
1102 | rcvctrl_ops |= HFI1_RCVCTRL_TIDFLOW_ENB; |
1103 | /* |
1104 | * Ignore the bit in the flags for now until proper |
1105 | * support for multiple packet per rcv array entry is |
1106 | * added. |
1107 | */ |
1108 | if (!HFI1_CAP_UGET_MASK(uctxt->flags, MULTI_PKT_EGR)) |
1109 | rcvctrl_ops |= HFI1_RCVCTRL_ONE_PKT_EGR_ENB; |
1110 | if (HFI1_CAP_UGET_MASK(uctxt->flags, NODROP_EGR_FULL)) |
1111 | rcvctrl_ops |= HFI1_RCVCTRL_NO_EGR_DROP_ENB; |
1112 | if (HFI1_CAP_UGET_MASK(uctxt->flags, NODROP_RHQ_FULL)) |
1113 | rcvctrl_ops |= HFI1_RCVCTRL_NO_RHQ_DROP_ENB; |
1114 | /* |
1115 | * The RcvCtxtCtrl.TailUpd bit has to be explicitly written. |
1116 | * We can't rely on the correct value to be set from prior |
1117 | * uses of the chip or ctxt. Therefore, add the rcvctrl op |
1118 | * for both cases. |
1119 | */ |
1120 | if (HFI1_CAP_UGET_MASK(uctxt->flags, DMA_RTAIL)) |
1121 | rcvctrl_ops |= HFI1_RCVCTRL_TAILUPD_ENB; |
1122 | else |
1123 | rcvctrl_ops |= HFI1_RCVCTRL_TAILUPD_DIS; |
1124 | hfi1_rcvctrl(dd: uctxt->dd, op: rcvctrl_ops, rcd: uctxt); |
1125 | } |
1126 | |
1127 | static int get_ctxt_info(struct hfi1_filedata *fd, unsigned long arg, u32 len) |
1128 | { |
1129 | struct hfi1_ctxt_info cinfo; |
1130 | struct hfi1_ctxtdata *uctxt = fd->uctxt; |
1131 | |
1132 | if (sizeof(cinfo) != len) |
1133 | return -EINVAL; |
1134 | |
1135 | memset(&cinfo, 0, sizeof(cinfo)); |
1136 | cinfo.runtime_flags = (((uctxt->flags >> HFI1_CAP_MISC_SHIFT) & |
1137 | HFI1_CAP_MISC_MASK) << HFI1_CAP_USER_SHIFT) | |
1138 | HFI1_CAP_UGET_MASK(uctxt->flags, MASK) | |
1139 | HFI1_CAP_KGET_MASK(uctxt->flags, K2U); |
1140 | /* adjust flag if this fd is not able to cache */ |
1141 | if (!fd->use_mn) |
1142 | cinfo.runtime_flags |= HFI1_CAP_TID_UNMAP; /* no caching */ |
1143 | |
1144 | cinfo.num_active = hfi1_count_active_units(); |
1145 | cinfo.unit = uctxt->dd->unit; |
1146 | cinfo.ctxt = uctxt->ctxt; |
1147 | cinfo.subctxt = fd->subctxt; |
1148 | cinfo.rcvtids = roundup(uctxt->egrbufs.alloced, |
1149 | uctxt->dd->rcv_entries.group_size) + |
1150 | uctxt->expected_count; |
1151 | cinfo.credits = uctxt->sc->credits; |
1152 | cinfo.numa_node = uctxt->numa_id; |
1153 | cinfo.rec_cpu = fd->rec_cpu_num; |
1154 | cinfo.send_ctxt = uctxt->sc->hw_context; |
1155 | |
1156 | cinfo.egrtids = uctxt->egrbufs.alloced; |
1157 | cinfo.rcvhdrq_cnt = get_hdrq_cnt(rcd: uctxt); |
1158 | cinfo.rcvhdrq_entsize = get_hdrqentsize(rcd: uctxt) << 2; |
1159 | cinfo.sdma_ring_size = fd->cq->nentries; |
1160 | cinfo.rcvegr_size = uctxt->egrbufs.rcvtid_size; |
1161 | |
1162 | trace_hfi1_ctxt_info(dd: uctxt->dd, ctxt: uctxt->ctxt, subctxt: fd->subctxt, cinfo: &cinfo); |
1163 | if (copy_to_user(to: (void __user *)arg, from: &cinfo, n: len)) |
1164 | return -EFAULT; |
1165 | |
1166 | return 0; |
1167 | } |
1168 | |
/**
 * init_user_ctxt - set up the per-fd SDMA queues and expected receive state
 * @fd: file data of the current driver instance
 * @uctxt: the context the file is attached to
 *
 * The SDMA queues are freed again if expected receive initialization
 * fails.
 *
 * Return: 0 on success, otherwise the error of the step that failed.
 */
static int init_user_ctxt(struct hfi1_filedata *fd,
			  struct hfi1_ctxtdata *uctxt)
{
	int rc;

	rc = hfi1_user_sdma_alloc_queues(uctxt, fd);
	if (rc)
		return rc;

	rc = hfi1_user_exp_rcv_init(fd, uctxt);
	if (!rc)
		return 0;

	hfi1_user_sdma_free_queues(fd, uctxt);
	return rc;
}
1184 | |
1185 | static int setup_base_ctxt(struct hfi1_filedata *fd, |
1186 | struct hfi1_ctxtdata *uctxt) |
1187 | { |
1188 | struct hfi1_devdata *dd = uctxt->dd; |
1189 | int ret = 0; |
1190 | |
1191 | hfi1_init_ctxt(sc: uctxt->sc); |
1192 | |
1193 | /* Now allocate the RcvHdr queue and eager buffers. */ |
1194 | ret = hfi1_create_rcvhdrq(dd, rcd: uctxt); |
1195 | if (ret) |
1196 | goto done; |
1197 | |
1198 | ret = hfi1_setup_eagerbufs(rcd: uctxt); |
1199 | if (ret) |
1200 | goto done; |
1201 | |
1202 | /* If sub-contexts are enabled, do the appropriate setup */ |
1203 | if (uctxt->subctxt_cnt) |
1204 | ret = setup_subctxt(uctxt); |
1205 | if (ret) |
1206 | goto done; |
1207 | |
1208 | ret = hfi1_alloc_ctxt_rcv_groups(rcd: uctxt); |
1209 | if (ret) |
1210 | goto done; |
1211 | |
1212 | ret = init_user_ctxt(fd, uctxt); |
1213 | if (ret) { |
1214 | hfi1_free_ctxt_rcv_groups(rcd: uctxt); |
1215 | goto done; |
1216 | } |
1217 | |
1218 | user_init(uctxt); |
1219 | |
1220 | /* Now that the context is set up, the fd can get a reference. */ |
1221 | fd->uctxt = uctxt; |
1222 | hfi1_rcd_get(rcd: uctxt); |
1223 | |
1224 | done: |
1225 | if (uctxt->subctxt_cnt) { |
1226 | /* |
1227 | * On error, set the failed bit so sub-contexts will clean up |
1228 | * correctly. |
1229 | */ |
1230 | if (ret) |
1231 | set_bit(HFI1_CTXT_BASE_FAILED, addr: &uctxt->event_flags); |
1232 | |
1233 | /* |
1234 | * Base context is done (successfully or not), notify anybody |
1235 | * using a sub-context that is waiting for this completion. |
1236 | */ |
1237 | clear_bit(HFI1_CTXT_BASE_UNINIT, addr: &uctxt->event_flags); |
1238 | wake_up(&uctxt->wait); |
1239 | } |
1240 | |
1241 | return ret; |
1242 | } |
1243 | |
1244 | static int get_base_info(struct hfi1_filedata *fd, unsigned long arg, u32 len) |
1245 | { |
1246 | struct hfi1_base_info binfo; |
1247 | struct hfi1_ctxtdata *uctxt = fd->uctxt; |
1248 | struct hfi1_devdata *dd = uctxt->dd; |
1249 | unsigned offset; |
1250 | |
1251 | trace_hfi1_uctxtdata(dd: uctxt->dd, uctxt, subctxt: fd->subctxt); |
1252 | |
1253 | if (sizeof(binfo) != len) |
1254 | return -EINVAL; |
1255 | |
1256 | memset(&binfo, 0, sizeof(binfo)); |
1257 | binfo.hw_version = dd->revision; |
1258 | binfo.sw_version = HFI1_USER_SWVERSION; |
1259 | binfo.bthqp = RVT_KDETH_QP_PREFIX; |
1260 | binfo.jkey = uctxt->jkey; |
1261 | /* |
1262 | * If more than 64 contexts are enabled the allocated credit |
1263 | * return will span two or three contiguous pages. Since we only |
1264 | * map the page containing the context's credit return address, |
1265 | * we need to calculate the offset in the proper page. |
1266 | */ |
1267 | offset = ((u64)uctxt->sc->hw_free - |
1268 | (u64)dd->cr_base[uctxt->numa_id].va) % PAGE_SIZE; |
1269 | binfo.sc_credits_addr = HFI1_MMAP_TOKEN(PIO_CRED, uctxt->ctxt, |
1270 | fd->subctxt, offset); |
1271 | binfo.pio_bufbase = HFI1_MMAP_TOKEN(PIO_BUFS, uctxt->ctxt, |
1272 | fd->subctxt, |
1273 | uctxt->sc->base_addr); |
1274 | binfo.pio_bufbase_sop = HFI1_MMAP_TOKEN(PIO_BUFS_SOP, |
1275 | uctxt->ctxt, |
1276 | fd->subctxt, |
1277 | uctxt->sc->base_addr); |
1278 | binfo.rcvhdr_bufbase = HFI1_MMAP_TOKEN(RCV_HDRQ, uctxt->ctxt, |
1279 | fd->subctxt, |
1280 | uctxt->rcvhdrq); |
1281 | binfo.rcvegr_bufbase = HFI1_MMAP_TOKEN(RCV_EGRBUF, uctxt->ctxt, |
1282 | fd->subctxt, |
1283 | uctxt->egrbufs.rcvtids[0].dma); |
1284 | binfo.sdma_comp_bufbase = HFI1_MMAP_TOKEN(SDMA_COMP, uctxt->ctxt, |
1285 | fd->subctxt, 0); |
1286 | /* |
1287 | * user regs are at |
1288 | * (RXE_PER_CONTEXT_USER + (ctxt * RXE_PER_CONTEXT_SIZE)) |
1289 | */ |
1290 | binfo.user_regbase = HFI1_MMAP_TOKEN(UREGS, uctxt->ctxt, |
1291 | fd->subctxt, 0); |
1292 | offset = offset_in_page((uctxt_offset(uctxt) + fd->subctxt) * |
1293 | sizeof(*dd->events)); |
1294 | binfo.events_bufbase = HFI1_MMAP_TOKEN(EVENTS, uctxt->ctxt, |
1295 | fd->subctxt, |
1296 | offset); |
1297 | binfo.status_bufbase = HFI1_MMAP_TOKEN(STATUS, uctxt->ctxt, |
1298 | fd->subctxt, |
1299 | dd->status); |
1300 | if (HFI1_CAP_IS_USET(DMA_RTAIL)) |
1301 | binfo.rcvhdrtail_base = HFI1_MMAP_TOKEN(RTAIL, uctxt->ctxt, |
1302 | fd->subctxt, 0); |
1303 | if (uctxt->subctxt_cnt) { |
1304 | binfo.subctxt_uregbase = HFI1_MMAP_TOKEN(SUBCTXT_UREGS, |
1305 | uctxt->ctxt, |
1306 | fd->subctxt, 0); |
1307 | binfo.subctxt_rcvhdrbuf = HFI1_MMAP_TOKEN(SUBCTXT_RCV_HDRQ, |
1308 | uctxt->ctxt, |
1309 | fd->subctxt, 0); |
1310 | binfo.subctxt_rcvegrbuf = HFI1_MMAP_TOKEN(SUBCTXT_EGRBUF, |
1311 | uctxt->ctxt, |
1312 | fd->subctxt, 0); |
1313 | } |
1314 | |
1315 | if (copy_to_user(to: (void __user *)arg, from: &binfo, n: len)) |
1316 | return -EFAULT; |
1317 | |
1318 | return 0; |
1319 | } |
1320 | |
1321 | /** |
1322 | * user_exp_rcv_setup - Set up the given tid rcv list |
1323 | * @fd: file data of the current driver instance |
1324 | * @arg: ioctl argumnent for user space information |
1325 | * @len: length of data structure associated with ioctl command |
1326 | * |
1327 | * Wrapper to validate ioctl information before doing _rcv_setup. |
1328 | * |
1329 | */ |
1330 | static int user_exp_rcv_setup(struct hfi1_filedata *fd, unsigned long arg, |
1331 | u32 len) |
1332 | { |
1333 | int ret; |
1334 | unsigned long addr; |
1335 | struct hfi1_tid_info tinfo; |
1336 | |
1337 | if (sizeof(tinfo) != len) |
1338 | return -EINVAL; |
1339 | |
1340 | if (copy_from_user(to: &tinfo, from: (void __user *)arg, n: (sizeof(tinfo)))) |
1341 | return -EFAULT; |
1342 | |
1343 | ret = hfi1_user_exp_rcv_setup(fd, tinfo: &tinfo); |
1344 | if (!ret) { |
1345 | /* |
1346 | * Copy the number of tidlist entries we used |
1347 | * and the length of the buffer we registered. |
1348 | */ |
1349 | addr = arg + offsetof(struct hfi1_tid_info, tidcnt); |
1350 | if (copy_to_user(to: (void __user *)addr, from: &tinfo.tidcnt, |
1351 | n: sizeof(tinfo.tidcnt))) |
1352 | ret = -EFAULT; |
1353 | |
1354 | addr = arg + offsetof(struct hfi1_tid_info, length); |
1355 | if (!ret && copy_to_user(to: (void __user *)addr, from: &tinfo.length, |
1356 | n: sizeof(tinfo.length))) |
1357 | ret = -EFAULT; |
1358 | |
1359 | if (ret) |
1360 | hfi1_user_exp_rcv_invalid(fd, tinfo: &tinfo); |
1361 | } |
1362 | |
1363 | return ret; |
1364 | } |
1365 | |
1366 | /** |
1367 | * user_exp_rcv_clear - Clear the given tid rcv list |
1368 | * @fd: file data of the current driver instance |
1369 | * @arg: ioctl argumnent for user space information |
1370 | * @len: length of data structure associated with ioctl command |
1371 | * |
1372 | * The hfi1_user_exp_rcv_clear() can be called from the error path. Because |
1373 | * of this, we need to use this wrapper to copy the user space information |
1374 | * before doing the clear. |
1375 | */ |
1376 | static int user_exp_rcv_clear(struct hfi1_filedata *fd, unsigned long arg, |
1377 | u32 len) |
1378 | { |
1379 | int ret; |
1380 | unsigned long addr; |
1381 | struct hfi1_tid_info tinfo; |
1382 | |
1383 | if (sizeof(tinfo) != len) |
1384 | return -EINVAL; |
1385 | |
1386 | if (copy_from_user(to: &tinfo, from: (void __user *)arg, n: (sizeof(tinfo)))) |
1387 | return -EFAULT; |
1388 | |
1389 | ret = hfi1_user_exp_rcv_clear(fd, tinfo: &tinfo); |
1390 | if (!ret) { |
1391 | addr = arg + offsetof(struct hfi1_tid_info, tidcnt); |
1392 | if (copy_to_user(to: (void __user *)addr, from: &tinfo.tidcnt, |
1393 | n: sizeof(tinfo.tidcnt))) |
1394 | return -EFAULT; |
1395 | } |
1396 | |
1397 | return ret; |
1398 | } |
1399 | |
1400 | /** |
1401 | * user_exp_rcv_invalid - Invalidate the given tid rcv list |
1402 | * @fd: file data of the current driver instance |
1403 | * @arg: ioctl argumnent for user space information |
1404 | * @len: length of data structure associated with ioctl command |
1405 | * |
1406 | * Wrapper to validate ioctl information before doing _rcv_invalid. |
1407 | * |
1408 | */ |
1409 | static int user_exp_rcv_invalid(struct hfi1_filedata *fd, unsigned long arg, |
1410 | u32 len) |
1411 | { |
1412 | int ret; |
1413 | unsigned long addr; |
1414 | struct hfi1_tid_info tinfo; |
1415 | |
1416 | if (sizeof(tinfo) != len) |
1417 | return -EINVAL; |
1418 | |
1419 | if (!fd->invalid_tids) |
1420 | return -EINVAL; |
1421 | |
1422 | if (copy_from_user(to: &tinfo, from: (void __user *)arg, n: (sizeof(tinfo)))) |
1423 | return -EFAULT; |
1424 | |
1425 | ret = hfi1_user_exp_rcv_invalid(fd, tinfo: &tinfo); |
1426 | if (ret) |
1427 | return ret; |
1428 | |
1429 | addr = arg + offsetof(struct hfi1_tid_info, tidcnt); |
1430 | if (copy_to_user(to: (void __user *)addr, from: &tinfo.tidcnt, |
1431 | n: sizeof(tinfo.tidcnt))) |
1432 | ret = -EFAULT; |
1433 | |
1434 | return ret; |
1435 | } |
1436 | |
1437 | static __poll_t poll_urgent(struct file *fp, |
1438 | struct poll_table_struct *pt) |
1439 | { |
1440 | struct hfi1_filedata *fd = fp->private_data; |
1441 | struct hfi1_ctxtdata *uctxt = fd->uctxt; |
1442 | struct hfi1_devdata *dd = uctxt->dd; |
1443 | __poll_t pollflag; |
1444 | |
1445 | poll_wait(filp: fp, wait_address: &uctxt->wait, p: pt); |
1446 | |
1447 | spin_lock_irq(lock: &dd->uctxt_lock); |
1448 | if (uctxt->urgent != uctxt->urgent_poll) { |
1449 | pollflag = EPOLLIN | EPOLLRDNORM; |
1450 | uctxt->urgent_poll = uctxt->urgent; |
1451 | } else { |
1452 | pollflag = 0; |
1453 | set_bit(HFI1_CTXT_WAITING_URG, addr: &uctxt->event_flags); |
1454 | } |
1455 | spin_unlock_irq(lock: &dd->uctxt_lock); |
1456 | |
1457 | return pollflag; |
1458 | } |
1459 | |
1460 | static __poll_t poll_next(struct file *fp, |
1461 | struct poll_table_struct *pt) |
1462 | { |
1463 | struct hfi1_filedata *fd = fp->private_data; |
1464 | struct hfi1_ctxtdata *uctxt = fd->uctxt; |
1465 | struct hfi1_devdata *dd = uctxt->dd; |
1466 | __poll_t pollflag; |
1467 | |
1468 | poll_wait(filp: fp, wait_address: &uctxt->wait, p: pt); |
1469 | |
1470 | spin_lock_irq(lock: &dd->uctxt_lock); |
1471 | if (hdrqempty(rcd: uctxt)) { |
1472 | set_bit(HFI1_CTXT_WAITING_RCV, addr: &uctxt->event_flags); |
1473 | hfi1_rcvctrl(dd, HFI1_RCVCTRL_INTRAVAIL_ENB, rcd: uctxt); |
1474 | pollflag = 0; |
1475 | } else { |
1476 | pollflag = EPOLLIN | EPOLLRDNORM; |
1477 | } |
1478 | spin_unlock_irq(lock: &dd->uctxt_lock); |
1479 | |
1480 | return pollflag; |
1481 | } |
1482 | |
1483 | /* |
1484 | * Find all user contexts in use, and set the specified bit in their |
1485 | * event mask. |
1486 | * See also find_ctxt() for a similar use, that is specific to send buffers. |
1487 | */ |
1488 | int hfi1_set_uevent_bits(struct hfi1_pportdata *ppd, const int evtbit) |
1489 | { |
1490 | struct hfi1_ctxtdata *uctxt; |
1491 | struct hfi1_devdata *dd = ppd->dd; |
1492 | u16 ctxt; |
1493 | |
1494 | if (!dd->events) |
1495 | return -EINVAL; |
1496 | |
1497 | for (ctxt = dd->first_dyn_alloc_ctxt; ctxt < dd->num_rcv_contexts; |
1498 | ctxt++) { |
1499 | uctxt = hfi1_rcd_get_by_index(dd, ctxt); |
1500 | if (uctxt) { |
1501 | unsigned long *evs; |
1502 | int i; |
1503 | /* |
1504 | * subctxt_cnt is 0 if not shared, so do base |
1505 | * separately, first, then remaining subctxt, if any |
1506 | */ |
1507 | evs = dd->events + uctxt_offset(uctxt); |
1508 | set_bit(nr: evtbit, addr: evs); |
1509 | for (i = 1; i < uctxt->subctxt_cnt; i++) |
1510 | set_bit(nr: evtbit, addr: evs + i); |
1511 | hfi1_rcd_put(rcd: uctxt); |
1512 | } |
1513 | } |
1514 | |
1515 | return 0; |
1516 | } |
1517 | |
1518 | /** |
1519 | * manage_rcvq - manage a context's receive queue |
1520 | * @uctxt: the context |
1521 | * @subctxt: the sub-context |
1522 | * @arg: start/stop action to carry out |
1523 | * |
1524 | * start_stop == 0 disables receive on the context, for use in queue |
1525 | * overflow conditions. start_stop==1 re-enables, to be used to |
1526 | * re-init the software copy of the head register |
1527 | */ |
1528 | static int manage_rcvq(struct hfi1_ctxtdata *uctxt, u16 subctxt, |
1529 | unsigned long arg) |
1530 | { |
1531 | struct hfi1_devdata *dd = uctxt->dd; |
1532 | unsigned int rcvctrl_op; |
1533 | int start_stop; |
1534 | |
1535 | if (subctxt) |
1536 | return 0; |
1537 | |
1538 | if (get_user(start_stop, (int __user *)arg)) |
1539 | return -EFAULT; |
1540 | |
1541 | /* atomically clear receive enable ctxt. */ |
1542 | if (start_stop) { |
1543 | /* |
1544 | * On enable, force in-memory copy of the tail register to |
1545 | * 0, so that protocol code doesn't have to worry about |
1546 | * whether or not the chip has yet updated the in-memory |
1547 | * copy or not on return from the system call. The chip |
1548 | * always resets it's tail register back to 0 on a |
1549 | * transition from disabled to enabled. |
1550 | */ |
1551 | if (hfi1_rcvhdrtail_kvaddr(rcd: uctxt)) |
1552 | clear_rcvhdrtail(rcd: uctxt); |
1553 | rcvctrl_op = HFI1_RCVCTRL_CTXT_ENB; |
1554 | } else { |
1555 | rcvctrl_op = HFI1_RCVCTRL_CTXT_DIS; |
1556 | } |
1557 | hfi1_rcvctrl(dd, op: rcvctrl_op, rcd: uctxt); |
1558 | /* always; new head should be equal to new tail; see above */ |
1559 | |
1560 | return 0; |
1561 | } |
1562 | |
1563 | /* |
1564 | * clear the event notifier events for this context. |
1565 | * User process then performs actions appropriate to bit having been |
1566 | * set, if desired, and checks again in future. |
1567 | */ |
1568 | static int user_event_ack(struct hfi1_ctxtdata *uctxt, u16 subctxt, |
1569 | unsigned long arg) |
1570 | { |
1571 | int i; |
1572 | struct hfi1_devdata *dd = uctxt->dd; |
1573 | unsigned long *evs; |
1574 | unsigned long events; |
1575 | |
1576 | if (!dd->events) |
1577 | return 0; |
1578 | |
1579 | if (get_user(events, (unsigned long __user *)arg)) |
1580 | return -EFAULT; |
1581 | |
1582 | evs = dd->events + uctxt_offset(uctxt) + subctxt; |
1583 | |
1584 | for (i = 0; i <= _HFI1_MAX_EVENT_BIT; i++) { |
1585 | if (!test_bit(i, &events)) |
1586 | continue; |
1587 | clear_bit(nr: i, addr: evs); |
1588 | } |
1589 | return 0; |
1590 | } |
1591 | |
1592 | static int set_ctxt_pkey(struct hfi1_ctxtdata *uctxt, unsigned long arg) |
1593 | { |
1594 | int i; |
1595 | struct hfi1_pportdata *ppd = uctxt->ppd; |
1596 | struct hfi1_devdata *dd = uctxt->dd; |
1597 | u16 pkey; |
1598 | |
1599 | if (!HFI1_CAP_IS_USET(PKEY_CHECK)) |
1600 | return -EPERM; |
1601 | |
1602 | if (get_user(pkey, (u16 __user *)arg)) |
1603 | return -EFAULT; |
1604 | |
1605 | if (pkey == LIM_MGMT_P_KEY || pkey == FULL_MGMT_P_KEY) |
1606 | return -EINVAL; |
1607 | |
1608 | for (i = 0; i < ARRAY_SIZE(ppd->pkeys); i++) |
1609 | if (pkey == ppd->pkeys[i]) |
1610 | return hfi1_set_ctxt_pkey(dd, ctxt: uctxt, pkey); |
1611 | |
1612 | return -ENOENT; |
1613 | } |
1614 | |
1615 | /** |
1616 | * ctxt_reset - Reset the user context |
1617 | * @uctxt: valid user context |
1618 | */ |
1619 | static int ctxt_reset(struct hfi1_ctxtdata *uctxt) |
1620 | { |
1621 | struct send_context *sc; |
1622 | struct hfi1_devdata *dd; |
1623 | int ret = 0; |
1624 | |
1625 | if (!uctxt || !uctxt->dd || !uctxt->sc) |
1626 | return -EINVAL; |
1627 | |
1628 | /* |
1629 | * There is no protection here. User level has to guarantee that |
1630 | * no one will be writing to the send context while it is being |
1631 | * re-initialized. If user level breaks that guarantee, it will |
1632 | * break it's own context and no one else's. |
1633 | */ |
1634 | dd = uctxt->dd; |
1635 | sc = uctxt->sc; |
1636 | |
1637 | /* |
1638 | * Wait until the interrupt handler has marked the context as |
1639 | * halted or frozen. Report error if we time out. |
1640 | */ |
1641 | wait_event_interruptible_timeout( |
1642 | sc->halt_wait, (sc->flags & SCF_HALTED), |
1643 | msecs_to_jiffies(SEND_CTXT_HALT_TIMEOUT)); |
1644 | if (!(sc->flags & SCF_HALTED)) |
1645 | return -ENOLCK; |
1646 | |
1647 | /* |
1648 | * If the send context was halted due to a Freeze, wait until the |
1649 | * device has been "unfrozen" before resetting the context. |
1650 | */ |
1651 | if (sc->flags & SCF_FROZEN) { |
1652 | wait_event_interruptible_timeout( |
1653 | dd->event_queue, |
1654 | !(READ_ONCE(dd->flags) & HFI1_FROZEN), |
1655 | msecs_to_jiffies(SEND_CTXT_HALT_TIMEOUT)); |
1656 | if (dd->flags & HFI1_FROZEN) |
1657 | return -ENOLCK; |
1658 | |
1659 | if (dd->flags & HFI1_FORCED_FREEZE) |
1660 | /* |
1661 | * Don't allow context reset if we are into |
1662 | * forced freeze |
1663 | */ |
1664 | return -ENODEV; |
1665 | |
1666 | sc_disable(sc); |
1667 | ret = sc_enable(sc); |
1668 | hfi1_rcvctrl(dd, HFI1_RCVCTRL_CTXT_ENB, rcd: uctxt); |
1669 | } else { |
1670 | ret = sc_restart(sc); |
1671 | } |
1672 | if (!ret) |
1673 | sc_return_credits(sc); |
1674 | |
1675 | return ret; |
1676 | } |
1677 | |
1678 | static void user_remove(struct hfi1_devdata *dd) |
1679 | { |
1680 | |
1681 | hfi1_cdev_cleanup(cdev: &dd->user_cdev, devp: &dd->user_device); |
1682 | } |
1683 | |
1684 | static int user_add(struct hfi1_devdata *dd) |
1685 | { |
1686 | char name[10]; |
1687 | int ret; |
1688 | |
1689 | snprintf(buf: name, size: sizeof(name), fmt: "%s_%d" , class_name(), dd->unit); |
1690 | ret = hfi1_cdev_init(minor: dd->unit, name, fops: &hfi1_file_ops, |
1691 | cdev: &dd->user_cdev, devp: &dd->user_device, |
1692 | user_accessible: true, parent: &dd->verbs_dev.rdi.ibdev.dev.kobj); |
1693 | if (ret) |
1694 | user_remove(dd); |
1695 | |
1696 | return ret; |
1697 | } |
1698 | |
/*
 * Create per-unit files in /dev
 * Returns 0 on success, otherwise the error reported by user_add().
 */
int hfi1_device_create(struct hfi1_devdata *dd)
{
	return user_add(dd);
}
1706 | |
/*
 * Remove per-unit files in /dev
 * Returns nothing: the core kernel reports no errors for this teardown.
 */
void hfi1_device_remove(struct hfi1_devdata *dd)
{
	user_remove(dd);
}
1715 | |