// SPDX-License-Identifier: GPL-2.0
/* Copyright(c) 2019 Intel Corporation. All rights rsvd. */
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/pci.h>
#include <linux/device.h>
#include <linux/sched/task.h>
#include <linux/io-64-nonatomic-lo-hi.h>
#include <linux/cdev.h>
#include <linux/fs.h>
#include <linux/poll.h>
#include <linux/iommu.h>
#include <linux/highmem.h>
#include <uapi/linux/idxd.h>
#include <linux/xarray.h>
#include "registers.h"
#include "idxd.h"

struct idxd_cdev_context {
	const char *name;
	dev_t devt;
	struct ida minor_ida;
};

/*
 * Since user file names are global in DSA devices, define their ida as
 * global to avoid conflicting file names.
 */
static DEFINE_IDA(file_ida);
static DEFINE_MUTEX(ida_lock);

/*
 * ictx is an array indexed by accelerator type; enum idxd_type is used
 * as the index.
 */
static struct idxd_cdev_context ictx[IDXD_TYPE_MAX] = {
	{ .name = "dsa" },
	{ .name = "iax" }
};

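/*
 * Per-open-file state for a user work queue. One instance is allocated
 * in idxd_cdev_open(); the embedded idxd_dev makes each open file
 * visible as a "file%d" device so that per-file attributes (cr_faults,
 * cr_fault_failures, pid) can be exposed through sysfs.
 */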
struct idxd_user_context {
	struct idxd_wq *wq;
	struct task_struct *task;
	unsigned int pasid;
	struct mm_struct *mm;
	unsigned int flags;
	struct iommu_sva *sva;
	struct idxd_dev idxd_dev;
	u64 counters[COUNTER_MAX];
	int id;
	pid_t pid;
};

static void idxd_cdev_evl_drain_pasid(struct idxd_wq *wq, u32 pasid);
static void idxd_xa_pasid_remove(struct idxd_user_context *ctx);

static inline struct idxd_user_context *dev_to_uctx(struct device *dev)
{
	struct idxd_dev *idxd_dev = confdev_to_idxd_dev(dev);

	return container_of(idxd_dev, struct idxd_user_context, idxd_dev);
}

static ssize_t cr_faults_show(struct device *dev, struct device_attribute *attr, char *buf)
{
	struct idxd_user_context *ctx = dev_to_uctx(dev);

	return sysfs_emit(buf, "%llu\n", ctx->counters[COUNTER_FAULTS]);
}
static DEVICE_ATTR_RO(cr_faults);

static ssize_t cr_fault_failures_show(struct device *dev,
				      struct device_attribute *attr, char *buf)
{
	struct idxd_user_context *ctx = dev_to_uctx(dev);

	return sysfs_emit(buf, "%llu\n", ctx->counters[COUNTER_FAULT_FAILS]);
}
static DEVICE_ATTR_RO(cr_fault_failures);

static ssize_t pid_show(struct device *dev, struct device_attribute *attr, char *buf)
{
	struct idxd_user_context *ctx = dev_to_uctx(dev);

	return sysfs_emit(buf, "%u\n", ctx->pid);
}
static DEVICE_ATTR_RO(pid);

static struct attribute *cdev_file_attributes[] = {
	&dev_attr_cr_faults.attr,
	&dev_attr_cr_fault_failures.attr,
	&dev_attr_pid.attr,
	NULL
};

static umode_t cdev_file_attr_visible(struct kobject *kobj, struct attribute *a, int n)
{
	struct device *dev = container_of(kobj, typeof(*dev), kobj);
	struct idxd_user_context *ctx = dev_to_uctx(dev);
	struct idxd_wq *wq = ctx->wq;

	if (!wq_pasid_enabled(wq))
		return 0;

	return a->mode;
}

static const struct attribute_group cdev_file_attribute_group = {
	.attrs = cdev_file_attributes,
	.is_visible = cdev_file_attr_visible,
};

static const struct attribute_group *cdev_file_attribute_groups[] = {
	&cdev_file_attribute_group,
	NULL
};

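/*
 * Release callback for a "file%d" device; runs once the last reference
 * is dropped after device_unregister() in idxd_cdev_release().
 */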
static void idxd_file_dev_release(struct device *dev)
{
	struct idxd_user_context *ctx = dev_to_uctx(dev);
	struct idxd_wq *wq = ctx->wq;
	struct idxd_device *idxd = wq->idxd;
	int rc;

	mutex_lock(&ida_lock);
	ida_free(&file_ida, ctx->id);
	mutex_unlock(&ida_lock);

	/* Wait for in-flight operations to complete. */
	if (wq_shared(wq)) {
		idxd_device_drain_pasid(idxd, ctx->pasid);
	} else {
		if (device_user_pasid_enabled(idxd)) {
			/* The wq disable in the disable pasid function will drain the wq */
			rc = idxd_wq_disable_pasid(wq);
			if (rc < 0)
				dev_err(dev, "wq disable pasid failed.\n");
		} else {
			idxd_wq_drain(wq);
		}
	}

	if (ctx->sva) {
		idxd_cdev_evl_drain_pasid(wq, ctx->pasid);
		iommu_sva_unbind_device(ctx->sva);
		idxd_xa_pasid_remove(ctx);
	}
	kfree(ctx);
	mutex_lock(&wq->wq_lock);
	idxd_wq_put(wq);
	mutex_unlock(&wq->wq_lock);
}

static const struct device_type idxd_cdev_file_type = {
	.name = "idxd_file",
	.release = idxd_file_dev_release,
	.groups = cdev_file_attribute_groups,
};

static void idxd_cdev_dev_release(struct device *dev)
{
	struct idxd_cdev *idxd_cdev = dev_to_cdev(dev);
	struct idxd_cdev_context *cdev_ctx;
	struct idxd_wq *wq = idxd_cdev->wq;

	cdev_ctx = &ictx[wq->idxd->data->type];
	ida_free(&cdev_ctx->minor_ida, idxd_cdev->minor);
	kfree(idxd_cdev);
}

static const struct device_type idxd_cdev_device_type = {
	.name = "idxd_cdev",
	.release = idxd_cdev_dev_release,
};

static inline struct idxd_cdev *inode_idxd_cdev(struct inode *inode)
{
	struct cdev *cdev = inode->i_cdev;

	return container_of(cdev, struct idxd_cdev, cdev);
}

static inline struct idxd_wq *inode_wq(struct inode *inode)
{
	struct idxd_cdev *idxd_cdev = inode_idxd_cdev(inode);

	return idxd_cdev->wq;
}

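/*
 * Drop this context's PASID -> ctx translation from the wq's xarray;
 * warn if the slot no longer holds our context.
 */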
static void idxd_xa_pasid_remove(struct idxd_user_context *ctx)
{
	struct idxd_wq *wq = ctx->wq;
	void *ptr;

	mutex_lock(&wq->uc_lock);
	ptr = xa_cmpxchg(&wq->upasid_xa, ctx->pasid, ctx, NULL, GFP_KERNEL);
	if (ptr != (void *)ctx)
		dev_warn(&wq->idxd->pdev->dev, "xarray cmpxchg failed for pasid %u\n",
			 ctx->pasid);
	mutex_unlock(&wq->uc_lock);
}

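/*
 * Bump one of the per-context counters exposed through sysfs. Called
 * from completion record fault handling; a PASID with no registered
 * user context is silently ignored.
 */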
void idxd_user_counter_increment(struct idxd_wq *wq, u32 pasid, int index)
{
	struct idxd_user_context *ctx;

	if (index >= COUNTER_MAX)
		return;

	mutex_lock(&wq->uc_lock);
	ctx = xa_load(&wq->upasid_xa, pasid);
	if (!ctx) {
		mutex_unlock(&wq->uc_lock);
		return;
	}
	ctx->counters[index]++;
	mutex_unlock(&wq->uc_lock);
}

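/*
 * Open a user work queue: allocate the per-file context, bind the
 * current mm to a PASID when SVA is enabled, register the "file%d"
 * device for sysfs, and take a reference on the wq. A dedicated wq
 * admits only a single opener.
 */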
static int idxd_cdev_open(struct inode *inode, struct file *filp)
{
	struct idxd_user_context *ctx;
	struct idxd_device *idxd;
	struct idxd_wq *wq;
	struct device *dev, *fdev;
	int rc = 0;
	struct iommu_sva *sva;
	unsigned int pasid;
	struct idxd_cdev *idxd_cdev;

	wq = inode_wq(inode);
	idxd = wq->idxd;
	dev = &idxd->pdev->dev;

	dev_dbg(dev, "%s called: %d\n", __func__, idxd_wq_refcount(wq));

	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
	if (!ctx)
		return -ENOMEM;

	mutex_lock(&wq->wq_lock);

	if (idxd_wq_refcount(wq) > 0 && wq_dedicated(wq)) {
		rc = -EBUSY;
		goto failed;
	}

	ctx->wq = wq;
	filp->private_data = ctx;
	ctx->pid = current->pid;

	if (device_user_pasid_enabled(idxd)) {
		sva = iommu_sva_bind_device(dev, current->mm);
		if (IS_ERR(sva)) {
			rc = PTR_ERR(sva);
			dev_err(dev, "pasid allocation failed: %d\n", rc);
			goto failed;
		}

		pasid = iommu_sva_get_pasid(sva);
		if (pasid == IOMMU_PASID_INVALID) {
			rc = -EINVAL;
			goto failed_get_pasid;
		}

		ctx->sva = sva;
		ctx->pasid = pasid;
		ctx->mm = current->mm;

		mutex_lock(&wq->uc_lock);
		rc = xa_insert(&wq->upasid_xa, pasid, ctx, GFP_KERNEL);
		mutex_unlock(&wq->uc_lock);
		if (rc < 0)
			dev_warn(dev, "PASID entry already exists in xarray.\n");

		if (wq_dedicated(wq)) {
			rc = idxd_wq_set_pasid(wq, pasid);
			if (rc < 0) {
				dev_err(dev, "wq set pasid failed: %d\n", rc);
				goto failed_set_pasid;
			}
		}
	}

	idxd_cdev = wq->idxd_cdev;
	mutex_lock(&ida_lock);
	ctx->id = ida_alloc(&file_ida, GFP_KERNEL);
	mutex_unlock(&ida_lock);
	if (ctx->id < 0) {
		rc = ctx->id;
		dev_warn(dev, "ida alloc failure\n");
		goto failed_ida;
	}
	ctx->idxd_dev.type = IDXD_DEV_CDEV_FILE;
	fdev = user_ctx_dev(ctx);
	device_initialize(fdev);
	fdev->parent = cdev_dev(idxd_cdev);
	fdev->bus = &dsa_bus_type;
	fdev->type = &idxd_cdev_file_type;

	rc = dev_set_name(fdev, "file%d", ctx->id);
	if (rc < 0) {
		dev_warn(dev, "set name failure\n");
		goto failed_dev_name;
	}

	rc = device_add(fdev);
	if (rc < 0) {
		dev_warn(dev, "file device add failure\n");
		goto failed_dev_add;
	}

	idxd_wq_get(wq);
	mutex_unlock(&wq->wq_lock);
	return 0;

failed_dev_add:
failed_dev_name:
	put_device(fdev);
failed_ida:
failed_set_pasid:
	if (device_user_pasid_enabled(idxd))
		idxd_xa_pasid_remove(ctx);
failed_get_pasid:
	if (device_user_pasid_enabled(idxd))
		iommu_sva_unbind_device(sva);
failed:
	mutex_unlock(&wq->wq_lock);
	kfree(ctx);
	return rc;
}

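/*
 * Mark any event log entries still pending for this wq/PASID so the
 * fault-processing path knows the owning context is going away, then
 * drain the wq's fault workqueue so no queued work touches it.
 */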
static void idxd_cdev_evl_drain_pasid(struct idxd_wq *wq, u32 pasid)
{
	struct idxd_device *idxd = wq->idxd;
	struct idxd_evl *evl = idxd->evl;
	union evl_status_reg status;
	u16 h, t, size;
	int ent_size = evl_ent_size(idxd);
	struct __evl_entry *entry_head;

	if (!evl)
		return;

	spin_lock(&evl->lock);
	status.bits = ioread64(idxd->reg_base + IDXD_EVLSTATUS_OFFSET);
	t = status.tail;
	h = status.head;
	size = evl->size;

	while (h != t) {
		entry_head = (struct __evl_entry *)(evl->log + (h * ent_size));
		if (entry_head->pasid == pasid && entry_head->wq_idx == wq->id)
			set_bit(h, evl->bmap);
		h = (h + 1) % size;
	}
	spin_unlock(&evl->lock);

	drain_workqueue(wq->wq);
}

static int idxd_cdev_release(struct inode *node, struct file *filep)
{
	struct idxd_user_context *ctx = filep->private_data;
	struct idxd_wq *wq = ctx->wq;
	struct idxd_device *idxd = wq->idxd;
	struct device *dev = &idxd->pdev->dev;

	dev_dbg(dev, "%s called\n", __func__);
	filep->private_data = NULL;

	device_unregister(user_ctx_dev(ctx));

	return 0;
}

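/*
 * A wq submission portal is a single page; refuse any larger mapping
 * request.
 */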
static int check_vma(struct idxd_wq *wq, struct vm_area_struct *vma,
		     const char *func)
{
	struct device *dev = &wq->idxd->pdev->dev;

	if ((vma->vm_end - vma->vm_start) > PAGE_SIZE) {
		dev_info_ratelimited(dev,
				     "%s: %s: mapping too large: %lu\n",
				     current->comm, func,
				     vma->vm_end - vma->vm_start);
		return -EINVAL;
	}

	return 0;
}

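/*
 * Map the wq's limited (unprivileged) submission portal into the
 * caller's address space. The mapping is one uncached page and is
 * excluded from fork() via VM_DONTCOPY.
 */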
static int idxd_cdev_mmap(struct file *filp, struct vm_area_struct *vma)
{
	struct idxd_user_context *ctx = filp->private_data;
	struct idxd_wq *wq = ctx->wq;
	struct idxd_device *idxd = wq->idxd;
	struct pci_dev *pdev = idxd->pdev;
	phys_addr_t base = pci_resource_start(pdev, IDXD_WQ_BAR);
	unsigned long pfn;
	int rc;

	dev_dbg(&pdev->dev, "%s called\n", __func__);
	rc = check_vma(wq, vma, __func__);
	if (rc < 0)
		return rc;

	vm_flags_set(vma, VM_DONTCOPY);
	pfn = (base + idxd_get_wq_portal_full_offset(wq->id,
				IDXD_PORTAL_LIMITED)) >> PAGE_SHIFT;
	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
	vma->vm_private_data = ctx;

	return io_remap_pfn_range(vma, vma->vm_start, pfn, PAGE_SIZE,
			vma->vm_page_prot);
}

static __poll_t idxd_cdev_poll(struct file *filp,
			       struct poll_table_struct *wait)
{
	struct idxd_user_context *ctx = filp->private_data;
	struct idxd_wq *wq = ctx->wq;
	struct idxd_device *idxd = wq->idxd;
	__poll_t out = 0;

	poll_wait(filp, &wq->err_queue, wait);
	spin_lock(&idxd->dev_lock);
	if (idxd->sw_err.valid)
		out = EPOLLIN | EPOLLRDNORM;
	spin_unlock(&idxd->dev_lock);

	return out;
}

static const struct file_operations idxd_cdev_fops = {
	.owner = THIS_MODULE,
	.open = idxd_cdev_open,
	.release = idxd_cdev_release,
	.mmap = idxd_cdev_mmap,
	.poll = idxd_cdev_poll,
};

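/*
 * Illustrative userspace sketch (not part of this driver): a typical
 * consumer opens the char device, maps the limited portal page exposed
 * by idxd_cdev_mmap() above, and submits descriptors to it with ENQCMD
 * (shared wq) or MOVDIR64B (dedicated wq):
 *
 *	int fd = open("/dev/dsa/wq0.0", O_RDWR);
 *	void *portal = mmap(NULL, 4096, PROT_WRITE, MAP_SHARED, fd, 0);
 *	// retry enqcmd(portal, &desc) while the shared wq is full
 */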
int idxd_cdev_get_major(struct idxd_device *idxd)
{
	return MAJOR(ictx[idxd->data->type].devt);
}

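/*
 * Create the wqX.Y char device for a user wq: allocate a minor from
 * the per-type ida, set up the device under dsa_bus_type, and register
 * the cdev and device together with cdev_device_add().
 */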
int idxd_wq_add_cdev(struct idxd_wq *wq)
{
	struct idxd_device *idxd = wq->idxd;
	struct idxd_cdev *idxd_cdev;
	struct cdev *cdev;
	struct device *dev;
	struct idxd_cdev_context *cdev_ctx;
	int rc, minor;

	idxd_cdev = kzalloc(sizeof(*idxd_cdev), GFP_KERNEL);
	if (!idxd_cdev)
		return -ENOMEM;

	idxd_cdev->idxd_dev.type = IDXD_DEV_CDEV;
	idxd_cdev->wq = wq;
	cdev = &idxd_cdev->cdev;
	dev = cdev_dev(idxd_cdev);
	cdev_ctx = &ictx[wq->idxd->data->type];
	minor = ida_alloc_max(&cdev_ctx->minor_ida, MINORMASK, GFP_KERNEL);
	if (minor < 0) {
		kfree(idxd_cdev);
		return minor;
	}
	idxd_cdev->minor = minor;

	device_initialize(dev);
	dev->parent = wq_confdev(wq);
	dev->bus = &dsa_bus_type;
	dev->type = &idxd_cdev_device_type;
	dev->devt = MKDEV(MAJOR(cdev_ctx->devt), minor);

	rc = dev_set_name(dev, "%s/wq%u.%u", idxd->data->name_prefix, idxd->id, wq->id);
	if (rc < 0)
		goto err;

	wq->idxd_cdev = idxd_cdev;
	cdev_init(cdev, &idxd_cdev_fops);
	rc = cdev_device_add(cdev, dev);
	if (rc) {
		dev_dbg(&wq->idxd->pdev->dev, "cdev_add failed: %d\n", rc);
		goto err;
	}

	return 0;

err:
	put_device(dev);
	wq->idxd_cdev = NULL;
	return rc;
}

void idxd_wq_del_cdev(struct idxd_wq *wq)
{
	struct idxd_cdev *idxd_cdev;

	idxd_cdev = wq->idxd_cdev;
	wq->idxd_cdev = NULL;
	cdev_device_del(&idxd_cdev->cdev, cdev_dev(idxd_cdev));
	put_device(cdev_dev(idxd_cdev));
}

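/*
 * Probe for the "user" wq device driver: check that SVA and the
 * configured driver name allow user access, create the fault-handling
 * workqueue, enable the wq, and expose it as a char device.
 */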
static int idxd_user_drv_probe(struct idxd_dev *idxd_dev)
{
	struct device *dev = &idxd_dev->conf_dev;
	struct idxd_wq *wq = idxd_dev_to_wq(idxd_dev);
	struct idxd_device *idxd = wq->idxd;
	int rc;

	if (idxd->state != IDXD_DEV_ENABLED)
		return -ENXIO;

	/*
	 * A user type WQ is enabled only when SVA is enabled, for two reasons:
	 * - With no IOMMU, or an IOMMU in passthrough without SVA, userspace
	 *   could directly access physical addresses through the WQ.
	 * - Without IOMMU SVA, the IDXD cdev driver provides no way to pin
	 *   user pages or translate addresses from user VA to IOVA or PA,
	 *   so the application has no way to instruct the device to perform
	 *   DMA. This makes the cdev unusable for normal applications.
	 */
	if (!device_user_pasid_enabled(idxd)) {
		idxd->cmd_status = IDXD_SCMD_WQ_USER_NO_IOMMU;
		dev_dbg(&idxd->pdev->dev,
			"User type WQ cannot be enabled without SVA.\n");

		return -EOPNOTSUPP;
	}

	mutex_lock(&wq->wq_lock);

	if (!idxd_wq_driver_name_match(wq, dev)) {
		idxd->cmd_status = IDXD_SCMD_WQ_NO_DRV_NAME;
		rc = -ENODEV;
		goto wq_err;
	}

	wq->wq = create_workqueue(dev_name(wq_confdev(wq)));
	if (!wq->wq) {
		rc = -ENOMEM;
		goto wq_err;
	}

	wq->type = IDXD_WQT_USER;
	rc = idxd_drv_enable_wq(wq);
	if (rc < 0)
		goto err;

	rc = idxd_wq_add_cdev(wq);
	if (rc < 0) {
		idxd->cmd_status = IDXD_SCMD_CDEV_ERR;
		goto err_cdev;
	}

	idxd->cmd_status = 0;
	mutex_unlock(&wq->wq_lock);
	return 0;

err_cdev:
	idxd_drv_disable_wq(wq);
err:
	destroy_workqueue(wq->wq);
	wq->type = IDXD_WQT_NONE;
wq_err:
	mutex_unlock(&wq->wq_lock);
	return rc;
}

static void idxd_user_drv_remove(struct idxd_dev *idxd_dev)
{
	struct idxd_wq *wq = idxd_dev_to_wq(idxd_dev);

	mutex_lock(&wq->wq_lock);
	idxd_wq_del_cdev(wq);
	idxd_drv_disable_wq(wq);
	wq->type = IDXD_WQT_NONE;
	destroy_workqueue(wq->wq);
	wq->wq = NULL;
	mutex_unlock(&wq->wq_lock);
}

static enum idxd_dev_type dev_types[] = {
	IDXD_DEV_WQ,
	IDXD_DEV_NONE,
};

struct idxd_device_driver idxd_user_drv = {
	.probe = idxd_user_drv_probe,
	.remove = idxd_user_drv_remove,
	.name = "user",
	.type = dev_types,
};
EXPORT_SYMBOL_GPL(idxd_user_drv);

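/*
 * Reserve a char device region for each accelerator type at module
 * init; on failure, release the regions already allocated.
 */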
int idxd_cdev_register(void)
{
	int rc, i;

	for (i = 0; i < IDXD_TYPE_MAX; i++) {
		ida_init(&ictx[i].minor_ida);
		rc = alloc_chrdev_region(&ictx[i].devt, 0, MINORMASK,
					 ictx[i].name);
		if (rc)
			goto err_free_chrdev_region;
	}

	return 0;

err_free_chrdev_region:
	for (i--; i >= 0; i--)
		unregister_chrdev_region(ictx[i].devt, MINORMASK);

	return rc;
}

void idxd_cdev_remove(void)
{
	int i;

	for (i = 0; i < IDXD_TYPE_MAX; i++) {
		unregister_chrdev_region(ictx[i].devt, MINORMASK);
		ida_destroy(&ictx[i].minor_ida);
	}
}

/**
 * idxd_copy_cr - copy completion record to user address space found by wq and
 *		  PASID
 * @wq: work queue
 * @pasid: PASID
 * @addr: user fault address to write
 * @cr: completion record
 * @len: number of bytes to copy
 *
 * This is called by the work item that handles completion record faults.
 *
 * Return: number of bytes copied.
 */
int idxd_copy_cr(struct idxd_wq *wq, ioasid_t pasid, unsigned long addr,
		 void *cr, int len)
{
	struct device *dev = &wq->idxd->pdev->dev;
	int left = len, status_size = 1;
	struct idxd_user_context *ctx;
	struct mm_struct *mm;

	mutex_lock(&wq->uc_lock);

	ctx = xa_load(&wq->upasid_xa, pasid);
	if (!ctx) {
		dev_warn(dev, "No user context\n");
		goto out;
	}

	mm = ctx->mm;
	/*
	 * The completion record fault handling work is running in kernel
	 * thread context. It temporarily switches to the mm to copy cr
	 * to addr in the mm.
	 */
	kthread_use_mm(mm);
	left = copy_to_user((void __user *)addr + status_size, cr + status_size,
			    len - status_size);
	/*
	 * Copy status only after the rest of the completion record is copied
	 * successfully so that the user gets the complete completion record
	 * when a non-zero status is polled.
	 */
	if (!left) {
		u8 status;

		/*
		 * Ensure that the completion record's status field is written
		 * after the rest of the completion record has been written.
		 * This ensures that the user receives the correct completion
		 * record information when polling for a non-zero status.
		 */
		wmb();
		status = *(u8 *)cr;
		if (put_user(status, (u8 __user *)addr))
			left += status_size;
	} else {
		left += status_size;
	}
	kthread_unuse_mm(mm);

out:
	mutex_unlock(&wq->uc_lock);

	return len - left;
}