// SPDX-License-Identifier: GPL-2.0
/* Copyright(c) 2019 Intel Corporation. All rights rsvd. */
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/pci.h>
#include <linux/interrupt.h>
#include <linux/delay.h>
#include <linux/dma-mapping.h>
#include <linux/workqueue.h>
#include <linux/fs.h>
#include <linux/io-64-nonatomic-lo-hi.h>
#include <linux/device.h>
#include <linux/idr.h>
#include <linux/iommu.h>
#include <uapi/linux/idxd.h>
#include <linux/dmaengine.h>
#include "../dmaengine.h"
#include "registers.h"
#include "idxd.h"
#include "perfmon.h"

MODULE_VERSION(IDXD_DRIVER_VERSION);
MODULE_LICENSE("GPL v2");
MODULE_AUTHOR("Intel Corporation");
MODULE_IMPORT_NS(IDXD);

static bool sva = true;
module_param(sva, bool, 0644);
MODULE_PARM_DESC(sva, "Toggle SVA support on/off");

bool tc_override;
module_param(tc_override, bool, 0644);
MODULE_PARM_DESC(tc_override, "Override traffic class defaults");

#define DRV_NAME "idxd"

bool support_enqcmd;
DEFINE_IDA(idxd_ida);

static struct idxd_driver_data idxd_driver_data[] = {
	[IDXD_TYPE_DSA] = {
		.name_prefix = "dsa",
		.type = IDXD_TYPE_DSA,
		.compl_size = sizeof(struct dsa_completion_record),
		.align = 32,
		.dev_type = &dsa_device_type,
		.evl_cr_off = offsetof(struct dsa_evl_entry, cr),
		.cr_status_off = offsetof(struct dsa_completion_record, status),
		.cr_result_off = offsetof(struct dsa_completion_record, result),
	},
	[IDXD_TYPE_IAX] = {
		.name_prefix = "iax",
		.type = IDXD_TYPE_IAX,
		.compl_size = sizeof(struct iax_completion_record),
		.align = 64,
		.dev_type = &iax_device_type,
		.evl_cr_off = offsetof(struct iax_evl_entry, cr),
		.cr_status_off = offsetof(struct iax_completion_record, status),
		.cr_result_off = offsetof(struct iax_completion_record, error_code),
		.load_device_defaults = idxd_load_iaa_device_defaults,
	},
};

static struct pci_device_id idxd_pci_tbl[] = {
	/* DSA ver 1.0 platforms */
	{ PCI_DEVICE_DATA(INTEL, DSA_SPR0, &idxd_driver_data[IDXD_TYPE_DSA]) },

	/* IAX ver 1.0 platforms */
	{ PCI_DEVICE_DATA(INTEL, IAX_SPR0, &idxd_driver_data[IDXD_TYPE_IAX]) },
	{ 0, }
};
MODULE_DEVICE_TABLE(pci, idxd_pci_tbl);

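/*
 * Enable MSI-X, request the misc interrupt on vector 0 for device-level
 * events, and initialize the per-WQ interrupt entries for later use.
 */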
static int idxd_setup_interrupts(struct idxd_device *idxd)
{
	struct pci_dev *pdev = idxd->pdev;
	struct device *dev = &pdev->dev;
	struct idxd_irq_entry *ie;
	int i, msixcnt;
	int rc = 0;

	msixcnt = pci_msix_vec_count(pdev);
	if (msixcnt < 0) {
		dev_err(dev, "Not MSI-X interrupt capable.\n");
		return -ENOSPC;
	}
	idxd->irq_cnt = msixcnt;

	rc = pci_alloc_irq_vectors(pdev, msixcnt, msixcnt, PCI_IRQ_MSIX);
	if (rc != msixcnt) {
		dev_err(dev, "Failed enabling %d MSIX entries: %d\n", msixcnt, rc);
		return -ENOSPC;
	}
	dev_dbg(dev, "Enabled %d msix vectors\n", msixcnt);

	ie = idxd_get_ie(idxd, 0);
	ie->vector = pci_irq_vector(pdev, 0);
	rc = request_threaded_irq(ie->vector, NULL, idxd_misc_thread, 0, "idxd-misc", ie);
	if (rc < 0) {
		dev_err(dev, "Failed to allocate misc interrupt.\n");
		goto err_misc_irq;
	}
	dev_dbg(dev, "Requested idxd-misc handler on msix vector %d\n", ie->vector);

	for (i = 0; i < idxd->max_wqs; i++) {
		int msix_idx = i + 1;

		ie = idxd_get_ie(idxd, msix_idx);
		ie->id = msix_idx;
		ie->int_handle = INVALID_INT_HANDLE;
		ie->pasid = IOMMU_PASID_INVALID;

		spin_lock_init(&ie->list_lock);
		init_llist_head(&ie->pending_llist);
		INIT_LIST_HEAD(&ie->work_list);
	}

	idxd_unmask_error_interrupts(idxd);
	return 0;

 err_misc_irq:
	idxd_mask_error_interrupts(idxd);
	pci_free_irq_vectors(pdev);
	dev_err(dev, "No usable interrupts\n");
	return rc;
}

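/* Mask error interrupts, free the misc IRQ, and release the MSI-X vectors. */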
static void idxd_cleanup_interrupts(struct idxd_device *idxd)
{
	struct pci_dev *pdev = idxd->pdev;
	struct idxd_irq_entry *ie;
	int msixcnt;

	msixcnt = pci_msix_vec_count(pdev);
	if (msixcnt <= 0)
		return;

	ie = idxd_get_ie(idxd, 0);
	idxd_mask_error_interrupts(idxd);
	free_irq(ie->vector, ie);
	pci_free_irq_vectors(pdev);
}

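/*
 * Allocate and initialize the work queue descriptors. Each wq is set up
 * as a child device on the dsa bus so it can be configured via sysfs;
 * on failure, already-initialized entries are released with put_device().
 */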
static int idxd_setup_wqs(struct idxd_device *idxd)
{
	struct device *dev = &idxd->pdev->dev;
	struct idxd_wq *wq;
	struct device *conf_dev;
	int i, rc;

	idxd->wqs = kcalloc_node(idxd->max_wqs, sizeof(struct idxd_wq *),
				 GFP_KERNEL, dev_to_node(dev));
	if (!idxd->wqs)
		return -ENOMEM;

	idxd->wq_enable_map = bitmap_zalloc_node(idxd->max_wqs, GFP_KERNEL, dev_to_node(dev));
	if (!idxd->wq_enable_map) {
		kfree(idxd->wqs);
		return -ENOMEM;
	}

	for (i = 0; i < idxd->max_wqs; i++) {
		wq = kzalloc_node(sizeof(*wq), GFP_KERNEL, dev_to_node(dev));
		if (!wq) {
			rc = -ENOMEM;
			goto err;
		}

		idxd_dev_set_type(&wq->idxd_dev, IDXD_DEV_WQ);
		conf_dev = wq_confdev(wq);
		wq->id = i;
		wq->idxd = idxd;
		device_initialize(wq_confdev(wq));
		conf_dev->parent = idxd_confdev(idxd);
		conf_dev->bus = &dsa_bus_type;
		conf_dev->type = &idxd_wq_device_type;
		rc = dev_set_name(conf_dev, "wq%d.%d", idxd->id, wq->id);
		if (rc < 0) {
			put_device(conf_dev);
			goto err;
		}

		mutex_init(&wq->wq_lock);
		init_waitqueue_head(&wq->err_queue);
		init_completion(&wq->wq_dead);
		init_completion(&wq->wq_resurrect);
		wq->max_xfer_bytes = WQ_DEFAULT_MAX_XFER;
		idxd_wq_set_max_batch_size(idxd->data->type, wq, WQ_DEFAULT_MAX_BATCH);
		wq->enqcmds_retries = IDXD_ENQCMDS_RETRIES;
		wq->wqcfg = kzalloc_node(idxd->wqcfg_size, GFP_KERNEL, dev_to_node(dev));
		if (!wq->wqcfg) {
			put_device(conf_dev);
			rc = -ENOMEM;
			goto err;
		}

		if (idxd->hw.wq_cap.op_config) {
			wq->opcap_bmap = bitmap_zalloc(IDXD_MAX_OPCAP_BITS, GFP_KERNEL);
			if (!wq->opcap_bmap) {
				put_device(conf_dev);
				rc = -ENOMEM;
				goto err;
			}
			bitmap_copy(wq->opcap_bmap, idxd->opcap_bmap, IDXD_MAX_OPCAP_BITS);
		}
		mutex_init(&wq->uc_lock);
		xa_init(&wq->upasid_xa);
		idxd->wqs[i] = wq;
	}

	return 0;

err:
	while (--i >= 0) {
		wq = idxd->wqs[i];
		conf_dev = wq_confdev(wq);
		put_device(conf_dev);
	}
	return rc;
}

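/* Allocate the engine descriptors and set each up as a child device on the dsa bus. */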
static int idxd_setup_engines(struct idxd_device *idxd)
{
	struct idxd_engine *engine;
	struct device *dev = &idxd->pdev->dev;
	struct device *conf_dev;
	int i, rc;

	idxd->engines = kcalloc_node(idxd->max_engines, sizeof(struct idxd_engine *),
				     GFP_KERNEL, dev_to_node(dev));
	if (!idxd->engines)
		return -ENOMEM;

	for (i = 0; i < idxd->max_engines; i++) {
		engine = kzalloc_node(sizeof(*engine), GFP_KERNEL, dev_to_node(dev));
		if (!engine) {
			rc = -ENOMEM;
			goto err;
		}

		idxd_dev_set_type(&engine->idxd_dev, IDXD_DEV_ENGINE);
		conf_dev = engine_confdev(engine);
		engine->id = i;
		engine->idxd = idxd;
		device_initialize(conf_dev);
		conf_dev->parent = idxd_confdev(idxd);
		conf_dev->bus = &dsa_bus_type;
		conf_dev->type = &idxd_engine_device_type;
		rc = dev_set_name(conf_dev, "engine%d.%d", idxd->id, engine->id);
		if (rc < 0) {
			put_device(conf_dev);
			goto err;
		}

		idxd->engines[i] = engine;
	}

	return 0;

err:
	while (--i >= 0) {
		engine = idxd->engines[i];
		conf_dev = engine_confdev(engine);
		put_device(conf_dev);
	}
	return rc;
}

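/*
 * Allocate the group descriptors. Traffic class defaults depend on the
 * device version unless overridden via the tc_override module parameter.
 */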
static int idxd_setup_groups(struct idxd_device *idxd)
{
	struct device *dev = &idxd->pdev->dev;
	struct device *conf_dev;
	struct idxd_group *group;
	int i, rc;

	idxd->groups = kcalloc_node(idxd->max_groups, sizeof(struct idxd_group *),
				    GFP_KERNEL, dev_to_node(dev));
	if (!idxd->groups)
		return -ENOMEM;

	for (i = 0; i < idxd->max_groups; i++) {
		group = kzalloc_node(sizeof(*group), GFP_KERNEL, dev_to_node(dev));
		if (!group) {
			rc = -ENOMEM;
			goto err;
		}

		idxd_dev_set_type(&group->idxd_dev, IDXD_DEV_GROUP);
		conf_dev = group_confdev(group);
		group->id = i;
		group->idxd = idxd;
		device_initialize(conf_dev);
		conf_dev->parent = idxd_confdev(idxd);
		conf_dev->bus = &dsa_bus_type;
		conf_dev->type = &idxd_group_device_type;
		rc = dev_set_name(conf_dev, "group%d.%d", idxd->id, group->id);
		if (rc < 0) {
			put_device(conf_dev);
			goto err;
		}

		idxd->groups[i] = group;
		if (idxd->hw.version <= DEVICE_VERSION_2 && !tc_override) {
			group->tc_a = 1;
			group->tc_b = 1;
		} else {
			group->tc_a = -1;
			group->tc_b = -1;
		}
		/*
		 * The default value is the same as the value of
		 * total read buffers in GRPCAP.
		 */
		group->rdbufs_allowed = idxd->max_rdbufs;
	}

	return 0;

err:
	while (--i >= 0) {
		group = idxd->groups[i];
		put_device(group_confdev(group));
	}
	return rc;
}

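/* Release all conf device references and destroy the device workqueue. */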
static void idxd_cleanup_internals(struct idxd_device *idxd)
{
	int i;

	for (i = 0; i < idxd->max_groups; i++)
		put_device(group_confdev(idxd->groups[i]));
	for (i = 0; i < idxd->max_engines; i++)
		put_device(engine_confdev(idxd->engines[i]));
	for (i = 0; i < idxd->max_wqs; i++)
		put_device(wq_confdev(idxd->wqs[i]));
	destroy_workqueue(idxd->wq);
}

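/*
 * Set up event log (EVL) state if the hardware advertises EVL support.
 * The fault cache is created with kmem_cache_create_usercopy() because
 * completion records allocated from it are copied to user space.
 */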
static int idxd_init_evl(struct idxd_device *idxd)
{
	struct device *dev = &idxd->pdev->dev;
	unsigned int evl_cache_size;
	struct idxd_evl *evl;
	const char *idxd_name;

	if (idxd->hw.gen_cap.evl_support == 0)
		return 0;

	evl = kzalloc_node(sizeof(*evl), GFP_KERNEL, dev_to_node(dev));
	if (!evl)
		return -ENOMEM;

	spin_lock_init(&evl->lock);
	evl->size = IDXD_EVL_SIZE_MIN;

	idxd_name = dev_name(idxd_confdev(idxd));
	evl_cache_size = sizeof(struct idxd_evl_fault) + evl_ent_size(idxd);
	/*
	 * Since completion records in evl_cache will be copied to user
	 * space when handling completion record page faults, the cache
	 * must be created suitable for user copy.
	 */
	idxd->evl_cache = kmem_cache_create_usercopy(idxd_name, evl_cache_size,
						     0, 0, 0, evl_cache_size,
						     NULL);
	if (!idxd->evl_cache) {
		kfree(evl);
		return -ENOMEM;
	}

	idxd->evl = evl;
	return 0;
}

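/*
 * Allocate all software state: work queues, engines, groups, the
 * device workqueue, and the event log. Each step unwinds the previous
 * ones on failure.
 */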
static int idxd_setup_internals(struct idxd_device *idxd)
{
	struct device *dev = &idxd->pdev->dev;
	int rc, i;

	init_waitqueue_head(&idxd->cmd_waitq);

	rc = idxd_setup_wqs(idxd);
	if (rc < 0)
		goto err_wqs;

	rc = idxd_setup_engines(idxd);
	if (rc < 0)
		goto err_engine;

	rc = idxd_setup_groups(idxd);
	if (rc < 0)
		goto err_group;

	idxd->wq = create_workqueue(dev_name(dev));
	if (!idxd->wq) {
		rc = -ENOMEM;
		goto err_wkq_create;
	}

	rc = idxd_init_evl(idxd);
	if (rc < 0)
		goto err_evl;

	return 0;

err_evl:
	destroy_workqueue(idxd->wq);
err_wkq_create:
	for (i = 0; i < idxd->max_groups; i++)
		put_device(group_confdev(idxd->groups[i]));
err_group:
	for (i = 0; i < idxd->max_engines; i++)
		put_device(engine_confdev(idxd->engines[i]));
err_engine:
	for (i = 0; i < idxd->max_wqs; i++)
		put_device(wq_confdev(idxd->wqs[i]));
err_wqs:
	return rc;
}

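/* Read the table offset register to locate the config tables in MMIO space. */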
static void idxd_read_table_offsets(struct idxd_device *idxd)
{
	union offsets_reg offsets;
	struct device *dev = &idxd->pdev->dev;

	offsets.bits[0] = ioread64(idxd->reg_base + IDXD_TABLE_OFFSET);
	offsets.bits[1] = ioread64(idxd->reg_base + IDXD_TABLE_OFFSET + sizeof(u64));
	idxd->grpcfg_offset = offsets.grpcfg * IDXD_TABLE_MULT;
	dev_dbg(dev, "IDXD Group Config Offset: %#x\n", idxd->grpcfg_offset);
	idxd->wqcfg_offset = offsets.wqcfg * IDXD_TABLE_MULT;
	dev_dbg(dev, "IDXD Work Queue Config Offset: %#x\n", idxd->wqcfg_offset);
	idxd->msix_perm_offset = offsets.msix_perm * IDXD_TABLE_MULT;
	dev_dbg(dev, "IDXD MSIX Permission Offset: %#x\n", idxd->msix_perm_offset);
	idxd->perfmon_offset = offsets.perfmon * IDXD_TABLE_MULT;
	dev_dbg(dev, "IDXD Perfmon Offset: %#x\n", idxd->perfmon_offset);
}

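/* Expand an array of u64 values into a flat bitmap, one bit per source bit. */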
void multi_u64_to_bmap(unsigned long *bmap, u64 *val, int count)
{
	int i, j, nr;

	for (i = 0, nr = 0; i < count; i++) {
		for (j = 0; j < BITS_PER_LONG_LONG; j++) {
			if (val[i] & BIT(j))
				set_bit(nr, bmap);
			nr++;
		}
	}
}

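/*
 * Read the capability registers and cache the device limits: max
 * transfer and batch sizes, group/engine/WQ counts, and the operation
 * capability bitmap.
 */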
static void idxd_read_caps(struct idxd_device *idxd)
{
	struct device *dev = &idxd->pdev->dev;
	int i;

	/* reading generic capabilities */
	idxd->hw.gen_cap.bits = ioread64(idxd->reg_base + IDXD_GENCAP_OFFSET);
	dev_dbg(dev, "gen_cap: %#llx\n", idxd->hw.gen_cap.bits);

	if (idxd->hw.gen_cap.cmd_cap) {
		idxd->hw.cmd_cap = ioread32(idxd->reg_base + IDXD_CMDCAP_OFFSET);
		dev_dbg(dev, "cmd_cap: %#x\n", idxd->hw.cmd_cap);
	}

	/* reading command capabilities */
	if (idxd->hw.cmd_cap & BIT(IDXD_CMD_REQUEST_INT_HANDLE))
		idxd->request_int_handles = true;

	idxd->max_xfer_bytes = 1ULL << idxd->hw.gen_cap.max_xfer_shift;
	dev_dbg(dev, "max xfer size: %llu bytes\n", idxd->max_xfer_bytes);
	idxd_set_max_batch_size(idxd->data->type, idxd, 1U << idxd->hw.gen_cap.max_batch_shift);
	dev_dbg(dev, "max batch size: %u\n", idxd->max_batch_size);
	if (idxd->hw.gen_cap.config_en)
		set_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags);

	/* reading group capabilities */
	idxd->hw.group_cap.bits =
		ioread64(idxd->reg_base + IDXD_GRPCAP_OFFSET);
	dev_dbg(dev, "group_cap: %#llx\n", idxd->hw.group_cap.bits);
	idxd->max_groups = idxd->hw.group_cap.num_groups;
	dev_dbg(dev, "max groups: %u\n", idxd->max_groups);
	idxd->max_rdbufs = idxd->hw.group_cap.total_rdbufs;
	dev_dbg(dev, "max read buffers: %u\n", idxd->max_rdbufs);
	idxd->nr_rdbufs = idxd->max_rdbufs;

	/* read engine capabilities */
	idxd->hw.engine_cap.bits =
		ioread64(idxd->reg_base + IDXD_ENGCAP_OFFSET);
	dev_dbg(dev, "engine_cap: %#llx\n", idxd->hw.engine_cap.bits);
	idxd->max_engines = idxd->hw.engine_cap.num_engines;
	dev_dbg(dev, "max engines: %u\n", idxd->max_engines);

	/* read workqueue capabilities */
	idxd->hw.wq_cap.bits = ioread64(idxd->reg_base + IDXD_WQCAP_OFFSET);
	dev_dbg(dev, "wq_cap: %#llx\n", idxd->hw.wq_cap.bits);
	idxd->max_wq_size = idxd->hw.wq_cap.total_wq_size;
	dev_dbg(dev, "total workqueue size: %u\n", idxd->max_wq_size);
	idxd->max_wqs = idxd->hw.wq_cap.num_wqs;
	dev_dbg(dev, "max workqueues: %u\n", idxd->max_wqs);
	idxd->wqcfg_size = 1 << (idxd->hw.wq_cap.wqcfg_size + IDXD_WQCFG_MIN);
	dev_dbg(dev, "wqcfg size: %u\n", idxd->wqcfg_size);

	/* reading operation capabilities */
	for (i = 0; i < 4; i++) {
		idxd->hw.opcap.bits[i] = ioread64(idxd->reg_base +
				IDXD_OPCAP_OFFSET + i * sizeof(u64));
		dev_dbg(dev, "opcap[%d]: %#llx\n", i, idxd->hw.opcap.bits[i]);
	}
	multi_u64_to_bmap(idxd->opcap_bmap, &idxd->hw.opcap.bits[0], 4);

	/* read iaa cap */
	if (idxd->data->type == IDXD_TYPE_IAX && idxd->hw.version >= DEVICE_VERSION_2)
		idxd->hw.iaa_cap.bits = ioread64(idxd->reg_base + IDXD_IAACAP_OFFSET);
}

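/*
 * Allocate the device context, assign it an ID from the driver-wide IDA,
 * and initialize its conf device on the dsa bus.
 */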
static struct idxd_device *idxd_alloc(struct pci_dev *pdev, struct idxd_driver_data *data)
{
	struct device *dev = &pdev->dev;
	struct device *conf_dev;
	struct idxd_device *idxd;
	int rc;

	idxd = kzalloc_node(sizeof(*idxd), GFP_KERNEL, dev_to_node(dev));
	if (!idxd)
		return NULL;

	conf_dev = idxd_confdev(idxd);
	idxd->pdev = pdev;
	idxd->data = data;
	idxd_dev_set_type(&idxd->idxd_dev, idxd->data->type);
	idxd->id = ida_alloc(&idxd_ida, GFP_KERNEL);
	if (idxd->id < 0)
		return NULL;

	idxd->opcap_bmap = bitmap_zalloc_node(IDXD_MAX_OPCAP_BITS, GFP_KERNEL, dev_to_node(dev));
	if (!idxd->opcap_bmap) {
		ida_free(&idxd_ida, idxd->id);
		return NULL;
	}

	device_initialize(conf_dev);
	conf_dev->parent = dev;
	conf_dev->bus = &dsa_bus_type;
	conf_dev->type = idxd->data->dev_type;
	rc = dev_set_name(conf_dev, "%s%d", idxd->data->name_prefix, idxd->id);
	if (rc < 0) {
		put_device(conf_dev);
		return NULL;
	}

	spin_lock_init(&idxd->dev_lock);
	spin_lock_init(&idxd->cmd_lock);

	return idxd;
}

static int idxd_enable_system_pasid(struct idxd_device *idxd)
{
	struct pci_dev *pdev = idxd->pdev;
	struct device *dev = &pdev->dev;
	struct iommu_domain *domain;
	ioasid_t pasid;
	int ret;

	/*
	 * Attach a global PASID to the DMA domain so that we can use ENQCMDS
	 * to submit work on buffers mapped by DMA API.
	 */
	domain = iommu_get_domain_for_dev(dev);
	if (!domain)
		return -EPERM;

	pasid = iommu_alloc_global_pasid(dev);
	if (pasid == IOMMU_PASID_INVALID)
		return -ENOSPC;

	/*
	 * DMA domain is owned by the driver, it should support all valid
	 * types such as DMA-FQ, identity, etc.
	 */
	ret = iommu_attach_device_pasid(domain, dev, pasid);
	if (ret) {
		dev_err(dev, "failed to attach device pasid %d, domain type %d",
			pasid, domain->type);
		iommu_free_global_pasid(pasid);
		return ret;
	}

	/* Since we set user privilege for kernel DMA, enable completion IRQ */
	idxd_set_user_intr(idxd, 1);
	idxd->pasid = pasid;

	return ret;
}

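/* Detach the system PASID from the IOMMU domain and free it. */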
static void idxd_disable_system_pasid(struct idxd_device *idxd)
{
	struct pci_dev *pdev = idxd->pdev;
	struct device *dev = &pdev->dev;
	struct iommu_domain *domain;

	domain = iommu_get_domain_for_dev(dev);
	if (!domain)
		return;

	iommu_detach_device_pasid(domain, dev, idxd->pasid);
	iommu_free_global_pasid(idxd->pasid);

	idxd_set_user_intr(idxd, 0);
	idxd->sva = NULL;
	idxd->pasid = IOMMU_PASID_INVALID;
}

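/* Enable the IOMMU page-fault and SVA features required for user PASID support. */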
static int idxd_enable_sva(struct pci_dev *pdev)
{
	int ret;

	ret = iommu_dev_enable_feature(&pdev->dev, IOMMU_DEV_FEAT_IOPF);
	if (ret)
		return ret;

	ret = iommu_dev_enable_feature(&pdev->dev, IOMMU_DEV_FEAT_SVA);
	if (ret)
		iommu_dev_disable_feature(&pdev->dev, IOMMU_DEV_FEAT_IOPF);

	return ret;
}

static void idxd_disable_sva(struct pci_dev *pdev)
{
	iommu_dev_disable_feature(&pdev->dev, IOMMU_DEV_FEAT_SVA);
	iommu_dev_disable_feature(&pdev->dev, IOMMU_DEV_FEAT_IOPF);
}

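/*
 * Device probe: reset the device, optionally enable SVA/PASID, read
 * capabilities and table offsets, then set up software state,
 * interrupts, and perfmon.
 */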
static int idxd_probe(struct idxd_device *idxd)
{
	struct pci_dev *pdev = idxd->pdev;
	struct device *dev = &pdev->dev;
	int rc;

	dev_dbg(dev, "%s entered and resetting device\n", __func__);
	rc = idxd_device_init_reset(idxd);
	if (rc < 0)
		return rc;

	dev_dbg(dev, "IDXD reset complete\n");

	if (IS_ENABLED(CONFIG_INTEL_IDXD_SVM) && sva) {
		if (idxd_enable_sva(pdev)) {
			dev_warn(dev, "Unable to turn on user SVA feature.\n");
		} else {
			set_bit(IDXD_FLAG_USER_PASID_ENABLED, &idxd->flags);

			rc = idxd_enable_system_pasid(idxd);
			if (rc)
				dev_warn(dev, "No in-kernel DMA with PASID. %d\n", rc);
			else
				set_bit(IDXD_FLAG_PASID_ENABLED, &idxd->flags);
		}
	} else if (!sva) {
		dev_warn(dev, "User forced SVA off via module param.\n");
	}

	idxd_read_caps(idxd);
	idxd_read_table_offsets(idxd);

	rc = idxd_setup_internals(idxd);
	if (rc)
		goto err;

	/* If the configs are readonly, then load them from device */
	if (!test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags)) {
		dev_dbg(dev, "Loading RO device config\n");
		rc = idxd_device_load_config(idxd);
		if (rc < 0)
			goto err_config;
	}

	rc = idxd_setup_interrupts(idxd);
	if (rc)
		goto err_config;

	idxd->major = idxd_cdev_get_major(idxd);

	rc = perfmon_pmu_init(idxd);
	if (rc < 0)
		dev_warn(dev, "Failed to initialize perfmon. No PMU support: %d\n", rc);

	dev_dbg(dev, "IDXD device %d probed successfully\n", idxd->id);
	return 0;

err_config:
	idxd_cleanup_internals(idxd);
err:
	if (device_pasid_enabled(idxd))
		idxd_disable_system_pasid(idxd);
	if (device_user_pasid_enabled(idxd))
		idxd_disable_sva(pdev);
	return rc;
}

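/* Undo everything idxd_probe() set up, in reverse order. */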
static void idxd_cleanup(struct idxd_device *idxd)
{
	perfmon_pmu_remove(idxd);
	idxd_cleanup_interrupts(idxd);
	idxd_cleanup_internals(idxd);
	if (device_pasid_enabled(idxd))
		idxd_disable_system_pasid(idxd);
	if (device_user_pasid_enabled(idxd))
		idxd_disable_sva(idxd->pdev);
}

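/*
 * PCI probe entry point: enable the PCI device, map the MMIO BAR, set
 * the DMA mask, then hand off to idxd_probe() and register sub-devices.
 */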
static int idxd_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
{
	struct device *dev = &pdev->dev;
	struct idxd_device *idxd;
	struct idxd_driver_data *data = (struct idxd_driver_data *)id->driver_data;
	int rc;

	rc = pci_enable_device(pdev);
	if (rc)
		return rc;

	dev_dbg(dev, "Alloc IDXD context\n");
	idxd = idxd_alloc(pdev, data);
	if (!idxd) {
		rc = -ENOMEM;
		goto err_idxd_alloc;
	}

	dev_dbg(dev, "Mapping BARs\n");
	idxd->reg_base = pci_iomap(pdev, IDXD_MMIO_BAR, 0);
	if (!idxd->reg_base) {
		rc = -ENOMEM;
		goto err_iomap;
	}

	dev_dbg(dev, "Set DMA masks\n");
	rc = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
	if (rc)
		goto err;

	dev_dbg(dev, "Set PCI master\n");
	pci_set_master(pdev);
	pci_set_drvdata(pdev, idxd);

	idxd->hw.version = ioread32(idxd->reg_base + IDXD_VER_OFFSET);
	rc = idxd_probe(idxd);
	if (rc) {
		dev_err(dev, "Intel(R) IDXD DMA Engine init failed\n");
		goto err;
	}

	if (data->load_device_defaults) {
		rc = data->load_device_defaults(idxd);
		if (rc)
			dev_warn(dev, "IDXD loading device defaults failed\n");
	}

	rc = idxd_register_devices(idxd);
	if (rc) {
		dev_err(dev, "IDXD sysfs setup failed\n");
		goto err_dev_register;
	}

	rc = idxd_device_init_debugfs(idxd);
	if (rc)
		dev_warn(dev, "IDXD debugfs failed to setup\n");

	dev_info(&pdev->dev, "Intel(R) Accelerator Device (v%x)\n",
		 idxd->hw.version);

	return 0;

err_dev_register:
	idxd_cleanup(idxd);
err:
	pci_iounmap(pdev, idxd->reg_base);
err_iomap:
	put_device(idxd_confdev(idxd));
err_idxd_alloc:
	pci_disable_device(pdev);
	return rc;
}

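/* Quiesce all enabled kernel-owned work queues. */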
void idxd_wqs_quiesce(struct idxd_device *idxd)
{
	struct idxd_wq *wq;
	int i;

	for (i = 0; i < idxd->max_wqs; i++) {
		wq = idxd->wqs[i];
		if (wq->state == IDXD_WQ_ENABLED && wq->type == IDXD_WQT_KERNEL)
			idxd_wq_quiesce(wq);
	}
}

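/* Disable the device and drain pending interrupts and deferred work. */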
static void idxd_shutdown(struct pci_dev *pdev)
{
	struct idxd_device *idxd = pci_get_drvdata(pdev);
	struct idxd_irq_entry *irq_entry;
	int rc;

	rc = idxd_device_disable(idxd);
	if (rc)
		dev_err(&pdev->dev, "Disabling device failed\n");

	irq_entry = &idxd->ie;
	synchronize_irq(irq_entry->vector);
	idxd_mask_error_interrupts(idxd);
	flush_workqueue(idxd->wq);
}

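/*
 * PCI remove: unregister sub-devices, shut the device down, and release
 * interrupts, mappings, and the device context.
 */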
static void idxd_remove(struct pci_dev *pdev)
{
	struct idxd_device *idxd = pci_get_drvdata(pdev);
	struct idxd_irq_entry *irq_entry;

	idxd_unregister_devices(idxd);
	/*
	 * When ->release() is called for the idxd->conf_dev, it frees all the memory related
	 * to the idxd context. The driver still needs those bits in order to do the rest of
	 * the cleanup. However, we do need to unbind the idxd sub-driver. So take a ref
	 * on the device here to hold off the freeing while allowing the idxd sub-driver
	 * to unbind.
	 */
	get_device(idxd_confdev(idxd));
	device_unregister(idxd_confdev(idxd));
	idxd_shutdown(pdev);
	if (device_pasid_enabled(idxd))
		idxd_disable_system_pasid(idxd);
	idxd_device_remove_debugfs(idxd);

	irq_entry = idxd_get_ie(idxd, 0);
	free_irq(irq_entry->vector, irq_entry);
	pci_free_irq_vectors(pdev);
	pci_iounmap(pdev, idxd->reg_base);
	if (device_user_pasid_enabled(idxd))
		idxd_disable_sva(pdev);
	pci_disable_device(pdev);
	destroy_workqueue(idxd->wq);
	perfmon_pmu_remove(idxd);
	put_device(idxd_confdev(idxd));
}

static struct pci_driver idxd_pci_driver = {
	.name = DRV_NAME,
	.id_table = idxd_pci_tbl,
	.probe = idxd_pci_probe,
	.remove = idxd_remove,
	.shutdown = idxd_shutdown,
};

static int __init idxd_init_module(void)
{
	int err;

	/*
	 * If the CPU does not support MOVDIR64B or ENQCMDS, there's no point
	 * in enumerating the device; we cannot use it.
	 */
	if (!cpu_feature_enabled(X86_FEATURE_MOVDIR64B)) {
		pr_warn("idxd driver failed to load without MOVDIR64B.\n");
		return -ENODEV;
	}

	if (!cpu_feature_enabled(X86_FEATURE_ENQCMD))
		pr_warn("Platform does not have ENQCMD(S) support.\n");
	else
		support_enqcmd = true;

	perfmon_init();

	err = idxd_driver_register(&idxd_drv);
	if (err < 0)
		goto err_idxd_driver_register;

	err = idxd_driver_register(&idxd_dmaengine_drv);
	if (err < 0)
		goto err_idxd_dmaengine_driver_register;

	err = idxd_driver_register(&idxd_user_drv);
	if (err < 0)
		goto err_idxd_user_driver_register;

	err = idxd_cdev_register();
	if (err)
		goto err_cdev_register;

	err = idxd_init_debugfs();
	if (err)
		goto err_debugfs;

	err = pci_register_driver(&idxd_pci_driver);
	if (err)
		goto err_pci_register;

	return 0;

err_pci_register:
	idxd_remove_debugfs();
err_debugfs:
	idxd_cdev_remove();
err_cdev_register:
	idxd_driver_unregister(&idxd_user_drv);
err_idxd_user_driver_register:
	idxd_driver_unregister(&idxd_dmaengine_drv);
err_idxd_dmaengine_driver_register:
	idxd_driver_unregister(&idxd_drv);
err_idxd_driver_register:
	return err;
}
module_init(idxd_init_module);

static void __exit idxd_exit_module(void)
{
	idxd_driver_unregister(&idxd_user_drv);
	idxd_driver_unregister(&idxd_dmaengine_drv);
	idxd_driver_unregister(&idxd_drv);
	pci_unregister_driver(&idxd_pci_driver);
	idxd_cdev_remove();
	perfmon_exit();
	idxd_remove_debugfs();
}
module_exit(idxd_exit_module);