// SPDX-License-Identifier: GPL-2.0
/* Copyright(c) 2019 Intel Corporation. All rights rsvd. */
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/pci.h>
#include <linux/io-64-nonatomic-lo-hi.h>
#include <linux/dmaengine.h>
#include <linux/delay.h>
#include <linux/iommu.h>
#include <linux/sched/mm.h>
#include <uapi/linux/idxd.h>
#include "../dmaengine.h"
#include "idxd.h"
#include "registers.h"

enum irq_work_type {
	IRQ_WORK_NORMAL = 0,
	IRQ_WORK_PROCESS_FAULT,
};

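/*
 * Work item used to resubmit a descriptor from process context after its
 * interrupt handle has been refreshed.
 */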
struct idxd_resubmit {
	struct work_struct work;
	struct idxd_desc *desc;
};

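/*
 * Work item used to handle an interrupt handle revoke event for a device
 * from process context.
 */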
struct idxd_int_handle_revoke {
	struct work_struct work;
	struct idxd_device *idxd;
};

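/*
 * Recover from a software-reset halt: reset the device, reapply the saved
 * configuration, then re-enable the device and any work queues that were
 * enabled before the halt. On failure the device state is cleared.
 */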
static void idxd_device_reinit(struct work_struct *work)
{
	struct idxd_device *idxd = container_of(work, struct idxd_device, work);
	struct device *dev = &idxd->pdev->dev;
	int rc, i;

	idxd_device_reset(idxd);
	rc = idxd_device_config(idxd);
	if (rc < 0)
		goto out;

	rc = idxd_device_enable(idxd);
	if (rc < 0)
		goto out;

	for (i = 0; i < idxd->max_wqs; i++) {
		if (test_bit(i, idxd->wq_enable_map)) {
			struct idxd_wq *wq = idxd->wqs[i];

			rc = idxd_wq_enable(wq);
			if (rc < 0) {
				clear_bit(i, idxd->wq_enable_map);
				dev_warn(dev, "Unable to re-enable wq %s\n",
					 dev_name(wq_confdev(wq)));
			}
		}
	}

	return;

out:
	idxd_device_clear_state(idxd);
}

/*
 * The function sends a drain descriptor for the interrupt handle. The drain
 * ensures that all descriptors with this interrupt handle are flushed and the
 * interrupt will allow the cleanup of the outstanding descriptors.
 */
static void idxd_int_handle_revoke_drain(struct idxd_irq_entry *ie)
{
	struct idxd_wq *wq = ie_to_wq(ie);
	struct idxd_device *idxd = wq->idxd;
	struct device *dev = &idxd->pdev->dev;
	struct dsa_hw_desc desc = {};
	void __iomem *portal;
	int rc;

	/* Issue a simple drain operation with interrupt but no completion record */
	desc.flags = IDXD_OP_FLAG_RCI;
	desc.opcode = DSA_OPCODE_DRAIN;
	desc.priv = 1;

	if (ie->pasid != IOMMU_PASID_INVALID)
		desc.pasid = ie->pasid;
	desc.int_handle = ie->int_handle;
	portal = idxd_wq_portal_addr(wq);

	/*
	 * The wmb() makes sure that the descriptor is all there before we
	 * issue.
	 */
	wmb();
	if (wq_dedicated(wq)) {
		iosubmit_cmds512(portal, &desc, 1);
	} else {
		rc = idxd_enqcmds(wq, portal, &desc);
		/* This should not fail unless hardware failed. */
		if (rc < 0)
			dev_warn(dev, "Failed to submit drain desc on wq %d\n", wq->id);
	}
}

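/*
 * Move all pending descriptors for this interrupt entry onto the work list,
 * then complete any descriptor that failed with an invalid interrupt handle
 * status as aborted so its submitter can clean up.
 */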
static void idxd_abort_invalid_int_handle_descs(struct idxd_irq_entry *ie)
{
	LIST_HEAD(flist);
	struct idxd_desc *d, *t;
	struct llist_node *head;

	spin_lock(&ie->list_lock);
	head = llist_del_all(&ie->pending_llist);
	if (head) {
		llist_for_each_entry_safe(d, t, head, llnode)
			list_add_tail(&d->list, &ie->work_list);
	}

	list_for_each_entry_safe(d, t, &ie->work_list, list) {
		if (d->completion->status == DSA_COMP_INT_HANDLE_INVAL)
			list_move_tail(&d->list, &flist);
	}
	spin_unlock(&ie->list_lock);

	list_for_each_entry_safe(d, t, &flist, list) {
		list_del(&d->list);
		idxd_desc_complete(d, IDXD_COMPLETE_ABORT, true);
	}
}

static void idxd_int_handle_revoke(struct work_struct *work)
{
	struct idxd_int_handle_revoke *revoke =
		container_of(work, struct idxd_int_handle_revoke, work);
	struct idxd_device *idxd = revoke->idxd;
	struct pci_dev *pdev = idxd->pdev;
	struct device *dev = &pdev->dev;
	int i, new_handle, rc;

	if (!idxd->request_int_handles) {
		kfree(revoke);
		dev_warn(dev, "Unexpected int handle refresh interrupt.\n");
		return;
	}

	/*
	 * The loop attempts to acquire a new interrupt handle for every
	 * interrupt vector that supports a handle. If a new interrupt handle
	 * is acquired and the wq is of kernel type, the driver kills the
	 * percpu_ref to pause all ongoing descriptor submissions. The
	 * interrupt handle is then changed. After the change, the percpu_ref
	 * is revived and all the pending submissions are woken to try again.
	 * A drain is sent for the interrupt handle at the end to make sure
	 * all invalid int handle descriptors are processed.
	 */
	for (i = 1; i < idxd->irq_cnt; i++) {
		struct idxd_irq_entry *ie = idxd_get_ie(idxd, i);
		struct idxd_wq *wq = ie_to_wq(ie);

		if (ie->int_handle == INVALID_INT_HANDLE)
			continue;

		rc = idxd_device_request_int_handle(idxd, i, &new_handle, IDXD_IRQ_MSIX);
		if (rc < 0) {
			dev_warn(dev, "get int handle %d failed: %d\n", i, rc);
			/*
			 * Failed to acquire a new interrupt handle. Kill the WQ
			 * and release all the pending submitters. The submitters
			 * will get an error return code and handle it
			 * appropriately.
			 */
			ie->int_handle = INVALID_INT_HANDLE;
			idxd_wq_quiesce(wq);
			idxd_abort_invalid_int_handle_descs(ie);
			continue;
		}

		/* No change in interrupt handle, nothing needs to be done */
		if (ie->int_handle == new_handle)
			continue;

		if (wq->state != IDXD_WQ_ENABLED || wq->type != IDXD_WQT_KERNEL) {
			/*
			 * All the MSIX interrupts are allocated at once during probe.
			 * Therefore we need to update all interrupts even if the WQ
			 * doesn't support interrupt operations.
			 */
			ie->int_handle = new_handle;
			continue;
		}

		mutex_lock(&wq->wq_lock);
		reinit_completion(&wq->wq_resurrect);

		/* Kill percpu_ref to pause additional descriptor submissions */
		percpu_ref_kill(&wq->wq_active);

		/* Wait for all submitters to quiesce before we change the interrupt handle */
		wait_for_completion(&wq->wq_dead);

		ie->int_handle = new_handle;

		/* Revive percpu ref and wake up all the waiting submitters */
		percpu_ref_reinit(&wq->wq_active);
		complete_all(&wq->wq_resurrect);
		mutex_unlock(&wq->wq_lock);

		/*
		 * The delay here waits for any MOVDIR64B issued before
		 * percpu_ref_kill() to reach the PCIe domain before the drain
		 * is issued. The driver needs to ensure that the drain
		 * descriptor does not pass the other issued descriptors that
		 * carry the invalid interrupt handle, so that the drain
		 * descriptor's interrupt allows the cleanup of all descriptors
		 * with the invalid interrupt handle.
		 */
		if (wq_dedicated(wq))
			udelay(100);
		idxd_int_handle_revoke_drain(ie);
	}
	kfree(revoke);
}

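/*
 * Deferred handling of an event log fault entry: copy the completion record
 * to the faulting address in the user address space identified by the wq and
 * PASID recorded in the entry.
 */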
static void idxd_evl_fault_work(struct work_struct *work)
{
	struct idxd_evl_fault *fault = container_of(work, struct idxd_evl_fault, work);
	struct idxd_wq *wq = fault->wq;
	struct idxd_device *idxd = wq->idxd;
	struct device *dev = &idxd->pdev->dev;
	struct idxd_evl *evl = idxd->evl;
	struct __evl_entry *entry_head = fault->entry;
	void *cr = (void *)entry_head + idxd->data->evl_cr_off;
	int cr_size = idxd->data->compl_size;
	u8 *status = (u8 *)cr + idxd->data->cr_status_off;
	u8 *result = (u8 *)cr + idxd->data->cr_result_off;
	int copied, copy_size;
	bool *bf;

	switch (fault->status) {
	case DSA_COMP_CRA_XLAT:
		if (entry_head->batch && entry_head->first_err_in_batch)
			evl->batch_fail[entry_head->batch_id] = false;

		copy_size = cr_size;
		idxd_user_counter_increment(wq, entry_head->pasid, COUNTER_FAULTS);
		break;
	case DSA_COMP_BATCH_EVL_ERR:
		bf = &evl->batch_fail[entry_head->batch_id];

		copy_size = entry_head->rcr || *bf ? cr_size : 0;
		if (*bf) {
			if (*status == DSA_COMP_SUCCESS)
				*status = DSA_COMP_BATCH_FAIL;
			*result = 1;
			*bf = false;
		}
		idxd_user_counter_increment(wq, entry_head->pasid, COUNTER_FAULTS);
		break;
	case DSA_COMP_DRAIN_EVL:
		copy_size = cr_size;
		break;
	default:
		copy_size = 0;
		dev_dbg_ratelimited(dev, "Unrecognized error code: %#x\n", fault->status);
		break;
	}

	if (copy_size == 0)
		return;

	/*
	 * Copy the completion record to fault_addr in the user address space
	 * that is found by the wq and PASID.
	 */
	copied = idxd_copy_cr(wq, entry_head->pasid, entry_head->fault_addr,
			      cr, copy_size);
	/*
	 * The task that triggered the page fault is currently unknown,
	 * because multiple threads may share the user address space or the
	 * task may already have exited before this fault. So if the copy
	 * fails, SIGSEGV cannot be sent to the task. Just print an error for
	 * the failure. The user application waiting for the completion record
	 * will time out on this failure.
	 */
	switch (fault->status) {
	case DSA_COMP_CRA_XLAT:
		if (copied != copy_size) {
			idxd_user_counter_increment(wq, entry_head->pasid, COUNTER_FAULT_FAILS);
			dev_dbg_ratelimited(dev, "Failed to write to completion record: (%d:%d)\n",
					    copy_size, copied);
			if (entry_head->batch)
				evl->batch_fail[entry_head->batch_id] = true;
		}
		break;
	case DSA_COMP_BATCH_EVL_ERR:
		if (copied != copy_size) {
			idxd_user_counter_increment(wq, entry_head->pasid, COUNTER_FAULT_FAILS);
			dev_dbg_ratelimited(dev, "Failed to write to batch completion record: (%d:%d)\n",
					    copy_size, copied);
		}
		break;
	case DSA_COMP_DRAIN_EVL:
		if (copied != copy_size)
			dev_dbg_ratelimited(dev, "Failed to write to drain completion record: (%d:%d)\n",
					    copy_size, copied);
		break;
	}

	kmem_cache_free(idxd->evl_cache, fault);
}

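/*
 * Handle a single event log entry. Fault-type statuses are dispatched to the
 * wq's workqueue so the completion record can be written back from process
 * context; any other error is logged.
 */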
static void process_evl_entry(struct idxd_device *idxd,
			      struct __evl_entry *entry_head, unsigned int index)
{
	struct device *dev = &idxd->pdev->dev;
	struct idxd_evl *evl = idxd->evl;
	u8 status;

	if (test_bit(index, evl->bmap)) {
		clear_bit(index, evl->bmap);
	} else {
		status = DSA_COMP_STATUS(entry_head->error);

		if (status == DSA_COMP_CRA_XLAT || status == DSA_COMP_DRAIN_EVL ||
		    status == DSA_COMP_BATCH_EVL_ERR) {
			struct idxd_evl_fault *fault;
			int ent_size = evl_ent_size(idxd);

			if (entry_head->rci)
				dev_dbg(dev, "Completion Int Req set, ignoring!\n");

			if (!entry_head->rcr && status == DSA_COMP_DRAIN_EVL)
				return;

			fault = kmem_cache_alloc(idxd->evl_cache, GFP_ATOMIC);
			if (fault) {
				struct idxd_wq *wq = idxd->wqs[entry_head->wq_idx];

				fault->wq = wq;
				fault->status = status;
				memcpy(&fault->entry, entry_head, ent_size);
				INIT_WORK(&fault->work, idxd_evl_fault_work);
				queue_work(wq->wq, &fault->work);
			} else {
				dev_warn(dev, "Failed to service fault work.\n");
			}
		} else {
			dev_warn_ratelimited(dev, "Device error %#x operation: %#x fault addr: %#llx\n",
					     status, entry_head->operation,
					     entry_head->fault_addr);
		}
	}
}

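/*
 * Drain the event log ring: acknowledge the interrupt, then walk the entries
 * from head to tail and write the new head back to the device.
 */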
static void process_evl_entries(struct idxd_device *idxd)
{
	union evl_status_reg evl_status;
	unsigned int h, t;
	struct idxd_evl *evl = idxd->evl;
	struct __evl_entry *entry_head;
	unsigned int ent_size = evl_ent_size(idxd);
	u32 size;

	evl_status.bits = 0;
	evl_status.int_pending = 1;

	spin_lock(&evl->lock);
	/* Clear interrupt pending bit */
	iowrite32(evl_status.bits_upper32,
		  idxd->reg_base + IDXD_EVLSTATUS_OFFSET + sizeof(u32));
	evl_status.bits = ioread64(idxd->reg_base + IDXD_EVLSTATUS_OFFSET);
	t = evl_status.tail;
	h = evl_status.head;
	size = idxd->evl->size;

	while (h != t) {
		entry_head = (struct __evl_entry *)(evl->log + (h * ent_size));
		process_evl_entry(idxd, entry_head, h);
		h = (h + 1) % size;
	}

	evl_status.head = h;
	iowrite32(evl_status.bits_lower32, idxd->reg_base + IDXD_EVLSTATUS_OFFSET);
	spin_unlock(&evl->lock);
}

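/*
 * Threaded handler for the device's miscellaneous (non-completion) interrupt.
 * It reads and acknowledges INTCAUSE, then services the error, interrupt
 * handle revoke, command completion, perfmon overflow, and event log causes.
 * A halted device falls through to the recovery path at the end.
 */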
irqreturn_t idxd_misc_thread(int vec, void *data)
{
	struct idxd_irq_entry *irq_entry = data;
	struct idxd_device *idxd = ie_to_idxd(irq_entry);
	struct device *dev = &idxd->pdev->dev;
	union gensts_reg gensts;
	u32 val = 0;
	int i;
	bool err = false;
	u32 cause;

	cause = ioread32(idxd->reg_base + IDXD_INTCAUSE_OFFSET);
	if (!cause)
		return IRQ_NONE;

	iowrite32(cause, idxd->reg_base + IDXD_INTCAUSE_OFFSET);

	if (cause & IDXD_INTC_HALT_STATE)
		goto halt;

	if (cause & IDXD_INTC_ERR) {
		spin_lock(&idxd->dev_lock);
		for (i = 0; i < 4; i++)
			idxd->sw_err.bits[i] = ioread64(idxd->reg_base +
					IDXD_SWERR_OFFSET + i * sizeof(u64));

		iowrite64(idxd->sw_err.bits[0] & IDXD_SWERR_ACK,
			  idxd->reg_base + IDXD_SWERR_OFFSET);

		if (idxd->sw_err.valid && idxd->sw_err.wq_idx_valid) {
			int id = idxd->sw_err.wq_idx;
			struct idxd_wq *wq = idxd->wqs[id];

			if (wq->type == IDXD_WQT_USER)
				wake_up_interruptible(&wq->err_queue);
		} else {
			int i;

			for (i = 0; i < idxd->max_wqs; i++) {
				struct idxd_wq *wq = idxd->wqs[i];

				if (wq->type == IDXD_WQT_USER)
					wake_up_interruptible(&wq->err_queue);
			}
		}

		spin_unlock(&idxd->dev_lock);
		val |= IDXD_INTC_ERR;

		for (i = 0; i < 4; i++)
			dev_warn_ratelimited(dev, "err[%d]: %#16.16llx\n",
					     i, idxd->sw_err.bits[i]);
		err = true;
	}

	if (cause & IDXD_INTC_INT_HANDLE_REVOKED) {
		struct idxd_int_handle_revoke *revoke;

		val |= IDXD_INTC_INT_HANDLE_REVOKED;

		revoke = kzalloc(sizeof(*revoke), GFP_ATOMIC);
		if (revoke) {
			revoke->idxd = idxd;
			INIT_WORK(&revoke->work, idxd_int_handle_revoke);
			queue_work(idxd->wq, &revoke->work);
		} else {
			dev_err(dev, "Failed to allocate work for int handle revoke\n");
			idxd_wqs_quiesce(idxd);
		}
	}

	if (cause & IDXD_INTC_CMD) {
		val |= IDXD_INTC_CMD;
		complete(idxd->cmd_done);
	}

	if (cause & IDXD_INTC_OCCUPY) {
		/* Driver does not utilize occupancy interrupt */
		val |= IDXD_INTC_OCCUPY;
	}

	if (cause & IDXD_INTC_PERFMON_OVFL) {
		val |= IDXD_INTC_PERFMON_OVFL;
		perfmon_counter_overflow(idxd);
	}

	if (cause & IDXD_INTC_EVL) {
		val |= IDXD_INTC_EVL;
		process_evl_entries(idxd);
	}

	val ^= cause;
	if (val)
		dev_warn_once(dev, "Unexpected interrupt cause bits set: %#x\n",
			      val);

	if (!err)
		goto out;

halt:
	gensts.bits = ioread32(idxd->reg_base + IDXD_GENSTATS_OFFSET);
	if (gensts.state == IDXD_DEVICE_STATE_HALT) {
		idxd->state = IDXD_DEV_HALTED;
		if (gensts.reset_type == IDXD_DEVICE_RESET_SOFTWARE) {
			/*
			 * If a software reset is needed, queue the reinit work
			 * on a workqueue so that interrupts for the device
			 * command completions can still be serviced.
			 */
			INIT_WORK(&idxd->work, idxd_device_reinit);
			queue_work(idxd->wq, &idxd->work);
		} else {
			idxd->state = IDXD_DEV_HALTED;
			idxd_wqs_quiesce(idxd);
			idxd_wqs_unmap_portal(idxd);
			idxd_device_clear_state(idxd);
			dev_err(&idxd->pdev->dev,
				"idxd halted, need %s.\n",
				gensts.reset_type == IDXD_DEVICE_RESET_FLR ?
				"FLR" : "system reset");
		}
	}

out:
	return IRQ_HANDLED;
}

static void idxd_int_handle_resubmit_work(struct work_struct *work)
{
	struct idxd_resubmit *irw = container_of(work, struct idxd_resubmit, work);
	struct idxd_desc *desc = irw->desc;
	struct idxd_wq *wq = desc->wq;
	int rc;

	desc->completion->status = 0;
	rc = idxd_submit_desc(wq, desc);
	if (rc < 0) {
		dev_dbg(&wq->idxd->pdev->dev, "Failed to resubmit desc %d to wq %d.\n",
			desc->id, wq->id);
		/*
		 * If the error is not -EAGAIN, the submission failed because
		 * the wq has been killed rather than because ENQCMDS failed.
		 * Here the driver needs to notify the submitter of the failure
		 * by reporting abort status.
		 *
		 * -EAGAIN comes from an ENQCMDS failure. idxd_submit_desc()
		 * will handle the abort.
		 */
		if (rc != -EAGAIN) {
			desc->completion->status = IDXD_COMP_DESC_ABORT;
			idxd_desc_complete(desc, IDXD_COMPLETE_ABORT, false);
		}
		idxd_free_desc(wq, desc);
	}
	kfree(irw);
}

bool idxd_queue_int_handle_resubmit(struct idxd_desc *desc)
{
	struct idxd_wq *wq = desc->wq;
	struct idxd_device *idxd = wq->idxd;
	struct idxd_resubmit *irw;

	irw = kzalloc(sizeof(*irw), GFP_KERNEL);
	if (!irw)
		return false;

	irw->desc = desc;
	INIT_WORK(&irw->work, idxd_int_handle_resubmit_work);
	queue_work(idxd->wq, &irw->work);
	return true;
}

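/*
 * Take everything off the lockless pending list. Descriptors that already
 * have a completion status are completed here; descriptors still in flight
 * are moved onto the work list for the next pass.
 */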
static void irq_process_pending_llist(struct idxd_irq_entry *irq_entry)
{
	struct idxd_desc *desc, *t;
	struct llist_node *head;

	head = llist_del_all(&irq_entry->pending_llist);
	if (!head)
		return;

	llist_for_each_entry_safe(desc, t, head, llnode) {
		u8 status = desc->completion->status & DSA_COMP_STATUS_MASK;

		if (status) {
			/*
			 * Check against the original status as ABORT is software defined
			 * and 0xff, which DSA_COMP_STATUS_MASK can mask out.
			 */
			if (unlikely(desc->completion->status == IDXD_COMP_DESC_ABORT)) {
				idxd_desc_complete(desc, IDXD_COMPLETE_ABORT, true);
				continue;
			}

			idxd_desc_complete(desc, IDXD_COMPLETE_NORMAL, true);
		} else {
			spin_lock(&irq_entry->list_lock);
			list_add_tail(&desc->list,
				      &irq_entry->work_list);
			spin_unlock(&irq_entry->list_lock);
		}
	}
}

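/*
 * Move completed descriptors from the work list onto a local list under the
 * list lock, then complete them outside of the lock.
 */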
static void irq_process_work_list(struct idxd_irq_entry *irq_entry)
{
	LIST_HEAD(flist);
	struct idxd_desc *desc, *n;

	/*
	 * This lock protects the list from corruption by accesses outside
	 * of the irq handler thread.
	 */
	spin_lock(&irq_entry->list_lock);
	if (list_empty(&irq_entry->work_list)) {
		spin_unlock(&irq_entry->list_lock);
		return;
	}

	list_for_each_entry_safe(desc, n, &irq_entry->work_list, list) {
		if (desc->completion->status) {
			list_move_tail(&desc->list, &flist);
		}
	}

	spin_unlock(&irq_entry->list_lock);

	list_for_each_entry(desc, &flist, list) {
		/*
		 * Check against the original status as ABORT is software defined
		 * and 0xff, which DSA_COMP_STATUS_MASK can mask out.
		 */
		if (unlikely(desc->completion->status == IDXD_COMP_DESC_ABORT)) {
			idxd_desc_complete(desc, IDXD_COMPLETE_ABORT, true);
			continue;
		}

		idxd_desc_complete(desc, IDXD_COMPLETE_NORMAL, true);
	}
}

irqreturn_t idxd_wq_thread(int irq, void *data)
{
	struct idxd_irq_entry *irq_entry = data;

	/*
	 * There are two lists we are processing. The pending_llist is where
	 * the submitter adds all the submitted descriptors after sending them
	 * to the workqueue. It's a lockless singly linked list. The work_list
	 * is the common Linux doubly linked list. We are in a scenario of
	 * multiple producers and a single consumer. The producers are all
	 * the kernel submitters of descriptors, and the consumer is the
	 * kernel irq handler thread for the msix vector when using threaded
	 * irq. To work with the restrictions of llist to remain lockless,
	 * we are doing the following steps:
	 * 1. Iterate through the work_list and process any completed
	 *    descriptor. Delete the completed entries during iteration.
	 * 2. llist_del_all() from the pending list.
	 * 3. Iterate through the llist that was deleted from the pending list
	 *    and process the completed entries.
	 * 4. If the entry is still waiting on hardware, list_add_tail() to
	 *    the work_list.
	 */
	irq_process_work_list(irq_entry);
	irq_process_pending_llist(irq_entry);

	return IRQ_HANDLED;
}