// SPDX-License-Identifier: GPL-2.0
/* Copyright(c) 2019 Intel Corporation. All rights rsvd. */
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/pci.h>
#include <uapi/linux/idxd.h>
#include "idxd.h"
#include "registers.h"

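/*
 * Initialize a freshly allocated descriptor: clear the hardware descriptor
 * and completion record, note the allocating CPU for sbitmap accounting,
 * and stamp the device PASID when the device operates with PASID enabled.
 */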
static struct idxd_desc *__get_desc(struct idxd_wq *wq, int idx, int cpu)
{
	struct idxd_desc *desc;
	struct idxd_device *idxd = wq->idxd;

	desc = wq->descs[idx];
	memset(desc->hw, 0, sizeof(struct dsa_hw_desc));
	memset(desc->completion, 0, idxd->data->compl_size);
	desc->cpu = cpu;

	if (device_pasid_enabled(idxd))
		desc->hw->pasid = idxd->pasid;

	return desc;
}

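/*
 * Allocate a descriptor from the wq's sbitmap_queue. IDXD_OP_NONBLOCK
 * callers get ERR_PTR(-EAGAIN) when the pool is exhausted; IDXD_OP_BLOCK
 * callers sleep interruptibly until a descriptor is freed or a signal
 * arrives. Never returns NULL; errors come back as ERR_PTR() values.
 */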
struct idxd_desc *idxd_alloc_desc(struct idxd_wq *wq, enum idxd_op_type optype)
{
	int cpu, idx;
	struct idxd_device *idxd = wq->idxd;
	DEFINE_SBQ_WAIT(wait);
	struct sbq_wait_state *ws;
	struct sbitmap_queue *sbq;

	if (idxd->state != IDXD_DEV_ENABLED)
		return ERR_PTR(-EIO);

	sbq = &wq->sbq;
	idx = sbitmap_queue_get(sbq, &cpu);
	if (idx < 0) {
		if (optype == IDXD_OP_NONBLOCK)
			return ERR_PTR(-EAGAIN);
	} else {
		return __get_desc(wq, idx, cpu);
	}

	ws = &sbq->ws[0];
	for (;;) {
		sbitmap_prepare_to_wait(sbq, ws, &wait, TASK_INTERRUPTIBLE);
		if (signal_pending_state(TASK_INTERRUPTIBLE, current))
			break;
		idx = sbitmap_queue_get(sbq, &cpu);
		if (idx >= 0)
			break;
		schedule();
	}

	sbitmap_finish_wait(sbq, ws, &wait);
	if (idx < 0)
		return ERR_PTR(-EAGAIN);

	return __get_desc(wq, idx, cpu);
}
EXPORT_SYMBOL_NS_GPL(idxd_alloc_desc, IDXD);

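/* Return a descriptor to the wq's free pool by clearing its sbitmap bit. */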
void idxd_free_desc(struct idxd_wq *wq, struct idxd_desc *desc)
{
	int cpu = desc->cpu;

	desc->cpu = -1;
	sbitmap_queue_clear(&wq->sbq, desc->id, cpu);
}
EXPORT_SYMBOL_NS_GPL(idxd_free_desc, IDXD);

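/*
 * Search the irq_entry's work list for the descriptor being aborted and
 * unlink it. Returns the descriptor if it was found here, or NULL if the
 * completion handler currently owns it (see the comment below).
 */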
static struct idxd_desc *list_abort_desc(struct idxd_wq *wq, struct idxd_irq_entry *ie,
					 struct idxd_desc *desc)
{
	struct idxd_desc *d, *n;

	lockdep_assert_held(&ie->list_lock);
	list_for_each_entry_safe(d, n, &ie->work_list, list) {
		if (d == desc) {
			list_del(&d->list);
			return d;
		}
	}

	/*
	 * At this point, the desc to be aborted is held by the completion
	 * handler, which has taken it off the pending list but has not yet
	 * added it to the work list. It will be cleaned up by the interrupt
	 * handler when it sees the IDXD_COMP_DESC_ABORT completion status.
	 */
	return NULL;
}

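/*
 * Abort a submitted descriptor: mark its completion record with
 * IDXD_COMP_DESC_ABORT, then pull it off the pending llist or the work
 * list. Descriptors that completed in the meantime are flushed onto a
 * local list and completed outside the lock.
 */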
static void llist_abort_desc(struct idxd_wq *wq, struct idxd_irq_entry *ie,
			     struct idxd_desc *desc)
{
	struct idxd_desc *d, *t, *found = NULL;
	struct llist_node *head;
	LIST_HEAD(flist);

	desc->completion->status = IDXD_COMP_DESC_ABORT;
	/*
	 * Grab the list lock so it will block the irq thread handler. This
	 * allows the abort code to locate the descriptor that needs to be
	 * aborted.
	 */
	spin_lock(&ie->list_lock);
	head = llist_del_all(&ie->pending_llist);
	if (head) {
		llist_for_each_entry_safe(d, t, head, llnode) {
			if (d == desc) {
				found = desc;
				continue;
			}

			if (d->completion->status)
				list_add_tail(&d->list, &flist);
			else
				list_add_tail(&d->list, &ie->work_list);
		}
	}

	if (!found)
		found = list_abort_desc(wq, ie, desc);
	spin_unlock(&ie->list_lock);

	if (found)
		idxd_dma_complete_txd(found, IDXD_COMPLETE_ABORT, false,
				      NULL, NULL);

	/*
	 * Completing a descriptor returns it to the allocator, where it can
	 * be acquired by a different process and have its desc->list
	 * modified. Delete each desc from the list first so the traversal
	 * does not get corrupted by the other process.
	 */
	list_for_each_entry_safe(d, t, &flist, list) {
		list_del_init(&d->list);
		idxd_dma_complete_txd(d, IDXD_COMPLETE_ABORT, true,
				      NULL, NULL);
	}
}

/*
 * ENQCMDS typically fails when the WQ is inactive or busy. On host submission, the driver
 * has better control of the number of descriptors being submitted to a shared wq by limiting
 * the number of driver-allocated descriptors to the wq size. However, when the swq is
 * exported to a guest kernel, it may be shared with multiple guest kernels. This means
 * the likelihood of getting busy returned on the swq when submitting goes up significantly.
 * Having a tunable retry mechanism allows the driver to keep trying for a while before
 * giving up. The sysfs knob can be tuned by the system administrator.
 */
int idxd_enqcmds(struct idxd_wq *wq, void __iomem *portal, const void *desc)
{
	unsigned int retries = wq->enqcmds_retries;
	int rc;

	do {
		rc = enqcmds(portal, desc);
		if (rc == 0)
			break;
		cpu_relax();
	} while (retries--);

	return rc;
}

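/*
 * Submit a previously allocated descriptor to the device. A sketch of the
 * expected caller flow (illustrative only, not code from this file):
 *
 *	desc = idxd_alloc_desc(wq, IDXD_OP_BLOCK);
 *	... program desc->hw (opcode, addresses, flags) ...
 *	rc = idxd_submit_desc(wq, desc);
 *	if (rc < 0)
 *		idxd_free_desc(wq, desc);
 *
 * Returns 0 on success, -EIO if the device is not enabled, -ENXIO if the
 * wq did not come back alive after a reset, or the enqcmds() error
 * (-EAGAIN) when retries on a shared wq are exhausted.
 */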
int idxd_submit_desc(struct idxd_wq *wq, struct idxd_desc *desc)
{
	struct idxd_device *idxd = wq->idxd;
	struct idxd_irq_entry *ie = NULL;
	u32 desc_flags = desc->hw->flags;
	void __iomem *portal;
	int rc;

	if (idxd->state != IDXD_DEV_ENABLED)
		return -EIO;

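	/*
	 * The wq may be quiescing or resetting. Wait on wq_resurrect and
	 * retry the live reference once before failing the submission.
	 */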
	if (!percpu_ref_tryget_live(&wq->wq_active)) {
		wait_for_completion(&wq->wq_resurrect);
		if (!percpu_ref_tryget_live(&wq->wq_active))
			return -ENXIO;
	}

	portal = idxd_wq_portal_addr(wq);

	/*
	 * Queue the descriptor on the lockless pending list of the
	 * irq_entry that this descriptor was assigned to.
	 */
	if (desc_flags & IDXD_OP_FLAG_RCI) {
		ie = &wq->ie;
		desc->hw->int_handle = ie->int_handle;
		llist_add(&desc->llnode, &ie->pending_llist);
	}

	/*
	 * The wmb() flushes writes to coherent DMA data before
	 * possibly triggering a DMA read. The wmb() is necessary
	 * even on UP because the recipient is a device.
	 */
	wmb();

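	/*
	 * Dedicated wqs are written with MOVDIR64B, a posted write that
	 * cannot be rejected. Shared wqs use ENQCMDS, which can fail when
	 * the wq is full or inactive, so the submission must be unwound
	 * on failure.
	 */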
	if (wq_dedicated(wq)) {
		iosubmit_cmds512(portal, desc->hw, 1);
	} else {
		rc = idxd_enqcmds(wq, portal, desc->hw);
		if (rc < 0) {
			percpu_ref_put(&wq->wq_active);
			/* abort operation frees the descriptor */
			if (ie)
				llist_abort_desc(wq, ie, desc);
			return rc;
		}
	}

	percpu_ref_put(&wq->wq_active);
	return 0;
}
EXPORT_SYMBOL_NS_GPL(idxd_submit_desc, IDXD);