1 | /* SPDX-License-Identifier: GPL-2.0-only */ |
2 | /* |
3 | * AMD Passthru DMA device driver |
4 | * -- Based on the CCP driver |
5 | * |
6 | * Copyright (C) 2016,2021 Advanced Micro Devices, Inc. |
7 | * |
8 | * Author: Sanjay R Mehta <sanju.mehta@amd.com> |
9 | * Author: Tom Lendacky <thomas.lendacky@amd.com> |
10 | * Author: Gary R Hook <gary.hook@amd.com> |
11 | */ |
12 | |
13 | #ifndef __PT_DEV_H__ |
14 | #define __PT_DEV_H__ |
15 | |
16 | #include <linux/device.h> |
17 | #include <linux/dmaengine.h> |
18 | #include <linux/pci.h> |
19 | #include <linux/spinlock.h> |
20 | #include <linux/mutex.h> |
21 | #include <linux/list.h> |
22 | #include <linux/wait.h> |
23 | #include <linux/dmapool.h> |
24 | |
25 | #include "../virt-dma.h" |
26 | |
/*
 * Name buffer sizes. MAX_PT_NAME_LEN is the size of pt_device.name.
 * MAX_DMAPOOL_NAME_LEN is not used in this header; presumably it sizes
 * the dma_pool name built by the core code - TODO confirm.
 */
#define MAX_PT_NAME_LEN			16
#define MAX_DMAPOOL_NAME_LEN		32

/*
 * Queue limits.
 * NOTE(review): presumably the number of hardware queues and the maximum
 * number of commands queued in software - confirm against the users.
 */
#define MAX_HW_QUEUES			1
#define MAX_CMD_QLEN			100

/*
 * Engine identifier for pass-through operations.
 * NOTE(review): presumably the value stored in pt_cmd.engine - confirm.
 */
#define PT_ENGINE_PASSTHRU		5
34 | |
/* Register Mappings */

/* Interrupt mask and interrupt status register offsets */
#define IRQ_MASK_REG			0x040
#define IRQ_STATUS_REG			0x200

/* Keep only the low six bits (0x3f) of a queue status value */
#define CMD_Q_ERROR(__qs)		((__qs) & 0x0000003f)

/* Device-level configuration register offsets */
#define CMD_QUEUE_PRIO_OFFSET		0x00
#define CMD_REQID_CONFIG_OFFSET		0x04
#define CMD_TIMEOUT_OFFSET		0x08
#define CMD_PT_VERSION			0x10

/*
 * Per-queue control register offsets.
 * NOTE(review): presumably relative to pt_cmd_queue.reg_control, since the
 * interrupt helpers at the end of this header write to an offset of 0x000C
 * from reg_control, the same value as CMD_Q_INT_ENABLE_BASE - confirm.
 */
#define CMD_Q_CONTROL_BASE		0x0000
#define CMD_Q_TAIL_LO_BASE		0x0004
#define CMD_Q_HEAD_LO_BASE		0x0008
#define CMD_Q_INT_ENABLE_BASE		0x000C
#define CMD_Q_INTERRUPT_STATUS_BASE	0x0010

/* Per-queue status register offsets */
#define CMD_Q_STATUS_BASE		0x0100
#define CMD_Q_INT_STATUS_BASE		0x0104
#define CMD_Q_DMA_STATUS_BASE		0x0108
#define CMD_Q_DMA_READ_STATUS_BASE	0x010C
#define CMD_Q_DMA_WRITE_STATUS_BASE	0x0110
#define CMD_Q_ABORT_BASE		0x0114
#define CMD_Q_AX_CACHE_BASE		0x0118

/* Configuration and clock gate control register offsets */
#define CMD_CONFIG_OFFSET		0x1120
#define CMD_CLK_GATE_CTL_OFFSET		0x6004

/*
 * NOTE(review): presumably the default value for descriptor word 0
 * (ptdma_desc.dw0) - confirm against the code that fills descriptors.
 */
#define CMD_DESC_DW0_VAL		0x500012

/* Address offset for virtual queue registers */
#define CMD_Q_STATUS_INCR		0x1000
67 | |
/* Bit masks */

/* Values that are defined as zero, i.e. they set no bits when OR-ed in */
#define CMD_CONFIG_REQID		0
#define CMD_TIMEOUT_DISABLE		0
#define CMD_CLK_DYN_GATING_DIS		0
#define CMD_CLK_SW_GATE_MODE		0
#define CMD_CLK_GATE_CTL		0

/* Single-bit and multi-bit register fields */
#define CMD_QUEUE_PRIO			GENMASK(2, 1)
#define CMD_CONFIG_VHB_EN		BIT(0)
#define CMD_CLK_DYN_GATING_EN		BIT(0)
#define CMD_CLK_HW_GATE_MODE		BIT(0)
/*
 * NOTE(review): the ON and OFF delay macros are defined to the same bit
 * (BIT(12)) - confirm against the hardware specification.
 */
#define CMD_CLK_GATE_ON_DELAY		BIT(12)
#define CMD_CLK_GATE_OFF_DELAY		BIT(12)

/*
 * Combined clock gate configuration value. With the definitions above this
 * evaluates to BIT(0) | BIT(12); CMD_CLK_GATE_CTL contributes no bits.
 */
#define CMD_CLK_GATE_CONFIG		(CMD_CLK_GATE_CTL | \
					CMD_CLK_HW_GATE_MODE | \
					CMD_CLK_GATE_ON_DELAY | \
					CMD_CLK_DYN_GATING_EN | \
					CMD_CLK_GATE_OFF_DELAY)
86 | |
/*
 * Command queue control bits and queue geometry.
 *
 * CMD_Q_LEN is the queue length used by Q_SIZE() and QUEUE_SIZE_VAL.
 * CMD_Q_RUN, CMD_Q_HALT and CMD_Q_MEM_LOCATION are single-bit flags;
 * CMD_Q_SIZE, CMD_Q_SIZE_MASK and CMD_Q_SHIFT are multi-bit masks.
 */
#define CMD_Q_LEN			32
#define CMD_Q_RUN			BIT(0)
#define CMD_Q_HALT			BIT(1)
#define CMD_Q_MEM_LOCATION		BIT(2)
#define CMD_Q_SIZE_MASK			GENMASK(4, 0)
#define CMD_Q_SIZE			GENMASK(7, 3)
#define CMD_Q_SHIFT			GENMASK(1, 0)

/* Encoded queue size: ffs(CMD_Q_LEN) - 2, i.e. 4 for a length of 32 */
#define QUEUE_SIZE_VAL			((ffs(CMD_Q_LEN) - 2) & \
					 CMD_Q_SIZE_MASK)

/*
 * Low-order bit mask derived from the encoded queue size (0x3ff for a
 * queue length of 32).
 *
 * The shift is parenthesized explicitly: binary '-' binds tighter than
 * '<<', so without the inner parentheses the expression would evaluate to
 * 2 << ((QUEUE_SIZE_VAL + 5) - 1), a single bit rather than a mask.
 */
#define Q_PTR_MASK			((2 << (QUEUE_SIZE_VAL + 5)) - 1)

/* Size in bytes of one hardware descriptor */
#define Q_DESC_SIZE			sizeof(struct ptdma_desc)

/* CMD_Q_LEN multiplied by n (e.g. n = size of one queue entry) */
#define Q_SIZE(n)			(CMD_Q_LEN * (n))
99 | |
/* Queue interrupt bits */
#define INT_COMPLETION			BIT(0)
#define INT_ERROR			BIT(1)
#define INT_QUEUE_STOPPED		BIT(2)
#define INT_EMPTY_QUEUE			BIT(3)
/*
 * Interrupts enabled by pt_core_enable_queue_interrupts(): completion and
 * error only; the queue-stopped and empty-queue bits are not included.
 */
#define SUPPORTED_INTERRUPTS		(INT_COMPLETION | INT_ERROR)
105 | |
/****** Local Storage Block ******/
/* First and last LSB index; LSB_COUNT is the inclusive span (128) */
#define LSB_START			0
#define LSB_END				127
#define LSB_COUNT			(LSB_END - LSB_START + 1)

/*
 * NOTE(review): presumably the maximum block size and alignment (BIT(5) is
 * 32) passed when creating the queue dma_pool - confirm against the caller.
 */
#define PT_DMAPOOL_MAX_SIZE		64
#define PT_DMAPOOL_ALIGN		BIT(5)

/* NOTE(review): block size for pass-through operations; units not shown here */
#define PT_PASSTHRU_BLOCKSIZE		512
115 | |
/* Forward declaration; the full definition appears later in this header */
struct pt_device;

/*
 * struct pt_tasklet_data - per-device tasklet bookkeeping
 * (embedded in struct pt_device as tdata)
 * @completion: completion object
 * @cmd: pointer to a PTDMA command (presumably the one being processed -
 *       confirm against the tasklet code)
 */
struct pt_tasklet_data {
	struct completion completion;
	struct pt_cmd *cmd;
};
122 | |
/*
 * struct pt_passthru_engine - pass-through operation
 *   without performing DMA mapping
 * @mask: mask to be applied to data
 * @mask_len: length in bytes of mask
 * @src_dma: data to be used for this operation
 * @dst_dma: data produced by this operation
 * @src_len: length in bytes of data used for this operation
 *
 * Variables required to be set when calling pt_enqueue_cmd():
 *   - src_dma, dst_dma, src_len
 *   - mask, mask_len
 *     NOTE(review): this comment previously made mask/mask_len conditional
 *     on a "bit_mod" field and PT_PASSTHRU_BITWISE_NOOP, and also listed
 *     "byte_swap"; none of those are declared in this structure or header.
 *     Confirm whether mask and mask_len are actually consumed.
 */
struct pt_passthru_engine {
	dma_addr_t mask;
	u32 mask_len;		/* In bytes */

	dma_addr_t src_dma, dst_dma;
	u64 src_len;		/* In bytes */
};
143 | |
/*
 * struct pt_cmd - PTDMA operation request
 * @entry: list element
 * @work: work element used for callbacks
 * @pt: PT device to be run on
 * @ret: operation return code
 * @engine: PTDMA operation to perform (passthru)
 * @engine_error: PT engine return code
 * @passthru: engine specific structure, see struct pt_passthru_engine
 * @pt_cmd_callback: operation completion callback function
 * @data: parameter value to be supplied to the callback function
 *
 * Variables required to be set when calling pt_enqueue_cmd():
 *   - engine, pt_cmd_callback
 *   - See struct pt_passthru_engine for what is required for the
 *     pass-through operation.
 */
struct pt_cmd {
	struct list_head entry;
	struct work_struct work;
	struct pt_device *pt;
	int ret;
	u32 engine;
	u32 engine_error;
	struct pt_passthru_engine passthru;
	/* Completion callback support */
	void (*pt_cmd_callback)(void *data, int err);
	void *data;
};
174 | |
/*
 * struct pt_dma_desc - DMA engine descriptor wrapping one PTDMA command
 * @vd: virt-dma descriptor
 * @pt: PT device associated with this descriptor
 * @status: DMA status of this descriptor
 * @len: length associated with the descriptor (presumably the transfer
 *       length in bytes - confirm)
 * @issued_to_hw: flag, presumably set once the command has been handed to
 *                the hardware - confirm against the dmaengine code
 * @pt_cmd: the embedded PTDMA command
 */
struct pt_dma_desc {
	struct virt_dma_desc vd;
	struct pt_device *pt;
	enum dma_status status;
	size_t len;
	bool issued_to_hw;
	struct pt_cmd pt_cmd;
};
183 | |
/*
 * struct pt_dma_chan - DMA engine channel for a PT device
 * @vc: virt-dma channel
 * @pt: PT device associated with this channel
 */
struct pt_dma_chan {
	struct virt_dma_chan vc;
	struct pt_device *pt;
};
188 | |
/*
 * struct pt_cmd_queue - state of the PTDMA command queue
 * (embedded in struct pt_device as cmd_q)
 */
struct pt_cmd_queue {
	/* PT device associated with this queue */
	struct pt_device *pt;

	/* Queue dma pool */
	struct dma_pool *dma_pool;

	/* Queue base address (not necessarily aligned) */
	struct ptdma_desc *qbase;

	/*
	 * Queue lock, placed on its own cache line.
	 * NOTE(review): the comment here used to read "Aligned queue start
	 * address (per requirement)", which does not describe the lock;
	 * presumably it was meant for qbase_dma below - confirm.
	 */
	spinlock_t q_lock ____cacheline_aligned;
	unsigned int qidx;

	unsigned int qsize;
	dma_addr_t qbase_dma;
	dma_addr_t qdma_tail;

	unsigned int active;
	unsigned int suspended;

	/* Interrupt flag */
	bool int_en;

	/* Register addresses for queue */
	void __iomem *reg_control;
	u32 qcontrol; /* Cached control register */

	/* Status values from job */
	u32 int_status;
	u32 q_status;
	u32 q_int_status;
	u32 cmd_error;
	/* Queue Statistics */
	unsigned long total_pt_ops;
} ____cacheline_aligned;
224 | |
/*
 * struct pt_device - per-device state for one PTDMA controller
 */
struct pt_device {
	struct list_head entry;

	unsigned int ord;
	/* Device name, at most MAX_PT_NAME_LEN bytes including the terminator */
	char name[MAX_PT_NAME_LEN];

	struct device *dev;

	/* Bus specific device information */
	struct pt_msix *pt_msix;

	struct pt_dev_vdata *dev_vdata;

	unsigned int pt_irq;

	/* I/O area used for device communication */
	void __iomem *io_regs;

	/* Command list, its element count, and the lock on its own cache line */
	spinlock_t cmd_lock ____cacheline_aligned;
	unsigned int cmd_count;
	struct list_head cmd;

	/*
	 * The command queue. This represents the single queue that is
	 * available on the PTDMA for processing cmds.
	 */
	struct pt_cmd_queue cmd_q;

	/* Support for the DMA Engine capabilities */
	struct dma_device dma_dev;
	struct pt_dma_chan *pt_dma_chan;
	struct kmem_cache *dma_cmd_cache;
	struct kmem_cache *dma_desc_cache;

	wait_queue_head_t lsb_queue;

	/* Device Statistics */
	unsigned long total_interrupts;

	struct pt_tasklet_data tdata;
};
266 | |
/*
 * descriptor for PTDMA commands
 * 8 32-bit words:
 * word 0: function; engine; control bits
 * word 1: length of source data
 * word 2: low 32 bits of source pointer
 * word 3: upper 16 bits of source pointer; source memory type
 * word 4: low 32 bits of destination pointer
 * word 5: upper 16 bits of destination pointer; destination memory type
 * word 6: reserved 32 bits
 * word 7: reserved 32 bits
 */

/*
 * Control bits in descriptor word 0.
 * NOTE(review): presumably "stop on completion" and "interrupt on
 * completion" - confirm against the hardware documentation.
 */
#define DWORD0_SOC	BIT(0)
#define DWORD0_IOC	BIT(1)
282 | |
/*
 * struct dword3 - descriptor word 3, 32 bits in total
 * @src_hi: upper 16 bits of the source pointer
 * @src_mem: source memory type (2 bits)
 * @lsb_cxt_id: 8-bit LSB context id
 * @rsvd1: reserved (5 bits)
 * @fixed: single-bit flag
 */
struct dword3 {
	unsigned int src_hi:16;
	unsigned int src_mem:2;
	unsigned int lsb_cxt_id:8;
	unsigned int rsvd1:5;
	unsigned int fixed:1;
};
290 | |
/*
 * struct dword5 - descriptor word 5, 32 bits in total
 * @dst_hi: upper 16 bits of the destination pointer
 * @dst_mem: destination memory type (2 bits)
 * @rsvd1: reserved (13 bits)
 * @fixed: single-bit flag
 */
struct dword5 {
	unsigned int dst_hi:16;
	unsigned int dst_mem:2;
	unsigned int rsvd1:13;
	unsigned int fixed:1;
};
297 | |
/*
 * struct ptdma_desc - PTDMA command descriptor (eight 32-bit words)
 * @dw0: word 0 - function, engine and control bits
 * @length: word 1 - length of source data
 * @src_lo: word 2 - low 32 bits of the source pointer
 * @dw3: word 3 - see struct dword3
 * @dst_lo: word 4 - low 32 bits of the destination pointer
 * @dw5: word 5 - see struct dword5
 * @rsvd1: word 6 - reserved
 * @rsvd2: word 7 - reserved
 *
 * Q_DESC_SIZE is the size of this structure.
 */
struct ptdma_desc {
	u32 dw0;
	u32 length;
	u32 src_lo;
	struct dword3 dw3;
	u32 dst_lo;
	struct dword5 dw5;
	__le32 rsvd1;
	__le32 rsvd2;
};
308 | |
/*
 * Structure to hold PT device data
 * @bar: presumably the index of the PCI BAR holding the device registers -
 *       TODO confirm against the PCI probe code
 */
struct pt_dev_vdata {
	const unsigned int bar;
};
313 | |
/*
 * Entry points implemented in other source files of this driver; only the
 * prototypes are visible here, so the notes below are inferred from the
 * names and should be confirmed against the implementations.
 */

/* DMA engine registration / unregistration for a PT device */
int pt_dmaengine_register(struct pt_device *pt);
void pt_dmaengine_unregister(struct pt_device *pt);

/* debugfs setup and core device bring-up / tear-down */
void ptdma_debugfs_setup(struct pt_device *pt);
int pt_core_init(struct pt_device *pt);
void pt_core_destroy(struct pt_device *pt);

/* Run the pass-through operation described by @pt_engine on @cmd_q */
int pt_core_perform_passthru(struct pt_cmd_queue *cmd_q,
			     struct pt_passthru_engine *pt_engine);

/* Queue status check and queue start / stop control */
void pt_check_status_trans(struct pt_device *pt, struct pt_cmd_queue *cmd_q);
void pt_start_queue(struct pt_cmd_queue *cmd_q);
void pt_stop_queue(struct pt_cmd_queue *cmd_q);
327 | |
328 | static inline void pt_core_disable_queue_interrupts(struct pt_device *pt) |
329 | { |
330 | iowrite32(0, pt->cmd_q.reg_control + 0x000C); |
331 | } |
332 | |
333 | static inline void pt_core_enable_queue_interrupts(struct pt_device *pt) |
334 | { |
335 | iowrite32(SUPPORTED_INTERRUPTS, pt->cmd_q.reg_control + 0x000C); |
336 | } |
337 | #endif |
338 | |