1 | /* SPDX-License-Identifier: GPL-2.0-only |
2 | * Copyright (C) 2020 Marvell. |
3 | */ |
4 | |
5 | #ifndef __OTX2_CPT_REQMGR_H |
6 | #define __OTX2_CPT_REQMGR_H |
7 | |
8 | #include "otx2_cpt_common.h" |
9 | |
10 | /* Completion code size and initial value */ |
11 | #define OTX2_CPT_COMPLETION_CODE_SIZE 8 |
12 | #define OTX2_CPT_COMPLETION_CODE_INIT OTX2_CPT_COMP_E_NOTDONE |
13 | /* |
14 | * Maximum total number of SG buffers is 100, we divide it equally |
15 | * between input and output |
16 | */ |
17 | #define OTX2_CPT_MAX_SG_IN_CNT 50 |
18 | #define OTX2_CPT_MAX_SG_OUT_CNT 50 |
19 | |
20 | /* DMA mode direct or SG */ |
21 | #define OTX2_CPT_DMA_MODE_DIRECT 0 |
22 | #define OTX2_CPT_DMA_MODE_SG 1 |
23 | |
24 | /* Context source CPTR or DPTR */ |
25 | #define OTX2_CPT_FROM_CPTR 0 |
26 | #define OTX2_CPT_FROM_DPTR 1 |
27 | |
28 | #define OTX2_CPT_MAX_REQ_SIZE 65535 |
29 | |
30 | #define SG_COMPS_MAX 4 |
31 | #define SGV2_COMPS_MAX 3 |
32 | |
33 | #define SG_COMP_3 3 |
34 | #define SG_COMP_2 2 |
35 | #define SG_COMP_1 1 |
36 | |
/* Microcode opcode: major/minor pair, also addressable as one 16-bit word */
union otx2_cpt_opcode {
	u16 flags;
	struct {
		u8 major;
		u8 minor;
	} s;
};
44 | |
/* Core-specific request parameters handed to the CPT virtual function */
struct otx2_cptvf_request {
	u32 param1;	/* Opcode-specific parameter 1 */
	u32 param2;	/* Opcode-specific parameter 2 */
	u16 dlen;	/* Input data length */
	union otx2_cpt_opcode opcode;	/* Microcode opcode */
	dma_addr_t cptr_dma;	/* DMA address of the context buffer */
	void *cptr;	/* CPU address of the context buffer */
};
53 | |
54 | /* |
55 | * CPT_INST_S software command definitions |
56 | * Words EI (0-3) |
57 | */ |
/* Instruction queue command word 0: opcode/params/dlen, big-endian fields */
union otx2_cpt_iq_cmd_word0 {
	u64 u;
	struct {
		__be16 opcode;
		__be16 param1;
		__be16 param2;
		__be16 dlen;
	} s;
};
67 | |
/* Instruction queue command word 3: context pointer plus engine group */
union otx2_cpt_iq_cmd_word3 {
	u64 u;
	struct {
		u64 cptr:61;	/* Context pointer (address bits) */
		u64 grp:3;	/* Engine group */
	} s;
};
75 | |
/* Full software-side instruction queue command (words EI0-EI3) */
struct otx2_cpt_iq_command {
	union otx2_cpt_iq_cmd_word0 cmd;	/* EI0: opcode/params/dlen */
	u64 dptr;				/* EI1: input data pointer */
	u64 rptr;				/* EI2: result data pointer */
	union otx2_cpt_iq_cmd_word3 cptr;	/* EI3: context ptr + group */
};
82 | |
/* One slot in the pending queue, tracking an outstanding async request */
struct otx2_cpt_pending_entry {
	void *completion_addr;	/* Completion address */
	void *info;	/* Per-request instruction info (otx2_cpt_inst_info) */
	/* Kernel async request callback */
	void (*callback)(int status, void *arg1, void *arg2);
	struct crypto_async_request *areq; /* Async request callback arg */
	u8 resume_sender; /* Notify sender to resume sending requests */
	u8 busy; /* Entry status (free/busy) */
};
92 | |
/* Circular queue of pending entries, guarded by its own spinlock */
struct otx2_cpt_pending_queue {
	struct otx2_cpt_pending_entry *head; /* Head of the queue */
	u32 front; /* Process work from here */
	u32 rear; /* Append new work here */
	u32 pending_count; /* Pending requests count */
	u32 qlen; /* Queue length */
	spinlock_t lock; /* Queue lock */
};
101 | |
/* One input/output buffer: CPU pointer, its DMA mapping and size */
struct otx2_cpt_buf_ptr {
	u8 *vptr;		/* CPU (virtual) address */
	dma_addr_t dma_addr;	/* DMA address; 0 when not mapped */
	u16 size;		/* Buffer length in bytes */
};
107 | |
/* User control flags for a request; bit order depends on host endianness */
union otx2_cpt_ctrl_info {
	u32 flags;
	struct {
#if defined(__BIG_ENDIAN_BITFIELD)
		u32 reserved_6_31:26;
		u32 grp:3;	/* Group bits */
		u32 dma_mode:2;	/* DMA mode */
		u32 se_req:1;	/* To SE core */
#else
		u32 se_req:1;	/* To SE core */
		u32 dma_mode:2;	/* DMA mode */
		u32 grp:3;	/* Group bits */
		u32 reserved_6_31:26;
#endif
	} s;
};
124 | |
/* Complete request description passed from the crypto layer to the driver */
struct otx2_cpt_req_info {
	/* Kernel async request callback */
	void (*callback)(int status, void *arg1, void *arg2);
	struct crypto_async_request *areq; /* Async request callback arg */
	struct otx2_cptvf_request req;/* Request information (core specific) */
	union otx2_cpt_ctrl_info ctrl;/* User control information */
	struct otx2_cpt_buf_ptr in[OTX2_CPT_MAX_SG_IN_CNT];   /* Input bufs */
	struct otx2_cpt_buf_ptr out[OTX2_CPT_MAX_SG_OUT_CNT]; /* Output bufs */
	u8 *iv_out; /* IV to send back */
	u16 rlen; /* Output length */
	u8 in_cnt; /* Number of input buffers */
	u8 out_cnt; /* Number of output buffers */
	u8 req_type; /* Type of request */
	u8 is_enc; /* Is a request an encryption request */
	u8 is_trunc_hmac;/* Is truncated hmac used */
};
141 | |
142 | struct otx2_cpt_inst_info { |
143 | struct otx2_cpt_pending_entry *pentry; |
144 | struct otx2_cpt_req_info *req; |
145 | struct pci_dev *pdev; |
146 | void *completion_addr; |
147 | u8 *out_buffer; |
148 | u8 *in_buffer; |
149 | dma_addr_t dptr_baddr; |
150 | dma_addr_t rptr_baddr; |
151 | dma_addr_t comp_baddr; |
152 | unsigned long time_in; |
153 | u32 dlen; |
154 | u32 dma_len; |
155 | u64 gthr_sz; |
156 | u64 sctr_sz; |
157 | u8 ; |
158 | }; |
159 | |
/* Hardware SG list component (v1): 4 length/pointer pairs, big-endian */
struct otx2_cpt_sglist_component {
	__be16 len0;
	__be16 len1;
	__be16 len2;
	__be16 len3;
	__be64 ptr0;
	__be64 ptr1;
	__be64 ptr2;
	__be64 ptr3;
};
170 | |
/* CN10KB SG list component (v2): 3 buffers per entry, CPU byte order,
 * with an explicit count of valid segments.
 */
struct cn10kb_cpt_sglist_component {
	u16 len0;
	u16 len1;
	u16 len2;
	u16 valid_segs;	/* Number of len/ptr pairs in use (0..3) */
	u64 ptr0;
	u64 ptr1;
	u64 ptr2;
};
180 | |
181 | static inline void otx2_cpt_info_destroy(struct pci_dev *pdev, |
182 | struct otx2_cpt_inst_info *info) |
183 | { |
184 | struct otx2_cpt_req_info *req; |
185 | int i; |
186 | |
187 | if (info->dptr_baddr) |
188 | dma_unmap_single(&pdev->dev, info->dptr_baddr, |
189 | info->dma_len, DMA_BIDIRECTIONAL); |
190 | |
191 | if (info->req) { |
192 | req = info->req; |
193 | for (i = 0; i < req->out_cnt; i++) { |
194 | if (req->out[i].dma_addr) |
195 | dma_unmap_single(&pdev->dev, |
196 | req->out[i].dma_addr, |
197 | req->out[i].size, |
198 | DMA_BIDIRECTIONAL); |
199 | } |
200 | |
201 | for (i = 0; i < req->in_cnt; i++) { |
202 | if (req->in[i].dma_addr) |
203 | dma_unmap_single(&pdev->dev, |
204 | req->in[i].dma_addr, |
205 | req->in[i].size, |
206 | DMA_BIDIRECTIONAL); |
207 | } |
208 | } |
209 | kfree(objp: info); |
210 | } |
211 | |
212 | static inline int setup_sgio_components(struct pci_dev *pdev, |
213 | struct otx2_cpt_buf_ptr *list, |
214 | int buf_count, u8 *buffer) |
215 | { |
216 | struct otx2_cpt_sglist_component *sg_ptr; |
217 | int components; |
218 | int i, j; |
219 | |
220 | if (unlikely(!list)) { |
221 | dev_err(&pdev->dev, "Input list pointer is NULL\n" ); |
222 | return -EINVAL; |
223 | } |
224 | |
225 | for (i = 0; i < buf_count; i++) { |
226 | if (unlikely(!list[i].vptr)) |
227 | continue; |
228 | list[i].dma_addr = dma_map_single(&pdev->dev, list[i].vptr, |
229 | list[i].size, |
230 | DMA_BIDIRECTIONAL); |
231 | if (unlikely(dma_mapping_error(&pdev->dev, list[i].dma_addr))) { |
232 | dev_err(&pdev->dev, "Dma mapping failed\n" ); |
233 | goto sg_cleanup; |
234 | } |
235 | } |
236 | components = buf_count / SG_COMPS_MAX; |
237 | sg_ptr = (struct otx2_cpt_sglist_component *)buffer; |
238 | for (i = 0; i < components; i++) { |
239 | sg_ptr->len0 = cpu_to_be16(list[i * SG_COMPS_MAX + 0].size); |
240 | sg_ptr->len1 = cpu_to_be16(list[i * SG_COMPS_MAX + 1].size); |
241 | sg_ptr->len2 = cpu_to_be16(list[i * SG_COMPS_MAX + 2].size); |
242 | sg_ptr->len3 = cpu_to_be16(list[i * SG_COMPS_MAX + 3].size); |
243 | sg_ptr->ptr0 = cpu_to_be64(list[i * SG_COMPS_MAX + 0].dma_addr); |
244 | sg_ptr->ptr1 = cpu_to_be64(list[i * SG_COMPS_MAX + 1].dma_addr); |
245 | sg_ptr->ptr2 = cpu_to_be64(list[i * SG_COMPS_MAX + 2].dma_addr); |
246 | sg_ptr->ptr3 = cpu_to_be64(list[i * SG_COMPS_MAX + 3].dma_addr); |
247 | sg_ptr++; |
248 | } |
249 | components = buf_count % SG_COMPS_MAX; |
250 | |
251 | switch (components) { |
252 | case SG_COMP_3: |
253 | sg_ptr->len2 = cpu_to_be16(list[i * SG_COMPS_MAX + 2].size); |
254 | sg_ptr->ptr2 = cpu_to_be64(list[i * SG_COMPS_MAX + 2].dma_addr); |
255 | fallthrough; |
256 | case SG_COMP_2: |
257 | sg_ptr->len1 = cpu_to_be16(list[i * SG_COMPS_MAX + 1].size); |
258 | sg_ptr->ptr1 = cpu_to_be64(list[i * SG_COMPS_MAX + 1].dma_addr); |
259 | fallthrough; |
260 | case SG_COMP_1: |
261 | sg_ptr->len0 = cpu_to_be16(list[i * SG_COMPS_MAX + 0].size); |
262 | sg_ptr->ptr0 = cpu_to_be64(list[i * SG_COMPS_MAX + 0].dma_addr); |
263 | break; |
264 | default: |
265 | break; |
266 | } |
267 | return 0; |
268 | |
269 | sg_cleanup: |
270 | for (j = 0; j < i; j++) { |
271 | if (list[j].dma_addr) { |
272 | dma_unmap_single(&pdev->dev, list[j].dma_addr, |
273 | list[j].size, DMA_BIDIRECTIONAL); |
274 | } |
275 | |
276 | list[j].dma_addr = 0; |
277 | } |
278 | return -EIO; |
279 | } |
280 | |
281 | static inline int sgv2io_components_setup(struct pci_dev *pdev, |
282 | struct otx2_cpt_buf_ptr *list, |
283 | int buf_count, u8 *buffer) |
284 | { |
285 | struct cn10kb_cpt_sglist_component *sg_ptr; |
286 | int components; |
287 | int i, j; |
288 | |
289 | if (unlikely(!list)) { |
290 | dev_err(&pdev->dev, "Input list pointer is NULL\n" ); |
291 | return -EFAULT; |
292 | } |
293 | |
294 | for (i = 0; i < buf_count; i++) { |
295 | if (unlikely(!list[i].vptr)) |
296 | continue; |
297 | list[i].dma_addr = dma_map_single(&pdev->dev, list[i].vptr, |
298 | list[i].size, |
299 | DMA_BIDIRECTIONAL); |
300 | if (unlikely(dma_mapping_error(&pdev->dev, list[i].dma_addr))) { |
301 | dev_err(&pdev->dev, "Dma mapping failed\n" ); |
302 | goto sg_cleanup; |
303 | } |
304 | } |
305 | components = buf_count / SGV2_COMPS_MAX; |
306 | sg_ptr = (struct cn10kb_cpt_sglist_component *)buffer; |
307 | for (i = 0; i < components; i++) { |
308 | sg_ptr->len0 = list[i * SGV2_COMPS_MAX + 0].size; |
309 | sg_ptr->len1 = list[i * SGV2_COMPS_MAX + 1].size; |
310 | sg_ptr->len2 = list[i * SGV2_COMPS_MAX + 2].size; |
311 | sg_ptr->ptr0 = list[i * SGV2_COMPS_MAX + 0].dma_addr; |
312 | sg_ptr->ptr1 = list[i * SGV2_COMPS_MAX + 1].dma_addr; |
313 | sg_ptr->ptr2 = list[i * SGV2_COMPS_MAX + 2].dma_addr; |
314 | sg_ptr->valid_segs = SGV2_COMPS_MAX; |
315 | sg_ptr++; |
316 | } |
317 | components = buf_count % SGV2_COMPS_MAX; |
318 | |
319 | sg_ptr->valid_segs = components; |
320 | switch (components) { |
321 | case SG_COMP_2: |
322 | sg_ptr->len1 = list[i * SGV2_COMPS_MAX + 1].size; |
323 | sg_ptr->ptr1 = list[i * SGV2_COMPS_MAX + 1].dma_addr; |
324 | fallthrough; |
325 | case SG_COMP_1: |
326 | sg_ptr->len0 = list[i * SGV2_COMPS_MAX + 0].size; |
327 | sg_ptr->ptr0 = list[i * SGV2_COMPS_MAX + 0].dma_addr; |
328 | break; |
329 | default: |
330 | break; |
331 | } |
332 | return 0; |
333 | |
334 | sg_cleanup: |
335 | for (j = 0; j < i; j++) { |
336 | if (list[j].dma_addr) { |
337 | dma_unmap_single(&pdev->dev, list[j].dma_addr, |
338 | list[j].size, DMA_BIDIRECTIONAL); |
339 | } |
340 | |
341 | list[j].dma_addr = 0; |
342 | } |
343 | return -EIO; |
344 | } |
345 | |
346 | static inline struct otx2_cpt_inst_info * |
347 | cn10k_sgv2_info_create(struct pci_dev *pdev, struct otx2_cpt_req_info *req, |
348 | gfp_t gfp) |
349 | { |
350 | u32 dlen = 0, g_len, sg_len, info_len; |
351 | int align = OTX2_CPT_DMA_MINALIGN; |
352 | struct otx2_cpt_inst_info *info; |
353 | u16 g_sz_bytes, s_sz_bytes; |
354 | u32 total_mem_len; |
355 | int i; |
356 | |
357 | g_sz_bytes = ((req->in_cnt + 2) / 3) * |
358 | sizeof(struct cn10kb_cpt_sglist_component); |
359 | s_sz_bytes = ((req->out_cnt + 2) / 3) * |
360 | sizeof(struct cn10kb_cpt_sglist_component); |
361 | |
362 | g_len = ALIGN(g_sz_bytes, align); |
363 | sg_len = ALIGN(g_len + s_sz_bytes, align); |
364 | info_len = ALIGN(sizeof(*info), align); |
365 | total_mem_len = sg_len + info_len + sizeof(union otx2_cpt_res_s); |
366 | |
367 | info = kzalloc(size: total_mem_len, flags: gfp); |
368 | if (unlikely(!info)) |
369 | return NULL; |
370 | |
371 | for (i = 0; i < req->in_cnt; i++) |
372 | dlen += req->in[i].size; |
373 | |
374 | info->dlen = dlen; |
375 | info->in_buffer = (u8 *)info + info_len; |
376 | info->gthr_sz = req->in_cnt; |
377 | info->sctr_sz = req->out_cnt; |
378 | |
379 | /* Setup gather (input) components */ |
380 | if (sgv2io_components_setup(pdev, list: req->in, buf_count: req->in_cnt, |
381 | buffer: info->in_buffer)) { |
382 | dev_err(&pdev->dev, "Failed to setup gather list\n" ); |
383 | goto destroy_info; |
384 | } |
385 | |
386 | if (sgv2io_components_setup(pdev, list: req->out, buf_count: req->out_cnt, |
387 | buffer: &info->in_buffer[g_len])) { |
388 | dev_err(&pdev->dev, "Failed to setup scatter list\n" ); |
389 | goto destroy_info; |
390 | } |
391 | |
392 | info->dma_len = total_mem_len - info_len; |
393 | info->dptr_baddr = dma_map_single(&pdev->dev, info->in_buffer, |
394 | info->dma_len, DMA_BIDIRECTIONAL); |
395 | if (unlikely(dma_mapping_error(&pdev->dev, info->dptr_baddr))) { |
396 | dev_err(&pdev->dev, "DMA Mapping failed for cpt req\n" ); |
397 | goto destroy_info; |
398 | } |
399 | info->rptr_baddr = info->dptr_baddr + g_len; |
400 | /* |
401 | * Get buffer for union otx2_cpt_res_s response |
402 | * structure and its physical address |
403 | */ |
404 | info->completion_addr = info->in_buffer + sg_len; |
405 | info->comp_baddr = info->dptr_baddr + sg_len; |
406 | |
407 | return info; |
408 | |
409 | destroy_info: |
410 | otx2_cpt_info_destroy(pdev, info); |
411 | return NULL; |
412 | } |
413 | |
414 | /* SG list header size in bytes */ |
415 | #define SG_LIST_HDR_SIZE 8 |
416 | static inline struct otx2_cpt_inst_info * |
417 | otx2_sg_info_create(struct pci_dev *pdev, struct otx2_cpt_req_info *req, |
418 | gfp_t gfp) |
419 | { |
420 | int align = OTX2_CPT_DMA_MINALIGN; |
421 | struct otx2_cpt_inst_info *info; |
422 | u32 dlen, align_dlen, info_len; |
423 | u16 g_sz_bytes, s_sz_bytes; |
424 | u32 total_mem_len; |
425 | |
426 | if (unlikely(req->in_cnt > OTX2_CPT_MAX_SG_IN_CNT || |
427 | req->out_cnt > OTX2_CPT_MAX_SG_OUT_CNT)) { |
428 | dev_err(&pdev->dev, "Error too many sg components\n" ); |
429 | return NULL; |
430 | } |
431 | |
432 | g_sz_bytes = ((req->in_cnt + 3) / 4) * |
433 | sizeof(struct otx2_cpt_sglist_component); |
434 | s_sz_bytes = ((req->out_cnt + 3) / 4) * |
435 | sizeof(struct otx2_cpt_sglist_component); |
436 | |
437 | dlen = g_sz_bytes + s_sz_bytes + SG_LIST_HDR_SIZE; |
438 | align_dlen = ALIGN(dlen, align); |
439 | info_len = ALIGN(sizeof(*info), align); |
440 | total_mem_len = align_dlen + info_len + sizeof(union otx2_cpt_res_s); |
441 | |
442 | info = kzalloc(size: total_mem_len, flags: gfp); |
443 | if (unlikely(!info)) |
444 | return NULL; |
445 | |
446 | info->dlen = dlen; |
447 | info->in_buffer = (u8 *)info + info_len; |
448 | |
449 | ((u16 *)info->in_buffer)[0] = req->out_cnt; |
450 | ((u16 *)info->in_buffer)[1] = req->in_cnt; |
451 | ((u16 *)info->in_buffer)[2] = 0; |
452 | ((u16 *)info->in_buffer)[3] = 0; |
453 | cpu_to_be64s((u64 *)info->in_buffer); |
454 | |
455 | /* Setup gather (input) components */ |
456 | if (setup_sgio_components(pdev, list: req->in, buf_count: req->in_cnt, |
457 | buffer: &info->in_buffer[8])) { |
458 | dev_err(&pdev->dev, "Failed to setup gather list\n" ); |
459 | goto destroy_info; |
460 | } |
461 | |
462 | if (setup_sgio_components(pdev, list: req->out, buf_count: req->out_cnt, |
463 | buffer: &info->in_buffer[8 + g_sz_bytes])) { |
464 | dev_err(&pdev->dev, "Failed to setup scatter list\n" ); |
465 | goto destroy_info; |
466 | } |
467 | |
468 | info->dma_len = total_mem_len - info_len; |
469 | info->dptr_baddr = dma_map_single(&pdev->dev, info->in_buffer, |
470 | info->dma_len, DMA_BIDIRECTIONAL); |
471 | if (unlikely(dma_mapping_error(&pdev->dev, info->dptr_baddr))) { |
472 | dev_err(&pdev->dev, "DMA Mapping failed for cpt req\n" ); |
473 | goto destroy_info; |
474 | } |
475 | /* |
476 | * Get buffer for union otx2_cpt_res_s response |
477 | * structure and its physical address |
478 | */ |
479 | info->completion_addr = info->in_buffer + align_dlen; |
480 | info->comp_baddr = info->dptr_baddr + align_dlen; |
481 | |
482 | return info; |
483 | |
484 | destroy_info: |
485 | otx2_cpt_info_destroy(pdev, info); |
486 | return NULL; |
487 | } |
488 | |
489 | struct otx2_cptlf_wqe; |
490 | int otx2_cpt_do_request(struct pci_dev *pdev, struct otx2_cpt_req_info *req, |
491 | int cpu_num); |
492 | void otx2_cpt_post_process(struct otx2_cptlf_wqe *wqe); |
493 | int otx2_cpt_get_kcrypto_eng_grp_num(struct pci_dev *pdev); |
494 | |
495 | #endif /* __OTX2_CPT_REQMGR_H */ |
496 | |