otx_cptvf_reqmgr.c source code [linux/drivers/crypto/marvell/octeontx/otx_cptvf_reqmgr.c]

// SPDX-License-Identifier: GPL-2.0
/* Marvell OcteonTX CPT driver
 *
 * Copyright (C) 2019 Marvell International Ltd.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */
10
11	#include "otx_cptvf.h"
12	#include "otx_cptvf_algs.h"
13
/* Size in bytes of the completion code and the value it is initialised to */
#define COMPLETION_CODE_SIZE	8
#define COMPLETION_CODE_INIT	0

/* Size in bytes of the scatter/gather list header */
#define SG_LIST_HDR_SIZE	8

/*
 * Total time (in us) spent waiting for a free pending entry, and the
 * length (in us) of each individual wait step
 */
#define CPT_PENTRY_TIMEOUT	1000
#define CPT_PENTRY_STEP		50

/* Pending queue thresholds for stopping and resuming sender requests */
#define CPT_IQ_STOP_MARGIN	128
#define CPT_IQ_RESUME_MARGIN	512

/* Alignment applied to each region of the per-request buffer */
#define CPT_DMA_ALIGN		128
30
31	void otx_cpt_dump_sg_list(struct pci_dev pdev, struct* otx_cpt_req_info *req)
32	{
33	int i;
34
35	pr_debug("Gather list size %d\n", req->incnt);
36	for (i = `0`; i < req->incnt; i++) {
37	pr_debug("Buffer %d size %d, vptr 0x%p, dmaptr 0x%p\n", i,
38	req->in[i].size, req->in[i].vptr,
39	(void *) req->in[i].dma_addr);
40	pr_debug("Buffer hexdump (%d bytes)\n",
41	req->in[i].size);
42	print_hex_dump_debug("", DUMP_PREFIX_NONE, `16`, `1`,
43	req->in[i].vptr, req->in[i].size, false);
44	}
45
46	pr_debug("Scatter list size %d\n", req->outcnt);
47	for (i = `0`; i < req->outcnt; i++) {
48	pr_debug("Buffer %d size %d, vptr 0x%p, dmaptr 0x%p\n", i,
49	req->out[i].size, req->out[i].vptr,
50	(void *) req->out[i].dma_addr);
51	pr_debug("Buffer hexdump (%d bytes)\n", req->out[i].size);
52	print_hex_dump_debug("", DUMP_PREFIX_NONE, `16`, `1`,
53	req->out[i].vptr, req->out[i].size, false);
54	}
55	}
56
57	static inline struct otx_cpt_pending_entry *get_free_pending_entry(
58	struct otx_cpt_pending_queue *q,
59	int qlen)
60	{
61	struct otx_cpt_pending_entry *ent = NULL;
62
63	ent = &q->head[q->rear];
64	if (unlikely(ent->busy))
65	return NULL;
66
67	q->rear++;
68	if (unlikely(q->rear == qlen))
69	q->rear = `0`;
70
71	return ent;
72	}
73
74	static inline u32 modulo_inc(u32 index, u32 length, u32 inc)
75	{
76	if (WARN_ON(inc > length))
77	inc = length;
78
79	index += inc;
80	if (unlikely(index >= length))
81	index -= length;
82
83	return index;
84	}
85
86	static inline void free_pentry(struct otx_cpt_pending_entry *pentry)
87	{
88	pentry->completion_addr = NULL;
89	pentry->info = NULL;
90	pentry->callback = NULL;
91	pentry->areq = NULL;
92	pentry->resume_sender = false;
93	pentry->busy = false;
94	}
95
96	static inline int setup_sgio_components(struct pci_dev *pdev,
97	struct otx_cpt_buf_ptr *list,
98	int buf_count, u8 *buffer)
99	{
100	struct otx_cpt_sglist_component *sg_ptr = NULL;
101	int ret = `0`, i, j;
102	int components;
103
104	if (unlikely(!list)) {
105	dev_err(&pdev->dev, "Input list pointer is NULL\n");
106	return -EFAULT;
107	}
108
109	for (i = `0`; i < buf_count; i++) {
110	if (likely(list[i].vptr)) {
111	list[i].dma_addr = dma_map_single(&pdev->dev,
112	list[i].vptr,
113	list[i].size,
114	DMA_BIDIRECTIONAL);
115	if (unlikely(dma_mapping_error(&pdev->dev,
116	list[i].dma_addr))) {
117	dev_err(&pdev->dev, "Dma mapping failed\n");
118	ret = -EIO;
119	goto sg_cleanup;
120	}
121	}
122	}
123
124	components = buf_count / `4`;
125	sg_ptr = (struct otx_cpt_sglist_component *)buffer;
126	for (i = `0`; i < components; i++) {
127	sg_ptr->u.s.len0 = cpu_to_be16(list[i * `4` + `0`].size);
128	sg_ptr->u.s.len1 = cpu_to_be16(list[i * `4` + `1`].size);
129	sg_ptr->u.s.len2 = cpu_to_be16(list[i * `4` + `2`].size);
130	sg_ptr->u.s.len3 = cpu_to_be16(list[i * `4` + `3`].size);
131	sg_ptr->ptr0 = cpu_to_be64(list[i * `4` + `0`].dma_addr);
132	sg_ptr->ptr1 = cpu_to_be64(list[i * `4` + `1`].dma_addr);
133	sg_ptr->ptr2 = cpu_to_be64(list[i * `4` + `2`].dma_addr);
134	sg_ptr->ptr3 = cpu_to_be64(list[i * `4` + `3`].dma_addr);
135	sg_ptr++;
136	}
137	components = buf_count % `4`;
138
139	switch (components) {
140	case `3`:
141	sg_ptr->u.s.len2 = cpu_to_be16(list[i * `4` + `2`].size);
142	sg_ptr->ptr2 = cpu_to_be64(list[i * `4` + `2`].dma_addr);
143	fallthrough;
144	case `2`:
145	sg_ptr->u.s.len1 = cpu_to_be16(list[i * `4` + `1`].size);
146	sg_ptr->ptr1 = cpu_to_be64(list[i * `4` + `1`].dma_addr);
147	fallthrough;
148	case `1`:
149	sg_ptr->u.s.len0 = cpu_to_be16(list[i * `4` + `0`].size);
150	sg_ptr->ptr0 = cpu_to_be64(list[i * `4` + `0`].dma_addr);
151	break;
152	default:
153	break;
154	}
155	return ret;
156
157	sg_cleanup:
158	for (j = `0`; j < i; j++) {
159	if (list[j].dma_addr) {
160	dma_unmap_single(&pdev->dev, list[i].dma_addr,
161	list[i].size, DMA_BIDIRECTIONAL);
162	}
163
164	list[j].dma_addr = `0`;
165	}
166	return ret;
167	}
168
169	static inline int setup_sgio_list(struct pci_dev *pdev,
170	struct otx_cpt_info_buffer **pinfo,
171	struct otx_cpt_req_info *req, gfp_t gfp)
172	{
173	u32 dlen, align_dlen, info_len, rlen;
174	struct otx_cpt_info_buffer *info;
175	u16 g_sz_bytes, s_sz_bytes;
176	int align = CPT_DMA_ALIGN;
177	u32 total_mem_len;
178
179	if (unlikely(req->incnt > OTX_CPT_MAX_SG_IN_CNT \|\|
180	req->outcnt > OTX_CPT_MAX_SG_OUT_CNT)) {
181	dev_err(&pdev->dev, "Error too many sg components\n");
182	return -EINVAL;
183	}
184
185	g_sz_bytes = ((req->incnt + `3`) / `4`) *
186	sizeof(struct otx_cpt_sglist_component);
187	s_sz_bytes = ((req->outcnt + `3`) / `4`) *
188	sizeof(struct otx_cpt_sglist_component);
189
190	dlen = g_sz_bytes + s_sz_bytes + SG_LIST_HDR_SIZE;
191	align_dlen = ALIGN(dlen, align);
192	info_len = ALIGN(sizeof(*info), align);
193	rlen = ALIGN(sizeof(union otx_cpt_res_s), align);
194	total_mem_len = align_dlen + info_len + rlen + COMPLETION_CODE_SIZE;
195
196	info = kzalloc(size: total_mem_len, flags: gfp);
197	if (unlikely(!info)) {
198	dev_err(&pdev->dev, "Memory allocation failed\n");
199	return -ENOMEM;
200	}
201	*pinfo = info;
202	info->dlen = dlen;
203	info->in_buffer = (u8 *)info + info_len;
204
205	((__be16 *)info->in_buffer)[`0`] = cpu_to_be16(req->outcnt);
206	((__be16 *)info->in_buffer)[`1`] = cpu_to_be16(req->incnt);
207	((u16 *)info->in_buffer)[`2`] = `0`;
208	((u16 *)info->in_buffer)[`3`] = `0`;
209
210	/ Setup gather (input) components /
211	if (setup_sgio_components(pdev, list: req->in, buf_count: req->incnt,
212	buffer: &info->in_buffer[`8`])) {
213	dev_err(&pdev->dev, "Failed to setup gather list\n");
214	return -EFAULT;
215	}
216
217	if (setup_sgio_components(pdev, list: req->out, buf_count: req->outcnt,
218	buffer: &info->in_buffer[`8` + g_sz_bytes])) {
219	dev_err(&pdev->dev, "Failed to setup scatter list\n");
220	return -EFAULT;
221	}
222
223	info->dma_len = total_mem_len - info_len;
224	info->dptr_baddr = dma_map_single(&pdev->dev, (void *)info->in_buffer,
225	info->dma_len, DMA_BIDIRECTIONAL);
226	if (unlikely(dma_mapping_error(&pdev->dev, info->dptr_baddr))) {
227	dev_err(&pdev->dev, "DMA Mapping failed for cpt req\n");
228	return -EIO;
229	}
230	/*
231	* Get buffer for union otx_cpt_res_s response
232	* structure and its physical address
233	*/
234	info->completion_addr = (u64 *)(info->in_buffer + align_dlen);
235	info->comp_baddr = info->dptr_baddr + align_dlen;
236
237	/ Create and initialize RPTR /
238	info->out_buffer = (u8 *)info->completion_addr + rlen;
239	info->rptr_baddr = info->comp_baddr + rlen;
240
241	((u64 ) info->out_buffer) = ~((u64) COMPLETION_CODE_INIT);
242
243	return `0`;
244	}
245
246
247	static void cpt_fill_inst(union otx_cpt_inst_s *inst,
248	struct otx_cpt_info_buffer *info,
249	struct otx_cpt_iq_cmd *cmd)
250	{
251	inst->u[`0`] = `0x0`;
252	inst->s.doneint = true;
253	inst->s.res_addr = (u64)info->comp_baddr;
254	inst->u[`2`] = `0x0`;
255	inst->s.wq_ptr = `0`;
256	inst->s.ei0 = cmd->cmd.u64;
257	inst->s.ei1 = cmd->dptr;
258	inst->s.ei2 = cmd->rptr;
259	inst->s.ei3 = cmd->cptr.u64;
260	}
261
262	/*
263	* On OcteonTX platform the parameter db_count is used as a count for ringing
264	* door bell. The valid values for db_count are:
265	* 0 - 1 CPT instruction will be enqueued however CPT will not be informed
266	* 1 - 1 CPT instruction will be enqueued and CPT will be informed
267	*/
268	static void cpt_send_cmd(union otx_cpt_inst_s cptinst, struct* otx_cptvf *cptvf)
269	{
270	struct otx_cpt_cmd_qinfo *qinfo = &cptvf->cqinfo;
271	struct otx_cpt_cmd_queue *queue;
272	struct otx_cpt_cmd_chunk *curr;
273	u8 *ent;
274
275	queue = &qinfo->queue[`0`];
276	/*
277	* cpt_send_cmd is currently called only from critical section
278	* therefore no locking is required for accessing instruction queue
279	*/
280	ent = &queue->qhead->head[queue->idx * OTX_CPT_INST_SIZE];
281	memcpy(ent, (void *) cptinst, OTX_CPT_INST_SIZE);
282
283	if (++queue->idx >= queue->qhead->size / `64`) {
284	curr = queue->qhead;
285
286	if (list_is_last(list: &curr->nextchunk, head: &queue->chead))
287	queue->qhead = queue->base;
288	else
289	queue->qhead = list_next_entry(queue->qhead, nextchunk);
290	queue->idx = `0`;
291	}
292	/ make sure all memory stores are done before ringing doorbell /
293	smp_wmb();
294	otx_cptvf_write_vq_doorbell(cptvf, val: `1`);
295	}
296
297	static int process_request(struct pci_dev pdev, struct* otx_cpt_req_info *req,
298	struct otx_cpt_pending_queue *pqueue,
299	struct otx_cptvf *cptvf)
300	{
301	struct otx_cptvf_request *cpt_req = &req->req;
302	struct otx_cpt_pending_entry *pentry = NULL;
303	union otx_cpt_ctrl_info *ctrl = &req->ctrl;
304	struct otx_cpt_info_buffer *info = NULL;
305	union otx_cpt_res_s *result = NULL;
306	struct otx_cpt_iq_cmd iq_cmd;
307	union otx_cpt_inst_s cptinst;
308	int retry, ret = `0`;
309	u8 resume_sender;
310	gfp_t gfp;
311
312	gfp = (req->areq->flags & CRYPTO_TFM_REQ_MAY_SLEEP) ? GFP_KERNEL :
313	GFP_ATOMIC;
314	ret = setup_sgio_list(pdev, pinfo: &info, req, gfp);
315	if (unlikely(ret)) {
316	dev_err(&pdev->dev, "Setting up SG list failed\n");
317	goto request_cleanup;
318	}
319	cpt_req->dlen = info->dlen;
320
321	result = (union otx_cpt_res_s *) info->completion_addr;
322	result->s.compcode = COMPLETION_CODE_INIT;
323
324	spin_lock_bh(lock: &pqueue->lock);
325	pentry = get_free_pending_entry(q: pqueue, qlen: pqueue->qlen);
326	retry = CPT_PENTRY_TIMEOUT / CPT_PENTRY_STEP;
327	while (unlikely(!pentry) && retry--) {
328	spin_unlock_bh(lock: &pqueue->lock);
329	udelay(CPT_PENTRY_STEP);
330	spin_lock_bh(lock: &pqueue->lock);
331	pentry = get_free_pending_entry(q: pqueue, qlen: pqueue->qlen);
332	}
333
334	if (unlikely(!pentry)) {
335	ret = -ENOSPC;
336	spin_unlock_bh(lock: &pqueue->lock);
337	goto request_cleanup;
338	}
339
340	/*
341	* Check if we are close to filling in entire pending queue,
342	* if so then tell the sender to stop/sleep by returning -EBUSY
343	* We do it only for context which can sleep (GFP_KERNEL)
344	*/
345	if (gfp == GFP_KERNEL &&
346	pqueue->pending_count > (pqueue->qlen - CPT_IQ_STOP_MARGIN)) {
347	pentry->resume_sender = true;
348	} else
349	pentry->resume_sender = false;
350	resume_sender = pentry->resume_sender;
351	pqueue->pending_count++;
352
353	pentry->completion_addr = info->completion_addr;
354	pentry->info = info;
355	pentry->callback = req->callback;
356	pentry->areq = req->areq;
357	pentry->busy = true;
358	info->pentry = pentry;
359	info->time_in = jiffies;
360	info->req = req;
361
362	/ Fill in the command /
363	iq_cmd.cmd.u64 = `0`;
364	iq_cmd.cmd.s.opcode = cpu_to_be16(cpt_req->opcode.flags);
365	iq_cmd.cmd.s.param1 = cpu_to_be16(cpt_req->param1);
366	iq_cmd.cmd.s.param2 = cpu_to_be16(cpt_req->param2);
367	iq_cmd.cmd.s.dlen = cpu_to_be16(cpt_req->dlen);
368
369	iq_cmd.dptr = info->dptr_baddr;
370	iq_cmd.rptr = info->rptr_baddr;
371	iq_cmd.cptr.u64 = `0`;
372	iq_cmd.cptr.s.grp = ctrl->s.grp;
373
374	/ Fill in the CPT_INST_S type command for HW interpretation /
375	cpt_fill_inst(inst: &cptinst, info, cmd: &iq_cmd);
376
377	/ Print debug info if enabled /
378	otx_cpt_dump_sg_list(pdev, req);
379	pr_debug("Cpt_inst_s hexdump (%d bytes)\n", OTX_CPT_INST_SIZE);
380	print_hex_dump_debug("", `0`, `16`, `1`, &cptinst, OTX_CPT_INST_SIZE, false);
381	pr_debug("Dptr hexdump (%d bytes)\n", cpt_req->dlen);
382	print_hex_dump_debug("", `0`, `16`, `1`, info->in_buffer,
383	cpt_req->dlen, false);
384
385	/ Send CPT command /
386	cpt_send_cmd(cptinst: &cptinst, cptvf);
387
388	/*
389	* We allocate and prepare pending queue entry in critical section
390	* together with submitting CPT instruction to CPT instruction queue
391	* to make sure that order of CPT requests is the same in both
392	* pending and instruction queues
393	*/
394	spin_unlock_bh(lock: &pqueue->lock);
395
396	ret = resume_sender ? -EBUSY : -EINPROGRESS;
397	return ret;
398
399	request_cleanup:
400	do_request_cleanup(pdev, info);
401	return ret;
402	}
403
404	int otx_cpt_do_request(struct pci_dev pdev, struct* otx_cpt_req_info *req,
405	int cpu_num)
406	{
407	struct otx_cptvf *cptvf = pci_get_drvdata(pdev);
408
409	if (!otx_cpt_device_ready(cptvf)) {
410	dev_err(&pdev->dev, "CPT Device is not ready\n");
411	return -ENODEV;
412	}
413
414	if ((cptvf->vftype == OTX_CPT_SE_TYPES) && (!req->ctrl.s.se_req)) {
415	dev_err(&pdev->dev, "CPTVF-%d of SE TYPE got AE request\n",
416	cptvf->vfid);
417	return -EINVAL;
418	} else if ((cptvf->vftype == OTX_CPT_AE_TYPES) &&
419	(req->ctrl.s.se_req)) {
420	dev_err(&pdev->dev, "CPTVF-%d of AE TYPE got SE request\n",
421	cptvf->vfid);
422	return -EINVAL;
423	}
424
425	return process_request(pdev, req, pqueue: &cptvf->pqinfo.queue[`0`], cptvf);
426	}
427
428	static int cpt_process_ccode(struct pci_dev *pdev,
429	union otx_cpt_res_s *cpt_status,
430	struct otx_cpt_info_buffer *cpt_info,
431	struct otx_cpt_req_info req, u32 res_code)
432	{
433	u8 ccode = cpt_status->s.compcode;
434	union otx_cpt_error_code ecode;
435
436	ecode.u = be64_to_cpup(p: (__be64 *)cpt_info->out_buffer);
437	switch (ccode) {
438	case CPT_COMP_E_FAULT:
439	dev_err(&pdev->dev,
440	"Request failed with DMA fault\n");
441	otx_cpt_dump_sg_list(pdev, req);
442	break;
443
444	case CPT_COMP_E_SWERR:
445	dev_err(&pdev->dev,
446	"Request failed with software error code %d\n",
447	ecode.s.ccode);
448	otx_cpt_dump_sg_list(pdev, req);
449	break;
450
451	case CPT_COMP_E_HWERR:
452	dev_err(&pdev->dev,
453	"Request failed with hardware error\n");
454	otx_cpt_dump_sg_list(pdev, req);
455	break;
456
457	case COMPLETION_CODE_INIT:
458	/ check for timeout /
459	if (time_after_eq(jiffies, cpt_info->time_in +
460	OTX_CPT_COMMAND_TIMEOUT * HZ))
461	dev_warn(&pdev->dev, "Request timed out 0x%p\n", req);
462	else if (cpt_info->extra_time < OTX_CPT_TIME_IN_RESET_COUNT) {
463	cpt_info->time_in = jiffies;
464	cpt_info->extra_time++;
465	}
466	return `1`;
467
468	case CPT_COMP_E_GOOD:
469	/ Check microcode completion code /
470	if (ecode.s.ccode) {
471	/*
472	* If requested hmac is truncated and ucode returns
473	* s/g write length error then we report success
474	* because ucode writes as many bytes of calculated
475	* hmac as available in gather buffer and reports
476	* s/g write length error if number of bytes in gather
477	* buffer is less than full hmac size.
478	*/
479	if (req->is_trunc_hmac &&
480	ecode.s.ccode == ERR_SCATTER_GATHER_WRITE_LENGTH) {
481	*res_code = `0`;
482	break;
483	}
484
485	dev_err(&pdev->dev,
486	"Request failed with software error code 0x%x\n",
487	ecode.s.ccode);
488	otx_cpt_dump_sg_list(pdev, req);
489	break;
490	}
491
492	/ Request has been processed with success /
493	*res_code = `0`;
494	break;
495
496	default:
497	dev_err(&pdev->dev, "Request returned invalid status\n");
498	break;
499	}
500
501	return `0`;
502	}
503
504	static inline void process_pending_queue(struct pci_dev *pdev,
505	struct otx_cpt_pending_queue *pqueue)
506	{
507	void (callback)(int* status, void arg1, void* *arg2);
508	struct otx_cpt_pending_entry *resume_pentry = NULL;
509	struct otx_cpt_pending_entry *pentry = NULL;
510	struct otx_cpt_info_buffer *cpt_info = NULL;
511	union otx_cpt_res_s *cpt_status = NULL;
512	struct otx_cpt_req_info *req = NULL;
513	struct crypto_async_request *areq;
514	u32 res_code, resume_index;
515
516	while (`1`) {
517	spin_lock_bh(lock: &pqueue->lock);
518	pentry = &pqueue->head[pqueue->front];
519
520	if (WARN_ON(!pentry)) {
521	spin_unlock_bh(lock: &pqueue->lock);
522	break;
523	}
524
525	res_code = -EINVAL;
526	if (unlikely(!pentry->busy)) {
527	spin_unlock_bh(lock: &pqueue->lock);
528	break;
529	}
530
531	if (unlikely(!pentry->callback)) {
532	dev_err(&pdev->dev, "Callback NULL\n");
533	goto process_pentry;
534	}
535
536	cpt_info = pentry->info;
537	if (unlikely(!cpt_info)) {
538	dev_err(&pdev->dev, "Pending entry post arg NULL\n");
539	goto process_pentry;
540	}
541
542	req = cpt_info->req;
543	if (unlikely(!req)) {
544	dev_err(&pdev->dev, "Request NULL\n");
545	goto process_pentry;
546	}
547
548	cpt_status = (union otx_cpt_res_s *) pentry->completion_addr;
549	if (unlikely(!cpt_status)) {
550	dev_err(&pdev->dev, "Completion address NULL\n");
551	goto process_pentry;
552	}
553
554	if (cpt_process_ccode(pdev, cpt_status, cpt_info, req,
555	res_code: &res_code)) {
556	spin_unlock_bh(lock: &pqueue->lock);
557	return;
558	}
559	cpt_info->pdev = pdev;
560
561	process_pentry:
562	/*
563	* Check if we should inform sending side to resume
564	* We do it CPT_IQ_RESUME_MARGIN elements in advance before
565	* pending queue becomes empty
566	*/
567	resume_index = modulo_inc(index: pqueue->front, length: pqueue->qlen,
568	CPT_IQ_RESUME_MARGIN);
569	resume_pentry = &pqueue->head[resume_index];
570	if (resume_pentry &&
571	resume_pentry->resume_sender) {
572	resume_pentry->resume_sender = false;
573	callback = resume_pentry->callback;
574	areq = resume_pentry->areq;
575
576	if (callback) {
577	spin_unlock_bh(lock: &pqueue->lock);
578
579	/*
580	* EINPROGRESS is an indication for sending
581	* side that it can resume sending requests
582	*/
583	callback(-EINPROGRESS, areq, cpt_info);
584	spin_lock_bh(lock: &pqueue->lock);
585	}
586	}
587
588	callback = pentry->callback;
589	areq = pentry->areq;
590	free_pentry(pentry);
591
592	pqueue->pending_count--;
593	pqueue->front = modulo_inc(index: pqueue->front, length: pqueue->qlen, inc: `1`);
594	spin_unlock_bh(lock: &pqueue->lock);
595
596	/*
597	* Call callback after current pending entry has been
598	* processed, we don't do it if the callback pointer is
599	* invalid.
600	*/
601	if (callback)
602	callback(res_code, areq, cpt_info);
603	}
604	}
605
606	void otx_cpt_post_process(struct otx_cptvf_wqe *wqe)
607	{
608	process_pending_queue(pdev: wqe->cptvf->pdev, pqueue: &wqe->cptvf->pqinfo.queue[`0`]);
609	}
610

source code of linux/drivers/crypto/marvell/octeontx/otx_cptvf_reqmgr.c