// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2019 HiSilicon Limited. */
#include <linux/align.h>
#include <linux/dma-mapping.h>
#include <linux/hisi_acc_qm.h>
#include <linux/module.h>
#include <linux/slab.h>

#define HISI_ACC_SGL_SGE_NR_MIN		1
#define HISI_ACC_SGL_NR_MAX		256
#define HISI_ACC_SGL_ALIGN_SIZE		64
#define HISI_ACC_MEM_BLOCK_NR		5

struct acc_hw_sge {
	dma_addr_t buf;
	void *page_ctrl;
	__le32 len;
	__le32 pad;
	__le32 pad0;
	__le32 pad1;
};

/* use default sgl head size 64B */
struct hisi_acc_hw_sgl {
	dma_addr_t next_dma;
	__le16 entry_sum_in_chain;
	__le16 entry_sum_in_sgl;
	__le16 entry_length_in_sgl;
	__le16 pad0;
	__le64 pad1[5];
	struct hisi_acc_hw_sgl *next;
	struct acc_hw_sge sge_entries[];
} __aligned(1);
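
/*
 * Layout sanity sketch (added commentary, not from the original source):
 * on a 64-bit build with a 64-bit dma_addr_t, each acc_hw_sge is 32 bytes
 * and the hw sgl header before sge_entries[] is exactly the default 64B
 * head size the comment above refers to. Checks along these lines would
 * confirm it:
 *
 *	static_assert(sizeof(struct acc_hw_sge) == 32);
 *	static_assert(offsetof(struct hisi_acc_hw_sgl, sge_entries) == 64);
 */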

struct hisi_acc_sgl_pool {
	struct mem_block {
		struct hisi_acc_hw_sgl *sgl;
		dma_addr_t sgl_dma;
		size_t size;
	} mem_block[HISI_ACC_MEM_BLOCK_NR];
	u32 sgl_num_per_block;
	u32 block_num;
	u32 count;
	u32 sge_nr;
	size_t sgl_size;
};
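
/*
 * Note (added commentary, not from the original source): the pool carves
 * @count fixed-size hw sgls out of at most HISI_ACC_MEM_BLOCK_NR coherent
 * DMA blocks. Full blocks hold @sgl_num_per_block sgls each; if @count is
 * not a multiple of that, a smaller trailing block holds the remainder.
 * acc_get_sgl() below turns a pool index into a (block, offset) pair with
 * one divide and one modulo.
 */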

/**
 * hisi_acc_create_sgl_pool() - Create a hw sgl pool.
 * @dev: The device which hw sgl pool belongs to.
 * @count: Count of hisi_acc_hw_sgl in pool.
 * @sge_nr: The count of sge in each hw_sgl.
 *
 * This function creates a hw sgl pool; afterwards, the user can get hw sgl
 * memory from it.
 */
struct hisi_acc_sgl_pool *hisi_acc_create_sgl_pool(struct device *dev,
						   u32 count, u32 sge_nr)
{
	u32 sgl_size, block_size, sgl_num_per_block, block_num, remain_sgl;
	struct hisi_acc_sgl_pool *pool;
	struct mem_block *block;
	u32 i, j;

	if (!dev || !count || !sge_nr || sge_nr > HISI_ACC_SGL_SGE_NR_MAX)
		return ERR_PTR(-EINVAL);

	sgl_size = ALIGN(sizeof(struct acc_hw_sge) * sge_nr +
			 sizeof(struct hisi_acc_hw_sgl),
			 HISI_ACC_SGL_ALIGN_SIZE);

	/*
	 * the pool may allocate a block of memory of size PAGE_SIZE * 2^MAX_PAGE_ORDER,
	 * block size may exceed 2^31 on ia64, so the max of block size is 2^31
	 */
	block_size = 1 << (PAGE_SHIFT + MAX_PAGE_ORDER < 32 ?
			   PAGE_SHIFT + MAX_PAGE_ORDER : 31);
	sgl_num_per_block = block_size / sgl_size;
	block_num = count / sgl_num_per_block;
	remain_sgl = count % sgl_num_per_block;
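
	/*
	 * Worked example (added commentary with illustrative numbers,
	 * assuming PAGE_SHIFT = 12, MAX_PAGE_ORDER = 10 and 32-byte sges on
	 * a 64-bit build): block_size = 1 << 22 = 4 MiB. With sge_nr = 255,
	 * sgl_size = ALIGN(255 * 32 + 64, 64) = 8256, so sgl_num_per_block =
	 * 4194304 / 8256 = 508. A pool with count = 1000 then needs
	 * block_num = 1 full block plus remain_sgl = 492 sgls in a trailing
	 * partial block.
	 */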

	if ((!remain_sgl && block_num > HISI_ACC_MEM_BLOCK_NR) ||
	    (remain_sgl > 0 && block_num > HISI_ACC_MEM_BLOCK_NR - 1))
		return ERR_PTR(-EINVAL);

	pool = kzalloc(sizeof(*pool), GFP_KERNEL);
	if (!pool)
		return ERR_PTR(-ENOMEM);
	block = pool->mem_block;

	for (i = 0; i < block_num; i++) {
		block[i].sgl = dma_alloc_coherent(dev, block_size,
						  &block[i].sgl_dma,
						  GFP_KERNEL);
		if (!block[i].sgl) {
			dev_err(dev, "Fail to allocate hw SG buffer!\n");
			goto err_free_mem;
		}

		block[i].size = block_size;
	}

	if (remain_sgl > 0) {
		block[i].sgl = dma_alloc_coherent(dev, remain_sgl * sgl_size,
						  &block[i].sgl_dma,
						  GFP_KERNEL);
		if (!block[i].sgl) {
			dev_err(dev, "Fail to allocate remained hw SG buffer!\n");
			goto err_free_mem;
		}

		block[i].size = remain_sgl * sgl_size;
	}

	pool->sgl_num_per_block = sgl_num_per_block;
	pool->block_num = remain_sgl ? block_num + 1 : block_num;
	pool->count = count;
	pool->sgl_size = sgl_size;
	pool->sge_nr = sge_nr;

	return pool;

err_free_mem:
	for (j = 0; j < i; j++)
		dma_free_coherent(dev, block_size, block[j].sgl,
				  block[j].sgl_dma);

	kfree_sensitive(pool);
	return ERR_PTR(-ENOMEM);
}
EXPORT_SYMBOL_GPL(hisi_acc_create_sgl_pool);
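
/*
 * Example usage (illustrative sketch, not from the original source; "my_dev"
 * and the pool dimensions are made-up values):
 *
 *	struct hisi_acc_sgl_pool *pool;
 *
 *	pool = hisi_acc_create_sgl_pool(my_dev, 512, 64);
 *	if (IS_ERR(pool))
 *		return PTR_ERR(pool);
 *
 *	... map scatterlists with hisi_acc_sg_buf_map_to_hw_sgl() ...
 *
 *	hisi_acc_free_sgl_pool(my_dev, pool);
 */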

/**
 * hisi_acc_free_sgl_pool() - Free a hw sgl pool.
 * @dev: The device which hw sgl pool belongs to.
 * @pool: Pointer of pool.
 *
 * This function frees the memory of a hw sgl pool.
 */
void hisi_acc_free_sgl_pool(struct device *dev, struct hisi_acc_sgl_pool *pool)
{
	struct mem_block *block;
	u32 i;

	if (!dev || !pool)
		return;

	block = pool->mem_block;

	for (i = 0; i < pool->block_num; i++)
		dma_free_coherent(dev, block[i].size, block[i].sgl,
				  block[i].sgl_dma);

	kfree(pool);
}
EXPORT_SYMBOL_GPL(hisi_acc_free_sgl_pool);

static struct hisi_acc_hw_sgl *acc_get_sgl(struct hisi_acc_sgl_pool *pool,
					   u32 index, dma_addr_t *hw_sgl_dma)
{
	struct mem_block *block;
	u32 block_index, offset;

	if (!pool || !hw_sgl_dma || index >= pool->count)
		return ERR_PTR(-EINVAL);

	block = pool->mem_block;
	block_index = index / pool->sgl_num_per_block;
	offset = index % pool->sgl_num_per_block;

	*hw_sgl_dma = block[block_index].sgl_dma + pool->sgl_size * offset;
	return (void *)block[block_index].sgl + pool->sgl_size * offset;
}
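
/*
 * Added commentary (not from the original source): continuing the numeric
 * sketch above, with sgl_num_per_block = 508 an index of 1000 resolves to
 * block_index = 1000 / 508 = 1 and offset = 1000 % 508 = 492, i.e. the
 * 493rd sgl of the second block, at byte offset 492 * sgl_size within it.
 */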

static void sg_map_to_hw_sg(struct scatterlist *sgl,
			    struct acc_hw_sge *hw_sge)
{
	hw_sge->buf = sg_dma_address(sgl);
	hw_sge->len = cpu_to_le32(sg_dma_len(sgl));
	hw_sge->page_ctrl = sg_virt(sgl);
}

static void inc_hw_sgl_sge(struct hisi_acc_hw_sgl *hw_sgl)
{
	u16 var = le16_to_cpu(hw_sgl->entry_sum_in_sgl);

	var++;
	hw_sgl->entry_sum_in_sgl = cpu_to_le16(var);
}

static void update_hw_sgl_sum_sge(struct hisi_acc_hw_sgl *hw_sgl, u16 sum)
{
	hw_sgl->entry_sum_in_chain = cpu_to_le16(sum);
}

static void clear_hw_sgl_sge(struct hisi_acc_hw_sgl *hw_sgl)
{
	struct acc_hw_sge *hw_sge = hw_sgl->sge_entries;
	u16 entry_sum = le16_to_cpu(hw_sgl->entry_sum_in_sgl);
	int i;

	for (i = 0; i < entry_sum; i++) {
		hw_sge[i].page_ctrl = NULL;
		hw_sge[i].buf = 0;
		hw_sge[i].len = 0;
	}
}

/**
 * hisi_acc_sg_buf_map_to_hw_sgl() - Map a scatterlist to a hw sgl.
 * @dev: The device which hw sgl belongs to.
 * @sgl: Scatterlist which will be mapped to hw sgl.
 * @pool: Pool which hw sgl memory will be allocated in.
 * @index: Index of hisi_acc_hw_sgl in pool.
 * @hw_sgl_dma: The dma address of allocated hw sgl.
 *
 * This function builds a hw sgl according to the input sgl; the user can use
 * hw_sgl_dma as src/dst in its BD. Only a single hw sgl is supported currently.
 */
struct hisi_acc_hw_sgl *
hisi_acc_sg_buf_map_to_hw_sgl(struct device *dev,
			      struct scatterlist *sgl,
			      struct hisi_acc_sgl_pool *pool,
			      u32 index, dma_addr_t *hw_sgl_dma)
{
	struct hisi_acc_hw_sgl *curr_hw_sgl;
	unsigned int i, sg_n_mapped;
	dma_addr_t curr_sgl_dma = 0;
	struct acc_hw_sge *curr_hw_sge;
	struct scatterlist *sg;
	int sg_n;

	if (!dev || !sgl || !pool || !hw_sgl_dma)
		return ERR_PTR(-EINVAL);

	sg_n = sg_nents(sgl);

	sg_n_mapped = dma_map_sg(dev, sgl, sg_n, DMA_BIDIRECTIONAL);
	if (!sg_n_mapped) {
		dev_err(dev, "DMA mapping for SG error!\n");
		return ERR_PTR(-EINVAL);
	}
	if (sg_n_mapped > pool->sge_nr) {
		dev_err(dev, "the number of entries in input scatterlist is bigger than SGL pool setting.\n");
		/* undo the mapping made above before bailing out */
		dma_unmap_sg(dev, sgl, sg_n, DMA_BIDIRECTIONAL);
		return ERR_PTR(-EINVAL);
	}

	curr_hw_sgl = acc_get_sgl(pool, index, &curr_sgl_dma);
	if (IS_ERR(curr_hw_sgl)) {
		dev_err(dev, "Get SGL error!\n");
		dma_unmap_sg(dev, sgl, sg_n, DMA_BIDIRECTIONAL);
		return ERR_PTR(-ENOMEM);
	}
	curr_hw_sgl->entry_length_in_sgl = cpu_to_le16(pool->sge_nr);
	curr_hw_sge = curr_hw_sgl->sge_entries;

	for_each_sg(sgl, sg, sg_n_mapped, i) {
		sg_map_to_hw_sg(sg, curr_hw_sge);
		inc_hw_sgl_sge(curr_hw_sgl);
		curr_hw_sge++;
	}

	update_hw_sgl_sum_sge(curr_hw_sgl, pool->sge_nr);
	*hw_sgl_dma = curr_sgl_dma;

	return curr_hw_sgl;
}
EXPORT_SYMBOL_GPL(hisi_acc_sg_buf_map_to_hw_sgl);
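
/*
 * Example usage (illustrative sketch, not from the original source; the
 * scatterlist setup and pool/index management are elided):
 *
 *	struct hisi_acc_hw_sgl *hw_sgl;
 *	dma_addr_t hw_sgl_dma;
 *
 *	hw_sgl = hisi_acc_sg_buf_map_to_hw_sgl(dev, sgl, pool, idx,
 *					       &hw_sgl_dma);
 *	if (IS_ERR(hw_sgl))
 *		return PTR_ERR(hw_sgl);
 *
 *	... program hw_sgl_dma into the device BD as src/dst, run the job ...
 *
 *	hisi_acc_sg_buf_unmap(dev, sgl, hw_sgl);
 */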

/**
 * hisi_acc_sg_buf_unmap() - Unmap allocated hw sgl.
 * @dev: The device which hw sgl belongs to.
 * @sgl: Related scatterlist.
 * @hw_sgl: Virtual address of hw sgl.
 *
 * This function unmaps the allocated hw sgl.
 */
void hisi_acc_sg_buf_unmap(struct device *dev, struct scatterlist *sgl,
			   struct hisi_acc_hw_sgl *hw_sgl)
{
	if (!dev || !sgl || !hw_sgl)
		return;

	dma_unmap_sg(dev, sgl, sg_nents(sgl), DMA_BIDIRECTIONAL);
	clear_hw_sgl_sge(hw_sgl);
	hw_sgl->entry_sum_in_chain = 0;
	hw_sgl->entry_sum_in_sgl = 0;
	hw_sgl->entry_length_in_sgl = 0;
}
EXPORT_SYMBOL_GPL(hisi_acc_sg_buf_unmap);