// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause

/* Authors: Bernard Metzler <bmt@zurich.ibm.com> */
/* Copyright (c) 2008-2019, IBM Corporation */

#include <linux/gfp.h>
#include <rdma/ib_verbs.h>
#include <rdma/ib_umem.h>
#include <linux/dma-mapping.h>
#include <linux/slab.h>
#include <linux/sched/mm.h>
#include <linux/resource.h>

#include "siw.h"
#include "siw_mem.h"

/* STag lookup is based on its index part only (24 bits). */
#define SIW_STAG_MAX_INDEX 0x00ffffff

/*
 * The code avoids the special STag of zero and tries to randomize
 * STag values between 1 and SIW_STAG_MAX_INDEX.
 */
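/*
 * Resulting STag layout (a sketch, derived from the "id << 8"
 * assignment below): the xarray id forms the 24-bit index in
 * bits 31..8; the low byte holds the 8-bit STag key, which is
 * left at zero for a freshly added MEM object.
 *
 *   31                    8 7         0
 *  +-----------------------+-----------+
 *  |     24-bit index      | 8-bit key |
 *  +-----------------------+-----------+
 */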
int siw_mem_add(struct siw_device *sdev, struct siw_mem *m)
{
	struct xa_limit limit = XA_LIMIT(1, SIW_STAG_MAX_INDEX);
	u32 id, next;

	get_random_bytes(&next, 4);
	next &= SIW_STAG_MAX_INDEX;

	if (xa_alloc_cyclic(&sdev->mem_xa, &id, m, limit, &next,
			    GFP_KERNEL) < 0)
		return -ENOMEM;

	/* Set the STag index part */
	m->stag = id << 8;

	siw_dbg_mem(m, "new MEM object\n");

	return 0;
}

/*
 * siw_mem_id2obj()
 *
 * Resolves memory from an STag given by its index part. Might be
 * called from:
 * o process context before sending out of an sgl, or
 * o in softirq when resolving target memory
 */
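/*
 * Illustrative caller pattern (a sketch only, not mandated by this
 * file): the reference taken via kref_get_unless_zero() must be
 * dropped with siw_mem_put() once the object is no longer used:
 *
 *	mem = siw_mem_id2obj(sdev, stag >> 8);
 *	if (mem) {
 *		... use mem ...
 *		siw_mem_put(mem);
 *	}
 */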
struct siw_mem *siw_mem_id2obj(struct siw_device *sdev, int stag_index)
{
	struct siw_mem *mem;

	rcu_read_lock();
	mem = xa_load(&sdev->mem_xa, stag_index);
	if (likely(mem && kref_get_unless_zero(&mem->ref))) {
		rcu_read_unlock();
		return mem;
	}
	rcu_read_unlock();

	return NULL;
}

void siw_umem_release(struct siw_umem *umem)
{
	int i, num_pages = umem->num_pages;

	if (umem->base_mem)
		ib_umem_release(umem->base_mem);

	for (i = 0; num_pages > 0; i++) {
		kfree(umem->page_chunk[i].plist);
		num_pages -= PAGES_PER_CHUNK;
	}
	kfree(umem->page_chunk);
	kfree(umem);
}

int siw_mr_add_mem(struct siw_mr *mr, struct ib_pd *pd, void *mem_obj,
		   u64 start, u64 len, int rights)
{
	struct siw_device *sdev = to_siw_dev(pd->device);
	struct siw_mem *mem = kzalloc(sizeof(*mem), GFP_KERNEL);
	struct xa_limit limit = XA_LIMIT(1, SIW_STAG_MAX_INDEX);
	u32 id, next;

	if (!mem)
		return -ENOMEM;

	mem->mem_obj = mem_obj;
	mem->stag_valid = 0;
	mem->sdev = sdev;
	mem->va = start;
	mem->len = len;
	mem->pd = pd;
	mem->perms = rights & IWARP_ACCESS_MASK;
	kref_init(&mem->ref);

	get_random_bytes(&next, 4);
	next &= SIW_STAG_MAX_INDEX;

	if (xa_alloc_cyclic(&sdev->mem_xa, &id, mem, limit, &next,
			    GFP_KERNEL) < 0) {
		kfree(mem);
		return -ENOMEM;
	}

	mr->mem = mem;
	/* Set the STag index part */
	mem->stag = id << 8;
	mr->base_mr.lkey = mr->base_mr.rkey = mem->stag;

	return 0;
}

void siw_mr_drop_mem(struct siw_mr *mr)
{
	struct siw_mem *mem = mr->mem, *found;

	mem->stag_valid = 0;

	/* make STag invalid visible asap */
	smp_mb();

	found = xa_erase(&mem->sdev->mem_xa, mem->stag >> 8);
	WARN_ON(found != mem);
	siw_mem_put(mem);
}

void siw_free_mem(struct kref *ref)
{
	struct siw_mem *mem = container_of(ref, struct siw_mem, ref);

	siw_dbg_mem(mem, "free mem, pbl: %s\n", mem->is_pbl ? "y" : "n");

	if (!mem->is_mw && mem->mem_obj) {
		if (!mem->is_pbl)
			siw_umem_release(mem->umem);
		else
			kfree(mem->pbl);
	}
	kfree(mem);
}

/*
 * siw_check_mem()
 *
 * Check protection domain, STag state, access permissions and
 * address range for memory object.
 *
 * @pd: Protection Domain memory should belong to
 * @mem: memory to be checked
 * @addr: starting addr of mem
 * @perms: requested access permissions
 * @len: len of memory interval to be checked
 */
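/*
 * Illustrative use (a sketch; the access flag is an example only):
 * validate a remote read of @len bytes at @addr before touching
 * the memory:
 *
 *	rv = siw_check_mem(pd, mem, addr, IB_ACCESS_REMOTE_READ, len);
 *	if (rv != E_ACCESS_OK)
 *		return rv;
 */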
int siw_check_mem(struct ib_pd *pd, struct siw_mem *mem, u64 addr,
		  enum ib_access_flags perms, int len)
{
	if (!mem->stag_valid) {
		siw_dbg_pd(pd, "STag 0x%08x invalid\n", mem->stag);
		return -E_STAG_INVALID;
	}
	if (mem->pd != pd) {
		siw_dbg_pd(pd, "STag 0x%08x: PD mismatch\n", mem->stag);
		return -E_PD_MISMATCH;
	}
	/*
	 * check access permissions
	 */
	if ((mem->perms & perms) < perms) {
		siw_dbg_pd(pd, "permissions 0x%08x < 0x%08x\n",
			   mem->perms, perms);
		return -E_ACCESS_PERM;
	}
	/*
	 * Check if access falls into valid memory interval.
	 */
	if (addr < mem->va || addr + len > mem->va + mem->len) {
		siw_dbg_pd(pd, "MEM interval len %d\n", len);
		siw_dbg_pd(pd, "[0x%pK, 0x%pK] out of bounds\n",
			   (void *)(uintptr_t)addr,
			   (void *)(uintptr_t)(addr + len));
		siw_dbg_pd(pd, "[0x%pK, 0x%pK] STag=0x%08x\n",
			   (void *)(uintptr_t)mem->va,
			   (void *)(uintptr_t)(mem->va + mem->len),
			   mem->stag);

		return -E_BASE_BOUNDS;
	}
	return E_ACCESS_OK;
}

/*
 * siw_check_sge()
 *
 * Check SGE for access rights in given interval
 *
 * @pd: Protection Domain memory should belong to
 * @sge: SGE to be checked
 * @mem: location of memory reference within array
 * @perms: requested access permissions
 * @off: starting offset in SGE
 * @len: len of memory interval to be checked
 *
 * NOTE: Function references the SGE's memory object (*mem)
 * if not yet done. The new reference is kept if the check went ok
 * and released if the check failed. If *mem is already valid, no
 * new lookup is done and the object is not released if the check
 * fails.
 */
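/*
 * Illustrative call (a sketch; the access flag is an example only):
 * check local write access to the first @len bytes of an SGE,
 * resolving the memory object on demand:
 *
 *	rv = siw_check_sge(pd, sge, mem, IB_ACCESS_LOCAL_WRITE, 0, len);
 */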
int siw_check_sge(struct ib_pd *pd, struct siw_sge *sge, struct siw_mem *mem[],
		  enum ib_access_flags perms, u32 off, int len)
{
	struct siw_device *sdev = to_siw_dev(pd->device);
	struct siw_mem *new = NULL;
	int rv = E_ACCESS_OK;

	if (len + off > sge->length) {
		rv = -E_BASE_BOUNDS;
		goto fail;
	}
	if (*mem == NULL) {
		new = siw_mem_id2obj(sdev, sge->lkey >> 8);
		if (unlikely(!new)) {
			siw_dbg_pd(pd, "STag unknown: 0x%08x\n", sge->lkey);
			rv = -E_STAG_INVALID;
			goto fail;
		}
		*mem = new;
	}
	/* Check if user re-registered with different STag key */
	if (unlikely((*mem)->stag != sge->lkey)) {
		siw_dbg_mem((*mem), "STag mismatch: 0x%08x\n", sge->lkey);
		rv = -E_STAG_INVALID;
		goto fail;
	}
	rv = siw_check_mem(pd, *mem, sge->laddr + off, perms, len);
	if (unlikely(rv))
		goto fail;

	return 0;

fail:
	if (new) {
		*mem = NULL;
		siw_mem_put(new);
	}
	return rv;
}

void siw_wqe_put_mem(struct siw_wqe *wqe, enum siw_opcode op)
{
	switch (op) {
	case SIW_OP_SEND:
	case SIW_OP_WRITE:
	case SIW_OP_SEND_WITH_IMM:
	case SIW_OP_SEND_REMOTE_INV:
	case SIW_OP_READ:
	case SIW_OP_READ_LOCAL_INV:
		if (!(wqe->sqe.flags & SIW_WQE_INLINE))
			siw_unref_mem_sgl(wqe->mem, wqe->sqe.num_sge);
		break;

	case SIW_OP_RECEIVE:
		siw_unref_mem_sgl(wqe->mem, wqe->rqe.num_sge);
		break;

	case SIW_OP_READ_RESPONSE:
		siw_unref_mem_sgl(wqe->mem, 1);
		break;

	default:
		/*
		 * SIW_OP_INVAL_STAG and SIW_OP_REG_MR
		 * do not hold memory references
		 */
		break;
	}
}

int siw_invalidate_stag(struct ib_pd *pd, u32 stag)
{
	struct siw_device *sdev = to_siw_dev(pd->device);
	struct siw_mem *mem = siw_mem_id2obj(sdev, stag >> 8);
	int rv = 0;

	if (unlikely(!mem)) {
		siw_dbg_pd(pd, "STag 0x%08x unknown\n", stag);
		return -EINVAL;
	}
	if (unlikely(mem->pd != pd)) {
		siw_dbg_pd(pd, "PD mismatch for STag 0x%08x\n", stag);
		rv = -EACCES;
		goto out;
	}
	/*
	 * Per RDMA verbs definition, an STag may already be in invalid
	 * state if invalidation is requested. So no state check here.
	 */
	mem->stag_valid = 0;

	siw_dbg_pd(pd, "STag 0x%08x now invalid\n", stag);
out:
	siw_mem_put(mem);
	return rv;
}

/*
 * Gets physical address backed by PBL element. Address is referenced
 * by linear byte offset into list of variably sized PB elements.
 * Optionally, provides remaining len within current element, and
 * current PBL index for later resume at same element.
 */
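/*
 * Illustrative walk (a sketch): translate a byte offset once and
 * resume later from the returned index:
 *
 *	int len, idx = 0;
 *	dma_addr_t pa = siw_pbl_get_buffer(pbl, off, &len, &idx);
 *
 * A return value of 0 with *len == 0 indicates that @off lies
 * beyond the end of the list.
 */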
dma_addr_t siw_pbl_get_buffer(struct siw_pbl *pbl, u64 off, int *len, int *idx)
{
	int i = idx ? *idx : 0;

	while (i < pbl->num_buf) {
		struct siw_pble *pble = &pbl->pbe[i];

		if (pble->pbl_off + pble->size > off) {
			u64 pble_off = off - pble->pbl_off;

			if (len)
				*len = pble->size - pble_off;
			if (idx)
				*idx = i;

			return pble->addr + pble_off;
		}
		i++;
	}
	if (len)
		*len = 0;
	return 0;
}

struct siw_pbl *siw_pbl_alloc(u32 num_buf)
{
	struct siw_pbl *pbl;

	if (num_buf == 0)
		return ERR_PTR(-EINVAL);

	pbl = kzalloc(struct_size(pbl, pbe, num_buf), GFP_KERNEL);
	if (!pbl)
		return ERR_PTR(-ENOMEM);

	pbl->max_buf = num_buf;

	return pbl;
}

struct siw_umem *siw_umem_get(struct ib_device *base_dev, u64 start,
			      u64 len, int rights)
{
	struct siw_umem *umem;
	struct ib_umem *base_mem;
	struct sg_page_iter sg_iter;
	struct sg_table *sgt;
	u64 first_page_va;
	int num_pages, num_chunks, i, rv = 0;

	if (!len)
		return ERR_PTR(-EINVAL);

	first_page_va = start & PAGE_MASK;
	num_pages = PAGE_ALIGN(start + len - first_page_va) >> PAGE_SHIFT;
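	/*
	 * Rounding the chunk count up this way may allocate one spare,
	 * unused chunk pointer when num_pages is an exact multiple of
	 * PAGES_PER_CHUNK.
	 */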
	num_chunks = (num_pages >> CHUNK_SHIFT) + 1;

	umem = kzalloc(sizeof(*umem), GFP_KERNEL);
	if (!umem)
		return ERR_PTR(-ENOMEM);

	umem->page_chunk =
		kcalloc(num_chunks, sizeof(struct siw_page_chunk), GFP_KERNEL);
	if (!umem->page_chunk) {
		rv = -ENOMEM;
		goto err_out;
	}
	base_mem = ib_umem_get(base_dev, start, len, rights);
	if (IS_ERR(base_mem)) {
		rv = PTR_ERR(base_mem);
		siw_dbg(base_dev, "Cannot pin user memory: %d\n", rv);
		goto err_out;
	}
	umem->fp_addr = first_page_va;
	umem->base_mem = base_mem;

	sgt = &base_mem->sgt_append.sgt;
	__sg_page_iter_start(&sg_iter, sgt->sgl, sgt->orig_nents, 0);

	if (!__sg_page_iter_next(&sg_iter)) {
		rv = -EINVAL;
		goto err_out;
	}
	for (i = 0; num_pages > 0; i++) {
		int nents = min_t(int, num_pages, PAGES_PER_CHUNK);
		struct page **plist =
			kcalloc(nents, sizeof(struct page *), GFP_KERNEL);

		if (!plist) {
			rv = -ENOMEM;
			goto err_out;
		}
		umem->page_chunk[i].plist = plist;
		while (nents--) {
			*plist = sg_page_iter_page(&sg_iter);
			umem->num_pages++;
			num_pages--;
			plist++;
			if (!__sg_page_iter_next(&sg_iter))
				break;
		}
	}
	return umem;
err_out:
	siw_umem_release(umem);

	return ERR_PTR(rv);
}