1// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
2/*
3 * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
4 * Copyright 2018-2019 Amazon.com, Inc. or its affiliates. All rights reserved.
5 * Copyright 2019 Marvell. All rights reserved.
6 */
7#include <linux/xarray.h>
8#include "uverbs.h"
9#include "core_priv.h"
10
11/**
12 * rdma_umap_priv_init() - Initialize the private data of a vma
13 *
14 * @priv: The already allocated private data
15 * @vma: The vm area struct that needs private data
16 * @entry: entry into the mmap_xa that needs to be linked with
17 * this vma
18 *
19 * Each time we map IO memory into user space this keeps track of the
20 * mapping. When the device is hot-unplugged we 'zap' the mmaps in user space
21 * to point to the zero page and allow the hot unplug to proceed.
22 *
23 * This is necessary for cases like PCI physical hot unplug as the actual BAR
24 * memory may vanish after this and access to it from userspace could MCE.
25 *
26 * RDMA drivers supporting disassociation must have their user space designed
27 * to cope in some way with their IO pages going to the zero page.
28 *
29 */
30void rdma_umap_priv_init(struct rdma_umap_priv *priv,
31 struct vm_area_struct *vma,
32 struct rdma_user_mmap_entry *entry)
33{
34 struct ib_uverbs_file *ufile = vma->vm_file->private_data;
35
36 priv->vma = vma;
37 if (entry) {
38 kref_get(kref: &entry->ref);
39 priv->entry = entry;
40 }
41 vma->vm_private_data = priv;
42 /* vm_ops is setup in ib_uverbs_mmap() to avoid module dependencies */
43
44 mutex_lock(&ufile->umap_lock);
45 list_add(new: &priv->list, head: &ufile->umaps);
46 mutex_unlock(lock: &ufile->umap_lock);
47}
48EXPORT_SYMBOL(rdma_umap_priv_init);
49
50/**
51 * rdma_user_mmap_io() - Map IO memory into a process
52 *
53 * @ucontext: associated user context
54 * @vma: the vma related to the current mmap call
55 * @pfn: pfn to map
56 * @size: size to map
57 * @prot: pgprot to use in remap call
58 * @entry: mmap_entry retrieved from rdma_user_mmap_entry_get(), or NULL
59 * if mmap_entry is not used by the driver
60 *
61 * This is to be called by drivers as part of their mmap() functions if they
62 * wish to send something like PCI-E BAR memory to userspace.
63 *
64 * Return -EINVAL on wrong flags or size, -EAGAIN on failure to map. 0 on
65 * success.
66 */
67int rdma_user_mmap_io(struct ib_ucontext *ucontext, struct vm_area_struct *vma,
68 unsigned long pfn, unsigned long size, pgprot_t prot,
69 struct rdma_user_mmap_entry *entry)
70{
71 struct ib_uverbs_file *ufile = ucontext->ufile;
72 struct rdma_umap_priv *priv;
73
74 if (!(vma->vm_flags & VM_SHARED))
75 return -EINVAL;
76
77 if (vma->vm_end - vma->vm_start != size)
78 return -EINVAL;
79
80 /* Driver is using this wrong, must be called by ib_uverbs_mmap */
81 if (WARN_ON(!vma->vm_file ||
82 vma->vm_file->private_data != ufile))
83 return -EINVAL;
84 lockdep_assert_held(&ufile->device->disassociate_srcu);
85
86 priv = kzalloc(size: sizeof(*priv), GFP_KERNEL);
87 if (!priv)
88 return -ENOMEM;
89
90 vma->vm_page_prot = prot;
91 if (io_remap_pfn_range(vma, addr: vma->vm_start, pfn, size, prot)) {
92 kfree(objp: priv);
93 return -EAGAIN;
94 }
95
96 rdma_umap_priv_init(priv, vma, entry);
97 return 0;
98}
99EXPORT_SYMBOL(rdma_user_mmap_io);
100
101/**
102 * rdma_user_mmap_entry_get_pgoff() - Get an entry from the mmap_xa
103 *
104 * @ucontext: associated user context
105 * @pgoff: The mmap offset >> PAGE_SHIFT
106 *
107 * This function is called when a user tries to mmap with an offset (returned
108 * by rdma_user_mmap_get_offset()) it initially received from the driver. The
109 * rdma_user_mmap_entry was created by the function
110 * rdma_user_mmap_entry_insert(). This function increases the refcnt of the
111 * entry so that it won't be deleted from the xarray in the meantime.
112 *
113 * Return an reference to an entry if exists or NULL if there is no
114 * match. rdma_user_mmap_entry_put() must be called to put the reference.
115 */
116struct rdma_user_mmap_entry *
117rdma_user_mmap_entry_get_pgoff(struct ib_ucontext *ucontext,
118 unsigned long pgoff)
119{
120 struct rdma_user_mmap_entry *entry;
121
122 if (pgoff > U32_MAX)
123 return NULL;
124
125 xa_lock(&ucontext->mmap_xa);
126
127 entry = xa_load(&ucontext->mmap_xa, index: pgoff);
128
129 /*
130 * If refcount is zero, entry is already being deleted, driver_removed
131 * indicates that the no further mmaps are possible and we waiting for
132 * the active VMAs to be closed.
133 */
134 if (!entry || entry->start_pgoff != pgoff || entry->driver_removed ||
135 !kref_get_unless_zero(kref: &entry->ref))
136 goto err;
137
138 xa_unlock(&ucontext->mmap_xa);
139
140 ibdev_dbg(ucontext->device, "mmap: pgoff[%#lx] npages[%#zx] returned\n",
141 pgoff, entry->npages);
142
143 return entry;
144
145err:
146 xa_unlock(&ucontext->mmap_xa);
147 return NULL;
148}
149EXPORT_SYMBOL(rdma_user_mmap_entry_get_pgoff);
150
151/**
152 * rdma_user_mmap_entry_get() - Get an entry from the mmap_xa
153 *
154 * @ucontext: associated user context
155 * @vma: the vma being mmap'd into
156 *
157 * This function is like rdma_user_mmap_entry_get_pgoff() except that it also
158 * checks that the VMA is correct.
159 */
160struct rdma_user_mmap_entry *
161rdma_user_mmap_entry_get(struct ib_ucontext *ucontext,
162 struct vm_area_struct *vma)
163{
164 struct rdma_user_mmap_entry *entry;
165
166 if (!(vma->vm_flags & VM_SHARED))
167 return NULL;
168 entry = rdma_user_mmap_entry_get_pgoff(ucontext, vma->vm_pgoff);
169 if (!entry)
170 return NULL;
171 if (entry->npages * PAGE_SIZE != vma->vm_end - vma->vm_start) {
172 rdma_user_mmap_entry_put(entry);
173 return NULL;
174 }
175 return entry;
176}
177EXPORT_SYMBOL(rdma_user_mmap_entry_get);
178
179static void rdma_user_mmap_entry_free(struct kref *kref)
180{
181 struct rdma_user_mmap_entry *entry =
182 container_of(kref, struct rdma_user_mmap_entry, ref);
183 struct ib_ucontext *ucontext = entry->ucontext;
184 unsigned long i;
185
186 /*
187 * Erase all entries occupied by this single entry, this is deferred
188 * until all VMA are closed so that the mmap offsets remain unique.
189 */
190 xa_lock(&ucontext->mmap_xa);
191 for (i = 0; i < entry->npages; i++)
192 __xa_erase(&ucontext->mmap_xa, index: entry->start_pgoff + i);
193 xa_unlock(&ucontext->mmap_xa);
194
195 ibdev_dbg(ucontext->device, "mmap: pgoff[%#lx] npages[%#zx] removed\n",
196 entry->start_pgoff, entry->npages);
197
198 if (ucontext->device->ops.mmap_free)
199 ucontext->device->ops.mmap_free(entry);
200}
201
202/**
203 * rdma_user_mmap_entry_put() - Drop reference to the mmap entry
204 *
205 * @entry: an entry in the mmap_xa
206 *
207 * This function is called when the mapping is closed if it was
208 * an io mapping or when the driver is done with the entry for
209 * some other reason.
210 * Should be called after rdma_user_mmap_entry_get was called
211 * and entry is no longer needed. This function will erase the
212 * entry and free it if its refcnt reaches zero.
213 */
214void rdma_user_mmap_entry_put(struct rdma_user_mmap_entry *entry)
215{
216 kref_put(kref: &entry->ref, release: rdma_user_mmap_entry_free);
217}
218EXPORT_SYMBOL(rdma_user_mmap_entry_put);
219
220/**
221 * rdma_user_mmap_entry_remove() - Drop reference to entry and
222 * mark it as unmmapable
223 *
224 * @entry: the entry to insert into the mmap_xa
225 *
226 * Drivers can call this to prevent userspace from creating more mappings for
227 * entry, however existing mmaps continue to exist and ops->mmap_free() will
228 * not be called until all user mmaps are destroyed.
229 */
230void rdma_user_mmap_entry_remove(struct rdma_user_mmap_entry *entry)
231{
232 if (!entry)
233 return;
234
235 xa_lock(&entry->ucontext->mmap_xa);
236 entry->driver_removed = true;
237 xa_unlock(&entry->ucontext->mmap_xa);
238 kref_put(kref: &entry->ref, release: rdma_user_mmap_entry_free);
239}
240EXPORT_SYMBOL(rdma_user_mmap_entry_remove);
241
242/**
243 * rdma_user_mmap_entry_insert_range() - Insert an entry to the mmap_xa
244 * in a given range.
245 *
246 * @ucontext: associated user context.
247 * @entry: the entry to insert into the mmap_xa
248 * @length: length of the address that will be mmapped
249 * @min_pgoff: minimum pgoff to be returned
250 * @max_pgoff: maximum pgoff to be returned
251 *
252 * This function should be called by drivers that use the rdma_user_mmap
253 * interface for implementing their mmap syscall A database of mmap offsets is
254 * handled in the core and helper functions are provided to insert entries
255 * into the database and extract entries when the user calls mmap with the
256 * given offset. The function allocates a unique page offset in a given range
257 * that should be provided to user, the user will use the offset to retrieve
258 * information such as address to be mapped and how.
259 *
260 * Return: 0 on success and -ENOMEM on failure
261 */
262int rdma_user_mmap_entry_insert_range(struct ib_ucontext *ucontext,
263 struct rdma_user_mmap_entry *entry,
264 size_t length, u32 min_pgoff,
265 u32 max_pgoff)
266{
267 struct ib_uverbs_file *ufile = ucontext->ufile;
268 XA_STATE(xas, &ucontext->mmap_xa, min_pgoff);
269 u32 xa_first, xa_last, npages;
270 int err;
271 u32 i;
272
273 if (!entry)
274 return -EINVAL;
275
276 kref_init(kref: &entry->ref);
277 entry->ucontext = ucontext;
278
279 /*
280 * We want the whole allocation to be done without interruption from a
281 * different thread. The allocation requires finding a free range and
282 * storing. During the xa_insert the lock could be released, possibly
283 * allowing another thread to choose the same range.
284 */
285 mutex_lock(&ufile->umap_lock);
286
287 xa_lock(&ucontext->mmap_xa);
288
289 /* We want to find an empty range */
290 npages = (u32)DIV_ROUND_UP(length, PAGE_SIZE);
291 entry->npages = npages;
292 while (true) {
293 /* First find an empty index */
294 xas_find_marked(&xas, max: max_pgoff, XA_FREE_MARK);
295 if (xas.xa_node == XAS_RESTART)
296 goto err_unlock;
297
298 xa_first = xas.xa_index;
299
300 /* Is there enough room to have the range? */
301 if (check_add_overflow(xa_first, npages, &xa_last))
302 goto err_unlock;
303
304 /*
305 * Now look for the next present entry. If an entry doesn't
306 * exist, we found an empty range and can proceed.
307 */
308 xas_next_entry(xas: &xas, max: xa_last - 1);
309 if (xas.xa_node == XAS_BOUNDS || xas.xa_index >= xa_last)
310 break;
311 }
312
313 for (i = xa_first; i < xa_last; i++) {
314 err = __xa_insert(&ucontext->mmap_xa, index: i, entry, GFP_KERNEL);
315 if (err)
316 goto err_undo;
317 }
318
319 /*
320 * Internally the kernel uses a page offset, in libc this is a byte
321 * offset. Drivers should not return pgoff to userspace.
322 */
323 entry->start_pgoff = xa_first;
324 xa_unlock(&ucontext->mmap_xa);
325 mutex_unlock(lock: &ufile->umap_lock);
326
327 ibdev_dbg(ucontext->device, "mmap: pgoff[%#lx] npages[%#x] inserted\n",
328 entry->start_pgoff, npages);
329
330 return 0;
331
332err_undo:
333 for (; i > xa_first; i--)
334 __xa_erase(&ucontext->mmap_xa, index: i - 1);
335
336err_unlock:
337 xa_unlock(&ucontext->mmap_xa);
338 mutex_unlock(lock: &ufile->umap_lock);
339 return -ENOMEM;
340}
341EXPORT_SYMBOL(rdma_user_mmap_entry_insert_range);
342
343/**
344 * rdma_user_mmap_entry_insert() - Insert an entry to the mmap_xa.
345 *
346 * @ucontext: associated user context.
347 * @entry: the entry to insert into the mmap_xa
348 * @length: length of the address that will be mmapped
349 *
350 * This function should be called by drivers that use the rdma_user_mmap
351 * interface for handling user mmapped addresses. The database is handled in
352 * the core and helper functions are provided to insert entries into the
353 * database and extract entries when the user calls mmap with the given offset.
354 * The function allocates a unique page offset that should be provided to user,
355 * the user will use the offset to retrieve information such as address to
356 * be mapped and how.
357 *
358 * Return: 0 on success and -ENOMEM on failure
359 */
360int rdma_user_mmap_entry_insert(struct ib_ucontext *ucontext,
361 struct rdma_user_mmap_entry *entry,
362 size_t length)
363{
364 return rdma_user_mmap_entry_insert_range(ucontext, entry, length, 0,
365 U32_MAX);
366}
367EXPORT_SYMBOL(rdma_user_mmap_entry_insert);
368

/* Source: linux/drivers/infiniband/core/ib_core_uverbs.c */