// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/*
 * Copyright(c) 2020 Cornelis Networks, Inc.
 * Copyright(c) 2016 - 2017 Intel Corporation.
 */

#include <linux/list.h>
#include <linux/rculist.h>
#include <linux/mmu_notifier.h>
#include <linux/interval_tree_generic.h>
#include <linux/sched/mm.h>

#include "mmu_rb.h"
#include "trace.h"

static unsigned long mmu_node_start(struct mmu_rb_node *);
static unsigned long mmu_node_last(struct mmu_rb_node *);
static int mmu_notifier_range_start(struct mmu_notifier *,
				    const struct mmu_notifier_range *);
static struct mmu_rb_node *__mmu_rb_search(struct mmu_rb_handler *,
					   unsigned long, unsigned long);
static void release_immediate(struct kref *refcount);
static void handle_remove(struct work_struct *work);

static const struct mmu_notifier_ops mn_opts = {
	.invalidate_range_start = mmu_notifier_range_start,
};

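/*
 * INTERVAL_TREE_DEFINE() instantiates the static interval-tree helpers
 * used throughout this file: __mmu_int_rb_insert(), __mmu_int_rb_remove(),
 * __mmu_int_rb_iter_first() and __mmu_int_rb_iter_next(). Node bounds are
 * taken from mmu_node_start()/mmu_node_last() below, so every tree
 * operation sees page-aligned, inclusive [start, last] intervals.
 */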
INTERVAL_TREE_DEFINE(struct mmu_rb_node, node, unsigned long, __last,
		     mmu_node_start, mmu_node_last, static, __mmu_int_rb);

static unsigned long mmu_node_start(struct mmu_rb_node *node)
{
	return node->addr & PAGE_MASK;
}

static unsigned long mmu_node_last(struct mmu_rb_node *node)
{
	return PAGE_ALIGN(node->addr + node->len) - 1;
}
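
/*
 * Worked example of the rounding above (assuming PAGE_SIZE == 4096):
 * a node with addr == 0x1234 and len == 0x10 occupies the inclusive
 * interval [0x1000, 0x1fff] in the tree. The start rounds down to a
 * page boundary and the last byte rounds up to the end of the final
 * page, so an invalidation anywhere in a covered page will match.
 */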

int hfi1_mmu_rb_register(void *ops_arg,
			 struct mmu_rb_ops *ops,
			 struct workqueue_struct *wq,
			 struct mmu_rb_handler **handler)
{
	struct mmu_rb_handler *h;
	void *free_ptr;
	int ret;

	free_ptr = kzalloc(sizeof(*h) + cache_line_size() - 1, GFP_KERNEL);
	if (!free_ptr)
		return -ENOMEM;

	h = PTR_ALIGN(free_ptr, cache_line_size());
	h->root = RB_ROOT_CACHED;
	h->ops = ops;
	h->ops_arg = ops_arg;
	INIT_HLIST_NODE(&h->mn.hlist);
	spin_lock_init(&h->lock);
	h->mn.ops = &mn_opts;
	INIT_WORK(&h->del_work, handle_remove);
	INIT_LIST_HEAD(&h->del_list);
	INIT_LIST_HEAD(&h->lru_list);
	h->wq = wq;
	h->free_ptr = free_ptr;

	ret = mmu_notifier_register(&h->mn, current->mm);
	if (ret) {
		kfree(free_ptr);
		return ret;
	}

	*handler = h;
	return 0;
}
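
/*
 * Hypothetical usage of hfi1_mmu_rb_register() above. This is only a
 * sketch: "mydev", "mydev_ops" and "mydev_remove" are illustrative names,
 * not part of this driver. A caller supplies at least a .remove callback
 * (see struct mmu_rb_ops in mmu_rb.h) and a workqueue for deferred
 * removals:
 *
 *	static void mydev_remove(void *ops_arg, struct mmu_rb_node *node)
 *	{
 *		// unpin pages, free per-node state, ...
 *	}
 *
 *	static struct mmu_rb_ops mydev_ops = {
 *		.remove = mydev_remove,
 *	};
 *
 *	ret = hfi1_mmu_rb_register(mydev, &mydev_ops, mydev->wq, &handler);
 *
 * Registration binds the notifier to current->mm, which is why
 * hfi1_mmu_rb_insert() and hfi1_mmu_rb_evict() later compare current->mm
 * against handler->mn.mm.
 */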

void hfi1_mmu_rb_unregister(struct mmu_rb_handler *handler)
{
	struct mmu_rb_node *rbnode;
	struct rb_node *node;
	unsigned long flags;
	struct list_head del_list;

	/* Prevent freeing of mm until we are completely finished. */
	mmgrab(handler->mn.mm);

	/* Unregister first so we don't get any more notifications. */
	mmu_notifier_unregister(&handler->mn, handler->mn.mm);

	/*
	 * Make sure the wq delete handler is finished running. It will not
	 * be triggered once the mmu notifiers are unregistered above.
	 */
	flush_work(&handler->del_work);

	INIT_LIST_HEAD(&del_list);

	spin_lock_irqsave(&handler->lock, flags);
	while ((node = rb_first_cached(&handler->root))) {
		rbnode = rb_entry(node, struct mmu_rb_node, node);
		rb_erase_cached(node, &handler->root);
		/* move from LRU list to delete list */
		list_move(&rbnode->list, &del_list);
	}
	spin_unlock_irqrestore(&handler->lock, flags);

	while (!list_empty(&del_list)) {
		rbnode = list_first_entry(&del_list, struct mmu_rb_node, list);
		list_del(&rbnode->list);
		kref_put(&rbnode->refcount, release_immediate);
	}

	/* Now the mm may be freed. */
	mmdrop(handler->mn.mm);

	kfree(handler->free_ptr);
}

int hfi1_mmu_rb_insert(struct mmu_rb_handler *handler,
		       struct mmu_rb_node *mnode)
{
	struct mmu_rb_node *node;
	unsigned long flags;
	int ret = 0;

	trace_hfi1_mmu_rb_insert(mnode);

	if (current->mm != handler->mn.mm)
		return -EPERM;

	spin_lock_irqsave(&handler->lock, flags);
	node = __mmu_rb_search(handler, mnode->addr, mnode->len);
	if (node) {
		ret = -EEXIST;
		goto unlock;
	}
	__mmu_int_rb_insert(mnode, &handler->root);
	list_add_tail(&mnode->list, &handler->lru_list);
	mnode->handler = handler;
unlock:
	spin_unlock_irqrestore(&handler->lock, flags);
	return ret;
}
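
/*
 * Typical caller-side pattern for hfi1_mmu_rb_insert() above (a sketch;
 * the real callers live elsewhere in the hfi1 driver). The initial
 * kref becomes the tree's reference, and -EEXIST means an overlapping
 * range is already cached:
 *
 *	node->addr = vaddr;
 *	node->len = len;
 *	kref_init(&node->refcount);	// ref owned by the tree/LRU
 *	ret = hfi1_mmu_rb_insert(handler, node);
 *	if (ret == -EEXIST)
 *		...			// reuse the cached node instead
 */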

/* Caller must hold handler lock */
struct mmu_rb_node *hfi1_mmu_rb_get_first(struct mmu_rb_handler *handler,
					  unsigned long addr, unsigned long len)
{
	struct mmu_rb_node *node;

	trace_hfi1_mmu_rb_search(addr, len);
	node = __mmu_int_rb_iter_first(&handler->root, addr, (addr + len) - 1);
	if (node)
		list_move_tail(&node->list, &handler->lru_list);
	return node;
}
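
/*
 * Sketch of the expected lookup pattern for hfi1_mmu_rb_get_first()
 * above: search under handler->lock, take a reference while still
 * locked so an invalidation cannot free the node, then drop the lock
 * before using it:
 *
 *	spin_lock_irqsave(&handler->lock, flags);
 *	node = hfi1_mmu_rb_get_first(handler, vaddr, len);
 *	if (node)
 *		kref_get(&node->refcount);
 *	spin_unlock_irqrestore(&handler->lock, flags);
 *	...
 *	kref_put(&node->refcount, hfi1_mmu_rb_release);
 */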

/* Caller must hold handler lock */
static struct mmu_rb_node *__mmu_rb_search(struct mmu_rb_handler *handler,
					   unsigned long addr,
					   unsigned long len)
{
	struct mmu_rb_node *node = NULL;

	trace_hfi1_mmu_rb_search(addr, len);
	if (!handler->ops->filter) {
		node = __mmu_int_rb_iter_first(&handler->root, addr,
					       (addr + len) - 1);
	} else {
		for (node = __mmu_int_rb_iter_first(&handler->root, addr,
						    (addr + len) - 1);
		     node;
		     node = __mmu_int_rb_iter_next(node, addr,
						   (addr + len) - 1)) {
			if (handler->ops->filter(node, addr, len))
				return node;
		}
	}
	return node;
}

/*
 * Must NOT call while holding mnode->handler->lock.
 * mnode->handler->ops->remove() may sleep and mnode->handler->lock is a
 * spinlock.
 */
static void release_immediate(struct kref *refcount)
{
	struct mmu_rb_node *mnode =
		container_of(refcount, struct mmu_rb_node, refcount);
	trace_hfi1_mmu_release_node(mnode);
	mnode->handler->ops->remove(mnode->handler->ops_arg, mnode);
}

/* Caller must hold mnode->handler->lock */
static void release_nolock(struct kref *refcount)
{
	struct mmu_rb_node *mnode =
		container_of(refcount, struct mmu_rb_node, refcount);
	list_move(&mnode->list, &mnode->handler->del_list);
	queue_work(mnode->handler->wq, &mnode->handler->del_work);
}

/*
 * struct mmu_rb_node->refcount kref_put() callback.
 * Adds mmu_rb_node to mmu_rb_node->handler->del_list and queues
 * handler->del_work on handler->wq.
 * Does not remove mmu_rb_node from handler->lru_list or handler->root.
 * Acquires mmu_rb_node->handler->lock; do not call while already holding
 * handler->lock.
 */
void hfi1_mmu_rb_release(struct kref *refcount)
{
	struct mmu_rb_node *mnode =
		container_of(refcount, struct mmu_rb_node, refcount);
	struct mmu_rb_handler *handler = mnode->handler;
	unsigned long flags;

	spin_lock_irqsave(&handler->lock, flags);
	list_move(&mnode->list, &mnode->handler->del_list);
	spin_unlock_irqrestore(&handler->lock, flags);
	queue_work(handler->wq, &handler->del_work);
}
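
/*
 * The three kref release callbacks in this file differ only in locking
 * context and in whether ops->remove() runs synchronously:
 *
 *	release_immediate()	caller must NOT hold handler->lock;
 *				calls ops->remove() directly (may sleep).
 *	release_nolock()	caller MUST hold handler->lock; defers
 *				ops->remove() to handler->wq via del_list.
 *	hfi1_mmu_rb_release()	caller must NOT hold handler->lock;
 *				takes it, then defers like release_nolock().
 */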

void hfi1_mmu_rb_evict(struct mmu_rb_handler *handler, void *evict_arg)
{
	struct mmu_rb_node *rbnode, *ptr;
	struct list_head del_list;
	unsigned long flags;
	bool stop = false;

	if (current->mm != handler->mn.mm)
		return;

	INIT_LIST_HEAD(&del_list);

	spin_lock_irqsave(&handler->lock, flags);
	list_for_each_entry_safe(rbnode, ptr, &handler->lru_list, list) {
		/* refcount == 1 means only the handler still holds a ref */
		if (kref_read(&rbnode->refcount) > 1)
			continue;

		if (handler->ops->evict(handler->ops_arg, rbnode, evict_arg,
					&stop)) {
			__mmu_int_rb_remove(rbnode, &handler->root);
			/* move from LRU list to delete list */
			list_move(&rbnode->list, &del_list);
		}
		if (stop)
			break;
	}
	spin_unlock_irqrestore(&handler->lock, flags);

	list_for_each_entry_safe(rbnode, ptr, &del_list, list) {
		trace_hfi1_mmu_rb_evict(rbnode);
		kref_put(&rbnode->refcount, release_immediate);
	}
}
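
/*
 * Hypothetical evict callback matching the ops->evict() call above
 * (illustrative only; "mydev_evict" and "mydev_evict_data" are made-up
 * names, and len >> PAGE_SHIFT is only an approximate page count).
 * Returning nonzero approves eviction of @mnode; setting *stop ends the
 * LRU walk early, e.g. once enough pages have been reclaimed:
 *
 *	static int mydev_evict(void *ops_arg, struct mmu_rb_node *mnode,
 *			       void *evict_arg, bool *stop)
 *	{
 *		struct mydev_evict_data *ed = evict_arg;
 *
 *		ed->cleared += mnode->len >> PAGE_SHIFT;
 *		if (ed->cleared >= ed->target)
 *			*stop = true;
 *		return 1;	// nonzero: evict this node
 *	}
 */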

static int mmu_notifier_range_start(struct mmu_notifier *mn,
				    const struct mmu_notifier_range *range)
{
	struct mmu_rb_handler *handler =
		container_of(mn, struct mmu_rb_handler, mn);
	struct rb_root_cached *root = &handler->root;
	struct mmu_rb_node *node, *ptr = NULL;
	unsigned long flags;

	spin_lock_irqsave(&handler->lock, flags);
	for (node = __mmu_int_rb_iter_first(root, range->start, range->end - 1);
	     node; node = ptr) {
		/* Guard against node removal. */
		ptr = __mmu_int_rb_iter_next(node, range->start,
					     range->end - 1);
		trace_hfi1_mmu_mem_invalidate(node);
		/* Remove from rb tree and lru_list. */
		__mmu_int_rb_remove(node, root);
		list_del_init(&node->list);
		kref_put(&node->refcount, release_nolock);
	}
	spin_unlock_irqrestore(&handler->lock, flags);

	return 0;
}
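
/*
 * Note on the bounds above: mmu_notifier ranges are half-open,
 * [start, end), while the interval-tree iterators take an inclusive
 * last address, hence "range->end - 1".
 */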

/*
 * Work queue function to remove all nodes that have been queued up to
 * be removed. The key feature is that mm->mmap_lock is not being held
 * and the remove callback can sleep while taking it, if needed.
 */
static void handle_remove(struct work_struct *work)
{
	struct mmu_rb_handler *handler = container_of(work,
						      struct mmu_rb_handler,
						      del_work);
	struct list_head del_list;
	unsigned long flags;
	struct mmu_rb_node *node;

	/* remove anything that is queued to get removed */
	spin_lock_irqsave(&handler->lock, flags);
	list_replace_init(&handler->del_list, &del_list);
	spin_unlock_irqrestore(&handler->lock, flags);

	while (!list_empty(&del_list)) {
		node = list_first_entry(&del_list, struct mmu_rb_node, list);
		list_del(&node->list);
		trace_hfi1_mmu_release_node(node);
		handler->ops->remove(handler->ops_arg, node);
	}
}