// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/*
 * Copyright(c) 2020 Cornelis Networks, Inc.
 * Copyright(c) 2016 - 2017 Intel Corporation.
 */

#include <linux/list.h>
#include <linux/rculist.h>
#include <linux/mmu_notifier.h>
#include <linux/interval_tree_generic.h>
#include <linux/sched/mm.h>

#include "mmu_rb.h"
#include "trace.h"

static unsigned long mmu_node_start(struct mmu_rb_node *);
static unsigned long mmu_node_last(struct mmu_rb_node *);
static int mmu_notifier_range_start(struct mmu_notifier *,
				    const struct mmu_notifier_range *);
static struct mmu_rb_node *__mmu_rb_search(struct mmu_rb_handler *,
					   unsigned long, unsigned long);
static void release_immediate(struct kref *refcount);
static void handle_remove(struct work_struct *work);

static const struct mmu_notifier_ops mn_opts = {
	.invalidate_range_start = mmu_notifier_range_start,
};

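/*
 * INTERVAL_TREE_DEFINE() instantiates the static interval-tree helpers used
 * below (__mmu_int_rb_insert(), __mmu_int_rb_remove(), __mmu_int_rb_iter_first()
 * and __mmu_int_rb_iter_next()), keyed on the inclusive page-aligned range
 * [mmu_node_start(node), mmu_node_last(node)]; the per-node __last field
 * caches the largest interval end in each subtree.
 */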
INTERVAL_TREE_DEFINE(struct mmu_rb_node, node, unsigned long, __last,
		     mmu_node_start, mmu_node_last, static, __mmu_int_rb);

static unsigned long mmu_node_start(struct mmu_rb_node *node)
{
	return node->addr & PAGE_MASK;
}

static unsigned long mmu_node_last(struct mmu_rb_node *node)
{
	return PAGE_ALIGN(node->addr + node->len) - 1;
}

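/*
 * hfi1_mmu_rb_register() - allocate a cache-line-aligned handler, register
 * it as an mmu_notifier on current->mm, and return it through @handler.
 * @wq is used later to run the deferred ops->remove() work.
 *
 * Illustrative caller-side sketch (the callback and variable names here are
 * hypothetical, not part of this file):
 *
 *	static const struct mmu_rb_ops my_ops = {
 *		.filter = my_filter,	(optional, see __mmu_rb_search())
 *		.evict  = my_evict,
 *		.remove = my_remove,	(may sleep, never called under handler->lock)
 *	};
 *
 *	ret = hfi1_mmu_rb_register(my_arg, &my_ops, my_wq, &handler);
 *	if (!ret)
 *		ret = hfi1_mmu_rb_insert(handler, &my_node);
 *	...
 *	hfi1_mmu_rb_unregister(handler);
 */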
int hfi1_mmu_rb_register(void *ops_arg,
			 struct mmu_rb_ops *ops,
			 struct workqueue_struct *wq,
			 struct mmu_rb_handler **handler)
{
	struct mmu_rb_handler *h;
	void *free_ptr;
	int ret;

	free_ptr = kzalloc(sizeof(*h) + cache_line_size() - 1, GFP_KERNEL);
	if (!free_ptr)
		return -ENOMEM;

	h = PTR_ALIGN(free_ptr, cache_line_size());
	h->root = RB_ROOT_CACHED;
	h->ops = ops;
	h->ops_arg = ops_arg;
	INIT_HLIST_NODE(&h->mn.hlist);
	spin_lock_init(&h->lock);
	h->mn.ops = &mn_opts;
	INIT_WORK(&h->del_work, handle_remove);
	INIT_LIST_HEAD(&h->del_list);
	INIT_LIST_HEAD(&h->lru_list);
	h->wq = wq;
	h->free_ptr = free_ptr;

	ret = mmu_notifier_register(&h->mn, current->mm);
	if (ret) {
		kfree(free_ptr);
		return ret;
	}

	*handler = h;
	return 0;
}

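/*
 * hfi1_mmu_rb_unregister() - tear down a handler: unregister the mmu
 * notifier, flush any pending deferred-removal work, release every node
 * still in the tree via release_immediate(), and free the handler itself.
 */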
void hfi1_mmu_rb_unregister(struct mmu_rb_handler *handler)
{
	struct mmu_rb_node *rbnode;
	struct rb_node *node;
	unsigned long flags;
	struct list_head del_list;

	/* Prevent freeing of mm until we are completely finished. */
	mmgrab(handler->mn.mm);

	/* Unregister first so we don't get any more notifications. */
	mmu_notifier_unregister(&handler->mn, handler->mn.mm);

	/*
	 * Make sure the wq delete handler is finished running. It will not
	 * be triggered once the mmu notifiers are unregistered above.
	 */
	flush_work(&handler->del_work);

	INIT_LIST_HEAD(&del_list);

	spin_lock_irqsave(&handler->lock, flags);
	while ((node = rb_first_cached(&handler->root))) {
		rbnode = rb_entry(node, struct mmu_rb_node, node);
		rb_erase_cached(node, &handler->root);
		/* move from LRU list to delete list */
		list_move(&rbnode->list, &del_list);
	}
	spin_unlock_irqrestore(&handler->lock, flags);

	while (!list_empty(&del_list)) {
		rbnode = list_first_entry(&del_list, struct mmu_rb_node, list);
		list_del(&rbnode->list);
		kref_put(&rbnode->refcount, release_immediate);
	}

	/* Now the mm may be freed. */
	mmdrop(handler->mn.mm);

	kfree(handler->free_ptr);
}

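/*
 * hfi1_mmu_rb_insert() - add @mnode to the interval tree and to the tail of
 * the LRU list. Fails with -EPERM if called from a different mm than the one
 * the handler was registered on, and with -EEXIST if an overlapping node is
 * already registered.
 */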
int hfi1_mmu_rb_insert(struct mmu_rb_handler *handler,
		       struct mmu_rb_node *mnode)
{
	struct mmu_rb_node *node;
	unsigned long flags;
	int ret = 0;

	trace_hfi1_mmu_rb_insert(mnode);

	if (current->mm != handler->mn.mm)
		return -EPERM;

	spin_lock_irqsave(&handler->lock, flags);
	node = __mmu_rb_search(handler, mnode->addr, mnode->len);
	if (node) {
		ret = -EEXIST;
		goto unlock;
	}
	__mmu_int_rb_insert(mnode, &handler->root);
	list_add_tail(&mnode->list, &handler->lru_list);
	mnode->handler = handler;
unlock:
	spin_unlock_irqrestore(&handler->lock, flags);
	return ret;
}

/* Caller must hold handler lock */
struct mmu_rb_node *hfi1_mmu_rb_get_first(struct mmu_rb_handler *handler,
					  unsigned long addr, unsigned long len)
{
	struct mmu_rb_node *node;

	trace_hfi1_mmu_rb_search(addr, len);
	node = __mmu_int_rb_iter_first(&handler->root, addr, (addr + len) - 1);
	if (node)
		list_move_tail(&node->list, &handler->lru_list);
	return node;
}

/* Caller must hold handler lock */
static struct mmu_rb_node *__mmu_rb_search(struct mmu_rb_handler *handler,
					   unsigned long addr,
					   unsigned long len)
{
	struct mmu_rb_node *node = NULL;

	trace_hfi1_mmu_rb_search(addr, len);
	if (!handler->ops->filter) {
		node = __mmu_int_rb_iter_first(&handler->root, addr,
					       (addr + len) - 1);
	} else {
		for (node = __mmu_int_rb_iter_first(&handler->root, addr,
						    (addr + len) - 1);
		     node;
		     node = __mmu_int_rb_iter_next(node, addr,
						   (addr + len) - 1)) {
			if (handler->ops->filter(node, addr, len))
				return node;
		}
	}
	return node;
}

/*
 * Must NOT call while holding mnode->handler->lock.
 * mnode->handler->ops->remove() may sleep and mnode->handler->lock is a
 * spinlock.
 */
static void release_immediate(struct kref *refcount)
{
	struct mmu_rb_node *mnode =
		container_of(refcount, struct mmu_rb_node, refcount);
	trace_hfi1_mmu_release_node(mnode);
	mnode->handler->ops->remove(mnode->handler->ops_arg, mnode);
}

/* Caller must hold mnode->handler->lock */
static void release_nolock(struct kref *refcount)
{
	struct mmu_rb_node *mnode =
		container_of(refcount, struct mmu_rb_node, refcount);
	list_move(&mnode->list, &mnode->handler->del_list);
	queue_work(mnode->handler->wq, &mnode->handler->del_work);
}

/*
 * struct mmu_rb_node->refcount kref_put() callback.
 * Adds mmu_rb_node to mmu_rb_node->handler->del_list and queues
 * handler->del_work on handler->wq.
 * Does not remove mmu_rb_node from handler->lru_list or handler->root.
 * Acquires mmu_rb_node->handler->lock; do not call while already holding
 * handler->lock.
 */
void hfi1_mmu_rb_release(struct kref *refcount)
{
	struct mmu_rb_node *mnode =
		container_of(refcount, struct mmu_rb_node, refcount);
	struct mmu_rb_handler *handler = mnode->handler;
	unsigned long flags;

	spin_lock_irqsave(&handler->lock, flags);
	list_move(&mnode->list, &mnode->handler->del_list);
	spin_unlock_irqrestore(&handler->lock, flags);
	queue_work(handler->wq, &handler->del_work);
}

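/*
 * hfi1_mmu_rb_evict() - walk the LRU list and offer nodes that only the
 * handler still references (refcount == 1) to ops->evict(). Nodes the
 * callback accepts are unlinked under the lock and then released with
 * release_immediate() once the lock is dropped; the walk ends early if the
 * callback sets @stop.
 */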
void hfi1_mmu_rb_evict(struct mmu_rb_handler *handler, void *evict_arg)
{
	struct mmu_rb_node *rbnode, *ptr;
	struct list_head del_list;
	unsigned long flags;
	bool stop = false;

	if (current->mm != handler->mn.mm)
		return;

	INIT_LIST_HEAD(&del_list);

	spin_lock_irqsave(&handler->lock, flags);
	list_for_each_entry_safe(rbnode, ptr, &handler->lru_list, list) {
		/* refcount == 1 implies mmu_rb_handler has only rbnode ref */
		if (kref_read(&rbnode->refcount) > 1)
			continue;

		if (handler->ops->evict(handler->ops_arg, rbnode, evict_arg,
					&stop)) {
			__mmu_int_rb_remove(rbnode, &handler->root);
			/* move from LRU list to delete list */
			list_move(&rbnode->list, &del_list);
		}
		if (stop)
			break;
	}
	spin_unlock_irqrestore(&handler->lock, flags);

	list_for_each_entry_safe(rbnode, ptr, &del_list, list) {
		trace_hfi1_mmu_rb_evict(rbnode);
		kref_put(&rbnode->refcount, release_immediate);
	}
}

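/*
 * mmu_notifier invalidate_range_start callback: unlink every node that
 * overlaps the invalidated range from the tree and LRU list and drop the
 * handler's reference to it. When the last reference goes, release_nolock()
 * queues the node on del_list so ops->remove(), which may sleep, runs later
 * from handle_remove() on the workqueue.
 */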
static int mmu_notifier_range_start(struct mmu_notifier *mn,
				    const struct mmu_notifier_range *range)
{
	struct mmu_rb_handler *handler =
		container_of(mn, struct mmu_rb_handler, mn);
	struct rb_root_cached *root = &handler->root;
	struct mmu_rb_node *node, *ptr = NULL;
	unsigned long flags;

	spin_lock_irqsave(&handler->lock, flags);
	for (node = __mmu_int_rb_iter_first(root, range->start, range->end - 1);
	     node; node = ptr) {
		/* Guard against node removal. */
		ptr = __mmu_int_rb_iter_next(node, range->start,
					     range->end - 1);
		trace_hfi1_mmu_mem_invalidate(node);
		/* Remove from rb tree and lru_list. */
		__mmu_int_rb_remove(node, root);
		list_del_init(&node->list);
		kref_put(&node->refcount, release_nolock);
	}
	spin_unlock_irqrestore(&handler->lock, flags);

	return 0;
}

/*
 * Work queue function to remove all nodes that have been queued up to
 * be removed. The key feature is that mm->mmap_lock is not being held
 * and the remove callback can sleep while taking it, if needed.
 */
static void handle_remove(struct work_struct *work)
{
	struct mmu_rb_handler *handler = container_of(work,
						      struct mmu_rb_handler,
						      del_work);
	struct list_head del_list;
	unsigned long flags;
	struct mmu_rb_node *node;

	/* remove anything that is queued to get removed */
	spin_lock_irqsave(&handler->lock, flags);
	list_replace_init(&handler->del_list, &del_list);
	spin_unlock_irqrestore(&handler->lock, flags);

	while (!list_empty(&del_list)) {
		node = list_first_entry(&del_list, struct mmu_rb_node, list);
		list_del(&node->list);
		trace_hfi1_mmu_release_node(node);
		handler->ops->remove(handler->ops_arg, node);
	}
}