// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright © 2006-2009, Intel Corporation.
 *
 * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
 */

#include <linux/iova.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/smp.h>
#include <linux/bitops.h>
#include <linux/cpu.h>

/* The anchor node sits above the top of the usable address space */
#define IOVA_ANCHOR	~0UL

#define IOVA_RANGE_CACHE_MAX_SIZE 6	/* log of max cached IOVA range size (in pages) */

static bool iova_rcache_insert(struct iova_domain *iovad,
			       unsigned long pfn,
			       unsigned long size);
static unsigned long iova_rcache_get(struct iova_domain *iovad,
				     unsigned long size,
				     unsigned long limit_pfn);
static void free_cpu_cached_iovas(unsigned int cpu, struct iova_domain *iovad);
static void free_iova_rcaches(struct iova_domain *iovad);

unsigned long iova_rcache_range(void)
{
	return PAGE_SIZE << (IOVA_RANGE_CACHE_MAX_SIZE - 1);
}
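
/*
 * Illustrative arithmetic (editorial note, not from the original
 * source): with 4KiB pages this is 4096 << (6 - 1) = 128KiB, the
 * largest mapping size that free_iova_fast() will try to recycle
 * through the rcaches below.
 */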

static int iova_cpuhp_dead(unsigned int cpu, struct hlist_node *node)
{
	struct iova_domain *iovad;

	iovad = hlist_entry_safe(node, struct iova_domain, cpuhp_dead);

	free_cpu_cached_iovas(cpu, iovad);
	return 0;
}

static void free_global_cached_iovas(struct iova_domain *iovad);

static struct iova *to_iova(struct rb_node *node)
{
	return rb_entry(node, struct iova, node);
}

void
init_iova_domain(struct iova_domain *iovad, unsigned long granule,
	unsigned long start_pfn)
{
	/*
	 * IOVA granularity will normally be equal to the smallest
	 * supported IOMMU page size; both *must* be capable of
	 * representing individual CPU pages exactly.
	 */
	BUG_ON((granule > PAGE_SIZE) || !is_power_of_2(granule));

	spin_lock_init(&iovad->iova_rbtree_lock);
	iovad->rbroot = RB_ROOT;
	iovad->cached_node = &iovad->anchor.node;
	iovad->cached32_node = &iovad->anchor.node;
	iovad->granule = granule;
	iovad->start_pfn = start_pfn;
	iovad->dma_32bit_pfn = 1UL << (32 - iova_shift(iovad));
	iovad->max32_alloc_size = iovad->dma_32bit_pfn;
	iovad->anchor.pfn_lo = iovad->anchor.pfn_hi = IOVA_ANCHOR;
	rb_link_node(&iovad->anchor.node, NULL, &iovad->rbroot.rb_node);
	rb_insert_color(&iovad->anchor.node, &iovad->rbroot);
}
EXPORT_SYMBOL_GPL(init_iova_domain);
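
/*
 * Usage sketch (illustrative only, not part of this file): set up a
 * domain with a 4KiB granule, keeping pfn 0 out of the allocatable
 * range by starting at pfn 1:
 *
 *	struct iova_domain iovad;
 *
 *	init_iova_domain(&iovad, SZ_4K, 1);
 */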

static struct rb_node *
__get_cached_rbnode(struct iova_domain *iovad, unsigned long limit_pfn)
{
	if (limit_pfn <= iovad->dma_32bit_pfn)
		return iovad->cached32_node;

	return iovad->cached_node;
}

static void
__cached_rbnode_insert_update(struct iova_domain *iovad, struct iova *new)
{
	if (new->pfn_hi < iovad->dma_32bit_pfn)
		iovad->cached32_node = &new->node;
	else
		iovad->cached_node = &new->node;
}

static void
__cached_rbnode_delete_update(struct iova_domain *iovad, struct iova *free)
{
	struct iova *cached_iova;

	cached_iova = to_iova(iovad->cached32_node);
	if (free == cached_iova ||
	    (free->pfn_hi < iovad->dma_32bit_pfn &&
	     free->pfn_lo >= cached_iova->pfn_lo))
		iovad->cached32_node = rb_next(&free->node);

	if (free->pfn_lo < iovad->dma_32bit_pfn)
		iovad->max32_alloc_size = iovad->dma_32bit_pfn;

	cached_iova = to_iova(iovad->cached_node);
	if (free->pfn_lo >= cached_iova->pfn_lo)
		iovad->cached_node = rb_next(&free->node);
}

static struct rb_node *iova_find_limit(struct iova_domain *iovad, unsigned long limit_pfn)
{
	struct rb_node *node, *next;
	/*
	 * Ideally what we'd like to judge here is whether limit_pfn is close
	 * enough to the highest-allocated IOVA that starting the allocation
	 * walk from the anchor node will be quicker than this initial work to
	 * find an exact starting point (especially if that ends up being the
	 * anchor node anyway). This is an incredibly crude approximation which
	 * only really helps the most likely case, but is at least trivially easy.
	 */
	if (limit_pfn > iovad->dma_32bit_pfn)
		return &iovad->anchor.node;

	node = iovad->rbroot.rb_node;
	while (to_iova(node)->pfn_hi < limit_pfn)
		node = node->rb_right;

search_left:
	while (node->rb_left && to_iova(node->rb_left)->pfn_lo >= limit_pfn)
		node = node->rb_left;

	if (!node->rb_left)
		return node;

	next = node->rb_left;
	while (next->rb_right) {
		next = next->rb_right;
		if (to_iova(next)->pfn_lo >= limit_pfn) {
			node = next;
			goto search_left;
		}
	}

	return node;
}

/* Insert the iova into domain rbtree by holding writer lock */
static void
iova_insert_rbtree(struct rb_root *root, struct iova *iova,
		   struct rb_node *start)
{
	struct rb_node **new, *parent = NULL;

	new = (start) ? &start : &(root->rb_node);
	/* Figure out where to put new node */
	while (*new) {
		struct iova *this = to_iova(*new);

		parent = *new;

		if (iova->pfn_lo < this->pfn_lo)
			new = &((*new)->rb_left);
		else if (iova->pfn_lo > this->pfn_lo)
			new = &((*new)->rb_right);
		else {
			WARN_ON(1); /* this should not happen */
			return;
		}
	}
	/* Add new node and rebalance tree. */
	rb_link_node(&iova->node, parent, new);
	rb_insert_color(&iova->node, root);
}

static int __alloc_and_insert_iova_range(struct iova_domain *iovad,
		unsigned long size, unsigned long limit_pfn,
		struct iova *new, bool size_aligned)
{
	struct rb_node *curr, *prev;
	struct iova *curr_iova;
	unsigned long flags;
	unsigned long new_pfn, retry_pfn;
	unsigned long align_mask = ~0UL;
	unsigned long high_pfn = limit_pfn, low_pfn = iovad->start_pfn;

	if (size_aligned)
		align_mask <<= fls_long(size - 1);

	/* Walk the tree backwards */
	spin_lock_irqsave(&iovad->iova_rbtree_lock, flags);
	if (limit_pfn <= iovad->dma_32bit_pfn &&
	    size >= iovad->max32_alloc_size)
		goto iova32_full;

	curr = __get_cached_rbnode(iovad, limit_pfn);
	curr_iova = to_iova(curr);
	retry_pfn = curr_iova->pfn_hi;

retry:
	do {
		high_pfn = min(high_pfn, curr_iova->pfn_lo);
		new_pfn = (high_pfn - size) & align_mask;
		prev = curr;
		curr = rb_prev(curr);
		curr_iova = to_iova(curr);
	} while (curr && new_pfn <= curr_iova->pfn_hi && new_pfn >= low_pfn);

	if (high_pfn < size || new_pfn < low_pfn) {
		if (low_pfn == iovad->start_pfn && retry_pfn < limit_pfn) {
			high_pfn = limit_pfn;
			low_pfn = retry_pfn + 1;
			curr = iova_find_limit(iovad, limit_pfn);
			curr_iova = to_iova(curr);
			goto retry;
		}
		iovad->max32_alloc_size = size;
		goto iova32_full;
	}

	/* pfn_lo will point to size aligned address if size_aligned is set */
	new->pfn_lo = new_pfn;
	new->pfn_hi = new->pfn_lo + size - 1;

	/* If we have 'prev', it's a valid place to start the insertion. */
	iova_insert_rbtree(&iovad->rbroot, new, prev);
	__cached_rbnode_insert_update(iovad, new);

	spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
	return 0;

iova32_full:
	spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
	return -ENOMEM;
}
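
/*
 * Worked example of the size_aligned math above (illustrative): for a
 * request of size = 5 pfns, fls_long(5 - 1) = 3, so align_mask becomes
 * ~0UL << 3 and the '& align_mask' step rounds new_pfn down to a
 * multiple of 8, i.e. natural alignment on roundup_pow_of_two(size).
 */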

static struct kmem_cache *iova_cache;
static unsigned int iova_cache_users;
static DEFINE_MUTEX(iova_cache_mutex);

static struct iova *alloc_iova_mem(void)
{
	return kmem_cache_zalloc(iova_cache, GFP_ATOMIC | __GFP_NOWARN);
}

static void free_iova_mem(struct iova *iova)
{
	if (iova->pfn_lo != IOVA_ANCHOR)
		kmem_cache_free(iova_cache, iova);
}

int iova_cache_get(void)
{
	mutex_lock(&iova_cache_mutex);
	if (!iova_cache_users) {
		int ret;

		ret = cpuhp_setup_state_multi(CPUHP_IOMMU_IOVA_DEAD, "iommu/iova:dead", NULL,
					      iova_cpuhp_dead);
		if (ret) {
			mutex_unlock(&iova_cache_mutex);
			pr_err("Couldn't register cpuhp handler\n");
			return ret;
		}

		iova_cache = kmem_cache_create(
			"iommu_iova", sizeof(struct iova), 0,
			SLAB_HWCACHE_ALIGN, NULL);
		if (!iova_cache) {
			cpuhp_remove_multi_state(CPUHP_IOMMU_IOVA_DEAD);
			mutex_unlock(&iova_cache_mutex);
			pr_err("Couldn't create iova cache\n");
			return -ENOMEM;
		}
	}

	iova_cache_users++;
	mutex_unlock(&iova_cache_mutex);

	return 0;
}
EXPORT_SYMBOL_GPL(iova_cache_get);

void iova_cache_put(void)
{
	mutex_lock(&iova_cache_mutex);
	if (WARN_ON(!iova_cache_users)) {
		mutex_unlock(&iova_cache_mutex);
		return;
	}
	iova_cache_users--;
	if (!iova_cache_users) {
		cpuhp_remove_multi_state(CPUHP_IOMMU_IOVA_DEAD);
		kmem_cache_destroy(iova_cache);
	}
	mutex_unlock(&iova_cache_mutex);
}
EXPORT_SYMBOL_GPL(iova_cache_put);
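
/*
 * Reference-counting sketch (illustrative): iova_cache_get() and
 * iova_cache_put() nest, so each independent user brackets its own
 * domain lifetimes. The cache must be held before any iova is
 * allocated:
 *
 *	ret = iova_cache_get();
 *	if (ret)
 *		return ret;
 *	...init, use and destroy iova_domains...
 *	iova_cache_put();
 */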

/**
 * alloc_iova - allocates an iova
 * @iovad: - iova domain in question
 * @size: - size of page frames to allocate
 * @limit_pfn: - max limit address
 * @size_aligned: - set if size_aligned address range is required
 * This function allocates an iova in the range iovad->start_pfn to limit_pfn,
 * searching top-down from limit_pfn to iovad->start_pfn. If the size_aligned
 * flag is set then the allocated address iova->pfn_lo will be naturally
 * aligned on roundup_pow_of_two(size).
 */
struct iova *
alloc_iova(struct iova_domain *iovad, unsigned long size,
	unsigned long limit_pfn,
	bool size_aligned)
{
	struct iova *new_iova;
	int ret;

	new_iova = alloc_iova_mem();
	if (!new_iova)
		return NULL;

	ret = __alloc_and_insert_iova_range(iovad, size, limit_pfn + 1,
			new_iova, size_aligned);

	if (ret) {
		free_iova_mem(new_iova);
		return NULL;
	}

	return new_iova;
}
EXPORT_SYMBOL_GPL(alloc_iova);
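
/*
 * Usage sketch (illustrative): allocate eight granules below the
 * 32-bit boundary, naturally aligned, and release them again with
 * __free_iova():
 *
 *	struct iova *iova;
 *
 *	iova = alloc_iova(&iovad, 8, iovad.dma_32bit_pfn - 1, true);
 *	if (!iova)
 *		return -ENOMEM;
 *	...
 *	__free_iova(&iovad, iova);
 */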

static struct iova *
private_find_iova(struct iova_domain *iovad, unsigned long pfn)
{
	struct rb_node *node = iovad->rbroot.rb_node;

	assert_spin_locked(&iovad->iova_rbtree_lock);

	while (node) {
		struct iova *iova = to_iova(node);

		if (pfn < iova->pfn_lo)
			node = node->rb_left;
		else if (pfn > iova->pfn_hi)
			node = node->rb_right;
		else
			return iova;	/* pfn falls within iova's range */
	}

	return NULL;
}

static void remove_iova(struct iova_domain *iovad, struct iova *iova)
{
	assert_spin_locked(&iovad->iova_rbtree_lock);
	__cached_rbnode_delete_update(iovad, iova);
	rb_erase(&iova->node, &iovad->rbroot);
}

/**
 * find_iova - finds an iova for a given pfn
 * @iovad: - iova domain in question.
 * @pfn: - page frame number
 * This function finds and returns an iova belonging to the
 * given domain which matches the given pfn.
 */
struct iova *find_iova(struct iova_domain *iovad, unsigned long pfn)
{
	unsigned long flags;
	struct iova *iova;

	/* Take the lock so that no other thread is manipulating the rbtree */
	spin_lock_irqsave(&iovad->iova_rbtree_lock, flags);
	iova = private_find_iova(iovad, pfn);
	spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
	return iova;
}
EXPORT_SYMBOL_GPL(find_iova);

/**
 * __free_iova - frees the given iova
 * @iovad: iova domain in question.
 * @iova: iova in question.
 * Frees the given iova belonging to the given domain
 */
void
__free_iova(struct iova_domain *iovad, struct iova *iova)
{
	unsigned long flags;

	spin_lock_irqsave(&iovad->iova_rbtree_lock, flags);
	remove_iova(iovad, iova);
	spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
	free_iova_mem(iova);
}
EXPORT_SYMBOL_GPL(__free_iova);

/**
 * free_iova - finds and frees the iova for a given pfn
 * @iovad: - iova domain in question.
 * @pfn: - pfn that is allocated previously
 * This function finds an iova for a given pfn and then
 * frees the iova from that domain.
 */
void
free_iova(struct iova_domain *iovad, unsigned long pfn)
{
	unsigned long flags;
	struct iova *iova;

	spin_lock_irqsave(&iovad->iova_rbtree_lock, flags);
	iova = private_find_iova(iovad, pfn);
	if (!iova) {
		spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
		return;
	}
	remove_iova(iovad, iova);
	spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
	free_iova_mem(iova);
}
EXPORT_SYMBOL_GPL(free_iova);

/**
 * alloc_iova_fast - allocates an iova from rcache
 * @iovad: - iova domain in question
 * @size: - size of page frames to allocate
 * @limit_pfn: - max limit address
 * @flush_rcache: - set to flush rcache on regular allocation failure
 * This function tries to satisfy an iova allocation from the rcache,
 * and falls back to regular allocation on failure. If regular allocation
 * fails too and the flush_rcache flag is set then the rcache will be flushed.
 */
unsigned long
alloc_iova_fast(struct iova_domain *iovad, unsigned long size,
		unsigned long limit_pfn, bool flush_rcache)
{
	unsigned long iova_pfn;
	struct iova *new_iova;

	/*
	 * Freeing non-power-of-two-sized allocations back into the IOVA caches
	 * will come back to bite us badly, so we have to waste a bit of space
	 * rounding up anything cacheable to make sure that can't happen. The
	 * order of the unadjusted size will still match upon freeing.
	 */
	if (size < (1 << (IOVA_RANGE_CACHE_MAX_SIZE - 1)))
		size = roundup_pow_of_two(size);

	iova_pfn = iova_rcache_get(iovad, size, limit_pfn + 1);
	if (iova_pfn)
		return iova_pfn;

retry:
	new_iova = alloc_iova(iovad, size, limit_pfn, true);
	if (!new_iova) {
		unsigned int cpu;

		if (!flush_rcache)
			return 0;

		/* Try replenishing IOVAs by flushing rcache. */
		flush_rcache = false;
		for_each_online_cpu(cpu)
			free_cpu_cached_iovas(cpu, iovad);
		free_global_cached_iovas(iovad);
		goto retry;
	}

	return new_iova->pfn_lo;
}
EXPORT_SYMBOL_GPL(alloc_iova_fast);
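
/*
 * Usage sketch (illustrative): fast-path callers only keep the pfn and
 * page count, and hand the range back through free_iova_fast() so it
 * can be recycled via the rcaches rather than the rbtree. 'nrpages'
 * and 'limit' are hypothetical caller-side values:
 *
 *	unsigned long pfn;
 *
 *	pfn = alloc_iova_fast(&iovad, nrpages, limit, true);
 *	if (!pfn)
 *		return -ENOMEM;
 *	...
 *	free_iova_fast(&iovad, pfn, nrpages);
 */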

/**
 * free_iova_fast - free iova pfn range into rcache
 * @iovad: - iova domain in question.
 * @pfn: - pfn that is allocated previously
 * @size: - # of pages in range
 * This function frees an iova range by trying to put it into the rcache,
 * falling back to regular iova deallocation via free_iova() if this fails.
 */
void
free_iova_fast(struct iova_domain *iovad, unsigned long pfn, unsigned long size)
{
	if (iova_rcache_insert(iovad, pfn, size))
		return;

	free_iova(iovad, pfn);
}
EXPORT_SYMBOL_GPL(free_iova_fast);

static void iova_domain_free_rcaches(struct iova_domain *iovad)
{
	cpuhp_state_remove_instance_nocalls(CPUHP_IOMMU_IOVA_DEAD,
					    &iovad->cpuhp_dead);
	free_iova_rcaches(iovad);
}

/**
 * put_iova_domain - destroys the iova domain
 * @iovad: - iova domain in question.
 * All the iova's in that domain are destroyed.
 */
void put_iova_domain(struct iova_domain *iovad)
{
	struct iova *iova, *tmp;

	if (iovad->rcaches)
		iova_domain_free_rcaches(iovad);

	rbtree_postorder_for_each_entry_safe(iova, tmp, &iovad->rbroot, node)
		free_iova_mem(iova);
}
EXPORT_SYMBOL_GPL(put_iova_domain);

static int
__is_range_overlap(struct rb_node *node,
	unsigned long pfn_lo, unsigned long pfn_hi)
{
	struct iova *iova = to_iova(node);

	if ((pfn_lo <= iova->pfn_hi) && (pfn_hi >= iova->pfn_lo))
		return 1;
	return 0;
}

static inline struct iova *
alloc_and_init_iova(unsigned long pfn_lo, unsigned long pfn_hi)
{
	struct iova *iova;

	iova = alloc_iova_mem();
	if (iova) {
		iova->pfn_lo = pfn_lo;
		iova->pfn_hi = pfn_hi;
	}

	return iova;
}

static struct iova *
__insert_new_range(struct iova_domain *iovad,
	unsigned long pfn_lo, unsigned long pfn_hi)
{
	struct iova *iova;

	iova = alloc_and_init_iova(pfn_lo, pfn_hi);
	if (iova)
		iova_insert_rbtree(&iovad->rbroot, iova, NULL);

	return iova;
}

static void
__adjust_overlap_range(struct iova *iova,
	unsigned long *pfn_lo, unsigned long *pfn_hi)
{
	if (*pfn_lo < iova->pfn_lo)
		iova->pfn_lo = *pfn_lo;
	if (*pfn_hi > iova->pfn_hi)
		*pfn_lo = iova->pfn_hi + 1;
}

/**
 * reserve_iova - reserves an iova in the given range
 * @iovad: - iova domain pointer
 * @pfn_lo: - lower page frame address
 * @pfn_hi: - higher pfn address
 * This function reserves the address range from pfn_lo to pfn_hi so
 * that it is not handed out as part of alloc_iova.
 */
struct iova *
reserve_iova(struct iova_domain *iovad,
	unsigned long pfn_lo, unsigned long pfn_hi)
{
	struct rb_node *node;
	unsigned long flags;
	struct iova *iova;
	unsigned int overlap = 0;

	/* Don't allow nonsensical pfns */
	if (WARN_ON((pfn_hi | pfn_lo) > (ULLONG_MAX >> iova_shift(iovad))))
		return NULL;

	spin_lock_irqsave(&iovad->iova_rbtree_lock, flags);
	for (node = rb_first(&iovad->rbroot); node; node = rb_next(node)) {
		if (__is_range_overlap(node, pfn_lo, pfn_hi)) {
			iova = to_iova(node);
			__adjust_overlap_range(iova, &pfn_lo, &pfn_hi);
			if ((pfn_lo >= iova->pfn_lo) &&
				(pfn_hi <= iova->pfn_hi))
				goto finish;
			overlap = 1;

		} else if (overlap)
				break;
	}

	/*
	 * We get here either because this is the first reserved range
	 * or because the remaining non-overlapping address range still
	 * needs to be inserted.
	 */
	iova = __insert_new_range(iovad, pfn_lo, pfn_hi);
finish:

	spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
	return iova;
}
EXPORT_SYMBOL_GPL(reserve_iova);
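
/*
 * Usage sketch (illustrative): carve a hypothetical MSI window out of
 * the allocatable space before the domain is used for DMA; 'msi_base'
 * and 'msi_size' are made-up values, converted from addresses to pfns
 * with iova_shift():
 *
 *	if (!reserve_iova(&iovad, msi_base >> iova_shift(&iovad),
 *			  (msi_base + msi_size - 1) >> iova_shift(&iovad)))
 *		return -ENOMEM;
 */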

/*
 * Magazine caches for IOVA ranges. For an introduction to magazines,
 * see the USENIX 2001 paper "Magazines and Vmem: Extending the Slab
 * Allocator to Many CPUs and Arbitrary Resources" by Bonwick and Adams.
 * For simplicity, we use a static magazine size and don't implement the
 * dynamic size tuning described in the paper.
 */

/*
 * As kmalloc's buffer size is fixed to power of 2, 127 is chosen to
 * assure size of 'iova_magazine' to be 1024 bytes, so that no memory
 * will be wasted.
 */
#define IOVA_MAG_SIZE 127
#define MAX_GLOBAL_MAGS 32	/* magazines per bin */

struct iova_magazine {
	unsigned long size;
	unsigned long pfns[IOVA_MAG_SIZE];
};
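
/*
 * Size check for the comment above (illustrative, assuming a 64-bit
 * build): 8 bytes for 'size' plus 127 * 8 bytes of pfns is exactly
 * 1024 bytes, so the magazine fills kmalloc's 1KiB bucket with no
 * slack.
 */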

struct iova_cpu_rcache {
	spinlock_t lock;
	struct iova_magazine *loaded;
	struct iova_magazine *prev;
};

struct iova_rcache {
	spinlock_t lock;
	unsigned long depot_size;
	struct iova_magazine *depot[MAX_GLOBAL_MAGS];
	struct iova_cpu_rcache __percpu *cpu_rcaches;
};

static struct iova_magazine *iova_magazine_alloc(gfp_t flags)
{
	struct iova_magazine *mag;

	mag = kmalloc(sizeof(*mag), flags);
	if (mag)
		mag->size = 0;

	return mag;
}

static void iova_magazine_free(struct iova_magazine *mag)
{
	kfree(mag);
}

static void
iova_magazine_free_pfns(struct iova_magazine *mag, struct iova_domain *iovad)
{
	unsigned long flags;
	int i;

	spin_lock_irqsave(&iovad->iova_rbtree_lock, flags);

	for (i = 0 ; i < mag->size; ++i) {
		struct iova *iova = private_find_iova(iovad, mag->pfns[i]);

		if (WARN_ON(!iova))
			continue;

		remove_iova(iovad, iova);
		free_iova_mem(iova);
	}

	spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);

	mag->size = 0;
}

static bool iova_magazine_full(struct iova_magazine *mag)
{
	return mag->size == IOVA_MAG_SIZE;
}

static bool iova_magazine_empty(struct iova_magazine *mag)
{
	return mag->size == 0;
}

static unsigned long iova_magazine_pop(struct iova_magazine *mag,
				       unsigned long limit_pfn)
{
	int i;
	unsigned long pfn;

	/* Only fall back to the rbtree if we have no suitable pfns at all */
	for (i = mag->size - 1; mag->pfns[i] > limit_pfn; i--)
		if (i == 0)
			return 0;

	/* Swap it to pop it */
	pfn = mag->pfns[i];
	mag->pfns[i] = mag->pfns[--mag->size];

	return pfn;
}
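
/*
 * Worked example of the pop above (illustrative): with pfns[] =
 * {9, 3, 7}, size = 3 and limit_pfn = 5, the scan rejects 7, stops at
 * index 1 and returns 3, then swaps in the last element, leaving
 * pfns[] = {9, 7} and size = 2.
 */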

static void iova_magazine_push(struct iova_magazine *mag, unsigned long pfn)
{
	mag->pfns[mag->size++] = pfn;
}

int iova_domain_init_rcaches(struct iova_domain *iovad)
{
	unsigned int cpu;
	int i, ret;

	iovad->rcaches = kcalloc(IOVA_RANGE_CACHE_MAX_SIZE,
				 sizeof(struct iova_rcache),
				 GFP_KERNEL);
	if (!iovad->rcaches)
		return -ENOMEM;

	for (i = 0; i < IOVA_RANGE_CACHE_MAX_SIZE; ++i) {
		struct iova_cpu_rcache *cpu_rcache;
		struct iova_rcache *rcache;

		rcache = &iovad->rcaches[i];
		spin_lock_init(&rcache->lock);
		rcache->depot_size = 0;
		rcache->cpu_rcaches = __alloc_percpu(sizeof(*cpu_rcache),
						     cache_line_size());
		if (!rcache->cpu_rcaches) {
			ret = -ENOMEM;
			goto out_err;
		}
		for_each_possible_cpu(cpu) {
			cpu_rcache = per_cpu_ptr(rcache->cpu_rcaches, cpu);

			spin_lock_init(&cpu_rcache->lock);
			cpu_rcache->loaded = iova_magazine_alloc(GFP_KERNEL);
			cpu_rcache->prev = iova_magazine_alloc(GFP_KERNEL);
			if (!cpu_rcache->loaded || !cpu_rcache->prev) {
				ret = -ENOMEM;
				goto out_err;
			}
		}
	}

	ret = cpuhp_state_add_instance_nocalls(CPUHP_IOMMU_IOVA_DEAD,
					       &iovad->cpuhp_dead);
	if (ret)
		goto out_err;
	return 0;

out_err:
	free_iova_rcaches(iovad);
	return ret;
}
EXPORT_SYMBOL_GPL(iova_domain_init_rcaches);
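
/*
 * Usage sketch (illustrative): domains that want the fast path enable
 * the rcaches once after init; put_iova_domain() tears them down again:
 *
 *	init_iova_domain(&iovad, SZ_4K, 1);
 *	ret = iova_domain_init_rcaches(&iovad);
 *	if (ret)
 *		return ret;
 *	...
 *	put_iova_domain(&iovad);
 */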

/*
 * Try inserting IOVA range starting with 'iova_pfn' into 'rcache', and
 * return true on success. Can fail if rcache is full and we can't free
 * space, and free_iova() (our only caller) will then return the IOVA
 * range to the rbtree instead.
 */
static bool __iova_rcache_insert(struct iova_domain *iovad,
				 struct iova_rcache *rcache,
				 unsigned long iova_pfn)
{
	struct iova_magazine *mag_to_free = NULL;
	struct iova_cpu_rcache *cpu_rcache;
	bool can_insert = false;
	unsigned long flags;

	cpu_rcache = raw_cpu_ptr(rcache->cpu_rcaches);
	spin_lock_irqsave(&cpu_rcache->lock, flags);

	if (!iova_magazine_full(cpu_rcache->loaded)) {
		can_insert = true;
	} else if (!iova_magazine_full(cpu_rcache->prev)) {
		swap(cpu_rcache->prev, cpu_rcache->loaded);
		can_insert = true;
	} else {
		struct iova_magazine *new_mag = iova_magazine_alloc(GFP_ATOMIC);

		if (new_mag) {
			spin_lock(&rcache->lock);
			if (rcache->depot_size < MAX_GLOBAL_MAGS) {
				rcache->depot[rcache->depot_size++] =
						cpu_rcache->loaded;
			} else {
				mag_to_free = cpu_rcache->loaded;
			}
			spin_unlock(&rcache->lock);

			cpu_rcache->loaded = new_mag;
			can_insert = true;
		}
	}

	if (can_insert)
		iova_magazine_push(cpu_rcache->loaded, iova_pfn);

	spin_unlock_irqrestore(&cpu_rcache->lock, flags);

	if (mag_to_free) {
		iova_magazine_free_pfns(mag_to_free, iovad);
		iova_magazine_free(mag_to_free);
	}

	return can_insert;
}

static bool iova_rcache_insert(struct iova_domain *iovad, unsigned long pfn,
			       unsigned long size)
{
	unsigned int log_size = order_base_2(size);

	if (log_size >= IOVA_RANGE_CACHE_MAX_SIZE)
		return false;

	return __iova_rcache_insert(iovad, &iovad->rcaches[log_size], pfn);
}

/*
 * Caller wants to allocate a new IOVA range from 'rcache'. If we can
 * satisfy the request, return a matching non-NULL range and remove
 * it from the 'rcache'.
 */
static unsigned long __iova_rcache_get(struct iova_rcache *rcache,
				       unsigned long limit_pfn)
{
	struct iova_cpu_rcache *cpu_rcache;
	unsigned long iova_pfn = 0;
	bool has_pfn = false;
	unsigned long flags;

	cpu_rcache = raw_cpu_ptr(rcache->cpu_rcaches);
	spin_lock_irqsave(&cpu_rcache->lock, flags);

	if (!iova_magazine_empty(cpu_rcache->loaded)) {
		has_pfn = true;
	} else if (!iova_magazine_empty(cpu_rcache->prev)) {
		swap(cpu_rcache->prev, cpu_rcache->loaded);
		has_pfn = true;
	} else {
		spin_lock(&rcache->lock);
		if (rcache->depot_size > 0) {
			iova_magazine_free(cpu_rcache->loaded);
			cpu_rcache->loaded = rcache->depot[--rcache->depot_size];
			has_pfn = true;
		}
		spin_unlock(&rcache->lock);
	}

	if (has_pfn)
		iova_pfn = iova_magazine_pop(cpu_rcache->loaded, limit_pfn);

	spin_unlock_irqrestore(&cpu_rcache->lock, flags);

	return iova_pfn;
}

/*
 * Try to satisfy IOVA allocation range from rcache. Fail if requested
 * size is too big or the DMA limit we are given isn't satisfied by the
 * top element in the magazine.
 */
static unsigned long iova_rcache_get(struct iova_domain *iovad,
				     unsigned long size,
				     unsigned long limit_pfn)
{
	unsigned int log_size = order_base_2(size);

	if (log_size >= IOVA_RANGE_CACHE_MAX_SIZE)
		return 0;

	return __iova_rcache_get(&iovad->rcaches[log_size], limit_pfn - size);
}
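
/*
 * Worked example of the 'limit_pfn - size' adjustment above
 * (illustrative): the magazines store pfn_lo values, so for size = 8
 * and an exclusive limit_pfn of 0x100000 the pop limit is 0xffff8,
 * which guarantees pfn_lo + 8 - 1 still fits below the caller's limit.
 */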

/*
 * free rcache data structures.
 */
static void free_iova_rcaches(struct iova_domain *iovad)
{
	struct iova_rcache *rcache;
	struct iova_cpu_rcache *cpu_rcache;
	unsigned int cpu;
	int i, j;

	for (i = 0; i < IOVA_RANGE_CACHE_MAX_SIZE; ++i) {
		rcache = &iovad->rcaches[i];
		if (!rcache->cpu_rcaches)
			break;
		for_each_possible_cpu(cpu) {
			cpu_rcache = per_cpu_ptr(rcache->cpu_rcaches, cpu);
			iova_magazine_free(cpu_rcache->loaded);
			iova_magazine_free(cpu_rcache->prev);
		}
		free_percpu(rcache->cpu_rcaches);
		for (j = 0; j < rcache->depot_size; ++j)
			iova_magazine_free(rcache->depot[j]);
	}

	kfree(iovad->rcaches);
	iovad->rcaches = NULL;
}

/*
 * free all the IOVA ranges cached by a cpu (used when cpu is unplugged)
 */
static void free_cpu_cached_iovas(unsigned int cpu, struct iova_domain *iovad)
{
	struct iova_cpu_rcache *cpu_rcache;
	struct iova_rcache *rcache;
	unsigned long flags;
	int i;

	for (i = 0; i < IOVA_RANGE_CACHE_MAX_SIZE; ++i) {
		rcache = &iovad->rcaches[i];
		cpu_rcache = per_cpu_ptr(rcache->cpu_rcaches, cpu);
		spin_lock_irqsave(&cpu_rcache->lock, flags);
		iova_magazine_free_pfns(cpu_rcache->loaded, iovad);
		iova_magazine_free_pfns(cpu_rcache->prev, iovad);
		spin_unlock_irqrestore(&cpu_rcache->lock, flags);
	}
}

/*
 * free all the IOVA ranges of global cache
 */
static void free_global_cached_iovas(struct iova_domain *iovad)
{
	struct iova_rcache *rcache;
	unsigned long flags;
	int i, j;

	for (i = 0; i < IOVA_RANGE_CACHE_MAX_SIZE; ++i) {
		rcache = &iovad->rcaches[i];
		spin_lock_irqsave(&rcache->lock, flags);
		for (j = 0; j < rcache->depot_size; ++j) {
			iova_magazine_free_pfns(rcache->depot[j], iovad);
			iova_magazine_free(rcache->depot[j]);
		}
		rcache->depot_size = 0;
		spin_unlock_irqrestore(&rcache->lock, flags);
	}
}

MODULE_AUTHOR("Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>");
MODULE_LICENSE("GPL");