// SPDX-License-Identifier: GPL-2.0
#include <linux/pagewalk.h>
#include <linux/highmem.h>
#include <linux/sched.h>
#include <linux/hugetlb.h>

/*
 * We want to know the real level where an entry is located, ignoring any
 * folding of levels which may be happening. For example, if p4d is folded
 * then a missing entry found at level 1 (p4d) is actually at level 0 (pgd).
 */
static int real_depth(int depth)
{
	if (depth == 3 && PTRS_PER_PMD == 1)
		depth = 2;
	if (depth == 2 && PTRS_PER_PUD == 1)
		depth = 1;
	if (depth == 1 && PTRS_PER_P4D == 1)
		depth = 0;
	return depth;
}
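
/*
 * Worked example: with both p4d and pud folded (PTRS_PER_P4D == 1 and
 * PTRS_PER_PUD == 1), real_depth(2) first becomes 1 and then 0, so a
 * hole noticed while iterating puds is reported at the pgd level.
 */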

static int walk_pte_range_inner(pte_t *pte, unsigned long addr,
				unsigned long end, struct mm_walk *walk)
{
	const struct mm_walk_ops *ops = walk->ops;
	int err = 0;

	for (;;) {
		err = ops->pte_entry(pte, addr, addr + PAGE_SIZE, walk);
		if (err)
			break;
		if (addr >= end - PAGE_SIZE)
			break;
		addr += PAGE_SIZE;
		pte++;
	}
	return err;
}

static int walk_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
			  struct mm_walk *walk)
{
	pte_t *pte;
	int err = 0;
	spinlock_t *ptl;

	if (walk->no_vma) {
		/*
		 * pte_offset_map() might apply user-specific validation.
		 * Indeed, on x86_64 the pmd entries set up by init_espfix_ap()
		 * fit its pmd_bad() check (_PAGE_NX set and _PAGE_RW clear),
		 * and CONFIG_EFI_PGT_DUMP efi_mm goes so far as to walk them.
		 */
		if (walk->mm == &init_mm || addr >= TASK_SIZE)
			pte = pte_offset_kernel(pmd, addr);
		else
			pte = pte_offset_map(pmd, addr);
		if (pte) {
			err = walk_pte_range_inner(pte, addr, end, walk);
			if (walk->mm != &init_mm && addr < TASK_SIZE)
				pte_unmap(pte);
		}
	} else {
		pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl);
		if (pte) {
			err = walk_pte_range_inner(pte, addr, end, walk);
			pte_unmap_unlock(pte, ptl);
		}
	}
	if (!pte)
		walk->action = ACTION_AGAIN;
	return err;
}
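
/*
 * Illustrative sketch (not part of this file's API): a minimal ->pte_entry
 * callback that counts present ptes over the walked range. The callback
 * name and the private structure are hypothetical.
 *
 *	struct pte_stats {
 *		unsigned long nr_present;
 *	};
 *
 *	static int count_present_pte(pte_t *pte, unsigned long addr,
 *				     unsigned long next, struct mm_walk *walk)
 *	{
 *		struct pte_stats *stats = walk->private;
 *
 *		if (pte_present(ptep_get(pte)))
 *			stats->nr_present++;
 *		return 0;
 *	}
 */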

#ifdef CONFIG_ARCH_HAS_HUGEPD
static int walk_hugepd_range(hugepd_t *phpd, unsigned long addr,
			     unsigned long end, struct mm_walk *walk, int pdshift)
{
	int err = 0;
	const struct mm_walk_ops *ops = walk->ops;
	int shift = hugepd_shift(*phpd);
	int page_size = 1 << shift;

	if (!ops->pte_entry)
		return 0;

	if (addr & (page_size - 1))
		return 0;

	for (;;) {
		pte_t *pte;

		spin_lock(&walk->mm->page_table_lock);
		pte = hugepte_offset(*phpd, addr, pdshift);
		err = ops->pte_entry(pte, addr, addr + page_size, walk);
		spin_unlock(&walk->mm->page_table_lock);

		if (err)
			break;
		if (addr >= end - page_size)
			break;
		addr += page_size;
	}
	return err;
}
#else
static int walk_hugepd_range(hugepd_t *phpd, unsigned long addr,
			     unsigned long end, struct mm_walk *walk, int pdshift)
{
	return 0;
}
#endif

static int walk_pmd_range(pud_t *pud, unsigned long addr, unsigned long end,
			  struct mm_walk *walk)
{
	pmd_t *pmd;
	unsigned long next;
	const struct mm_walk_ops *ops = walk->ops;
	int err = 0;
	int depth = real_depth(3);

	pmd = pmd_offset(pud, addr);
	do {
again:
		next = pmd_addr_end(addr, end);
		if (pmd_none(*pmd)) {
			if (ops->pte_hole)
				err = ops->pte_hole(addr, next, depth, walk);
			if (err)
				break;
			continue;
		}

		walk->action = ACTION_SUBTREE;

		/*
		 * This implies that each ->pmd_entry() handler
		 * needs to know about pmd_trans_huge() pmds
		 */
		if (ops->pmd_entry)
			err = ops->pmd_entry(pmd, addr, next, walk);
		if (err)
			break;

		if (walk->action == ACTION_AGAIN)
			goto again;

		/*
		 * Check this here so we only break down trans_huge
		 * pages when we _need_ to
		 */
		if ((!walk->vma && (pmd_leaf(*pmd) || !pmd_present(*pmd))) ||
		    walk->action == ACTION_CONTINUE ||
		    !(ops->pte_entry))
			continue;

		if (walk->vma)
			split_huge_pmd(walk->vma, pmd, addr);

		if (is_hugepd(__hugepd(pmd_val(*pmd))))
			err = walk_hugepd_range((hugepd_t *)pmd, addr, next, walk, PMD_SHIFT);
		else
			err = walk_pte_range(pmd, addr, next, walk);
		if (err)
			break;

		if (walk->action == ACTION_AGAIN)
			goto again;

	} while (pmd++, addr = next, addr != end);

	return err;
}
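
/*
 * Illustrative sketch of a ->pmd_entry handler cooperating with the
 * walk->action protocol above (hypothetical callback, not part of this
 * file): it handles pmd_trans_huge() entries in place and suppresses the
 * split-and-descend fallback by setting ACTION_CONTINUE.
 *
 *	static int my_pmd_entry(pmd_t *pmd, unsigned long addr,
 *				unsigned long next, struct mm_walk *walk)
 *	{
 *		unsigned long *nr_thp = walk->private;
 *
 *		if (pmd_trans_huge(*pmd)) {
 *			(*nr_thp)++;
 *			walk->action = ACTION_CONTINUE;
 *		}
 *		return 0;
 *	}
 */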

static int walk_pud_range(p4d_t *p4d, unsigned long addr, unsigned long end,
			  struct mm_walk *walk)
{
	pud_t *pud;
	unsigned long next;
	const struct mm_walk_ops *ops = walk->ops;
	int err = 0;
	int depth = real_depth(2);

	pud = pud_offset(p4d, addr);
	do {
again:
		next = pud_addr_end(addr, end);
		if (pud_none(*pud)) {
			if (ops->pte_hole)
				err = ops->pte_hole(addr, next, depth, walk);
			if (err)
				break;
			continue;
		}

		walk->action = ACTION_SUBTREE;

		if (ops->pud_entry)
			err = ops->pud_entry(pud, addr, next, walk);
		if (err)
			break;

		if (walk->action == ACTION_AGAIN)
			goto again;

		if ((!walk->vma && (pud_leaf(*pud) || !pud_present(*pud))) ||
		    walk->action == ACTION_CONTINUE ||
		    !(ops->pmd_entry || ops->pte_entry))
			continue;

		if (walk->vma)
			split_huge_pud(walk->vma, pud, addr);
		if (pud_none(*pud))
			goto again;

		if (is_hugepd(__hugepd(pud_val(*pud))))
			err = walk_hugepd_range((hugepd_t *)pud, addr, next, walk, PUD_SHIFT);
		else
			err = walk_pmd_range(pud, addr, next, walk);
		if (err)
			break;
	} while (pud++, addr = next, addr != end);

	return err;
}

static int walk_p4d_range(pgd_t *pgd, unsigned long addr, unsigned long end,
			  struct mm_walk *walk)
{
	p4d_t *p4d;
	unsigned long next;
	const struct mm_walk_ops *ops = walk->ops;
	int err = 0;
	int depth = real_depth(1);

	p4d = p4d_offset(pgd, addr);
	do {
		next = p4d_addr_end(addr, end);
		if (p4d_none_or_clear_bad(p4d)) {
			if (ops->pte_hole)
				err = ops->pte_hole(addr, next, depth, walk);
			if (err)
				break;
			continue;
		}
		if (ops->p4d_entry) {
			err = ops->p4d_entry(p4d, addr, next, walk);
			if (err)
				break;
		}
		if (is_hugepd(__hugepd(p4d_val(*p4d))))
			err = walk_hugepd_range((hugepd_t *)p4d, addr, next, walk, P4D_SHIFT);
		else if (ops->pud_entry || ops->pmd_entry || ops->pte_entry)
			err = walk_pud_range(p4d, addr, next, walk);
		if (err)
			break;
	} while (p4d++, addr = next, addr != end);

	return err;
}

static int walk_pgd_range(unsigned long addr, unsigned long end,
			  struct mm_walk *walk)
{
	pgd_t *pgd;
	unsigned long next;
	const struct mm_walk_ops *ops = walk->ops;
	int err = 0;

	if (walk->pgd)
		pgd = walk->pgd + pgd_index(addr);
	else
		pgd = pgd_offset(walk->mm, addr);
	do {
		next = pgd_addr_end(addr, end);
		if (pgd_none_or_clear_bad(pgd)) {
			if (ops->pte_hole)
				err = ops->pte_hole(addr, next, 0, walk);
			if (err)
				break;
			continue;
		}
		if (ops->pgd_entry) {
			err = ops->pgd_entry(pgd, addr, next, walk);
			if (err)
				break;
		}
		if (is_hugepd(__hugepd(pgd_val(*pgd))))
			err = walk_hugepd_range((hugepd_t *)pgd, addr, next, walk, PGDIR_SHIFT);
		else if (ops->p4d_entry || ops->pud_entry || ops->pmd_entry || ops->pte_entry)
			err = walk_p4d_range(pgd, addr, next, walk);
		if (err)
			break;
	} while (pgd++, addr = next, addr != end);

	return err;
}

#ifdef CONFIG_HUGETLB_PAGE
static unsigned long hugetlb_entry_end(struct hstate *h, unsigned long addr,
				       unsigned long end)
{
	unsigned long boundary = (addr & huge_page_mask(h)) + huge_page_size(h);
	return boundary < end ? boundary : end;
}

static int walk_hugetlb_range(unsigned long addr, unsigned long end,
			      struct mm_walk *walk)
{
	struct vm_area_struct *vma = walk->vma;
	struct hstate *h = hstate_vma(vma);
	unsigned long next;
	unsigned long hmask = huge_page_mask(h);
	unsigned long sz = huge_page_size(h);
	pte_t *pte;
	const struct mm_walk_ops *ops = walk->ops;
	int err = 0;

	hugetlb_vma_lock_read(vma);
	do {
		next = hugetlb_entry_end(h, addr, end);
		pte = hugetlb_walk(vma, addr & hmask, sz);
		if (pte)
			err = ops->hugetlb_entry(pte, hmask, addr, next, walk);
		else if (ops->pte_hole)
			err = ops->pte_hole(addr, next, -1, walk);
		if (err)
			break;
	} while (addr = next, addr != end);
	hugetlb_vma_unlock_read(vma);

	return err;
}
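
/*
 * Illustrative sketch of a ->hugetlb_entry callback (hypothetical name and
 * private data; assumes the single-argument huge_ptep_get() of this kernel
 * vintage). It runs under the hugetlb vma lock taken above.
 *
 *	static int my_hugetlb_entry(pte_t *pte, unsigned long hmask,
 *				    unsigned long addr, unsigned long next,
 *				    struct mm_walk *walk)
 *	{
 *		unsigned long *nr_present = walk->private;
 *
 *		if (pte_present(huge_ptep_get(pte)))
 *			(*nr_present)++;
 *		return 0;
 *	}
 */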

#else /* CONFIG_HUGETLB_PAGE */
static int walk_hugetlb_range(unsigned long addr, unsigned long end,
			      struct mm_walk *walk)
{
	return 0;
}

#endif /* CONFIG_HUGETLB_PAGE */

/*
 * Decide whether we really walk over the current vma on [@start, @end)
 * or skip it via the returned value. Return 0 if we do walk over the
 * current vma, and return 1 if we skip the vma. A negative return value
 * means an error occurred, in which case we abort the current walk.
 */
static int walk_page_test(unsigned long start, unsigned long end,
			  struct mm_walk *walk)
{
	struct vm_area_struct *vma = walk->vma;
	const struct mm_walk_ops *ops = walk->ops;

	if (ops->test_walk)
		return ops->test_walk(start, end, walk);

	/*
	 * vma(VM_PFNMAP) doesn't have any valid struct pages behind VM_PFNMAP
	 * range, so we don't walk over it as we do for normal vmas. However,
	 * some callers are interested in handling hole ranges and they don't
	 * want to just ignore any single address range. Such users certainly
	 * define their ->pte_hole() callbacks, so let's delegate them to handle
	 * vma(VM_PFNMAP).
	 */
	if (vma->vm_flags & VM_PFNMAP) {
		int err = 1;

		if (ops->pte_hole)
			err = ops->pte_hole(start, end, -1, walk);
		return err ? err : 1;
	}
	return 0;
}
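
/*
 * Illustrative sketch of a ->test_walk callback (hypothetical name):
 * return 1 to skip the vma without error, 0 to walk it.
 *
 *	static int my_test_walk(unsigned long start, unsigned long end,
 *				struct mm_walk *walk)
 *	{
 *		return (walk->vma->vm_flags & VM_SPECIAL) ? 1 : 0;
 *	}
 */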

static int __walk_page_range(unsigned long start, unsigned long end,
			     struct mm_walk *walk)
{
	int err = 0;
	struct vm_area_struct *vma = walk->vma;
	const struct mm_walk_ops *ops = walk->ops;

	if (ops->pre_vma) {
		err = ops->pre_vma(start, end, walk);
		if (err)
			return err;
	}

	if (is_vm_hugetlb_page(vma)) {
		if (ops->hugetlb_entry)
			err = walk_hugetlb_range(start, end, walk);
	} else
		err = walk_pgd_range(start, end, walk);

	if (ops->post_vma)
		ops->post_vma(walk);

	return err;
}

static inline void process_mm_walk_lock(struct mm_struct *mm,
					enum page_walk_lock walk_lock)
{
	if (walk_lock == PGWALK_RDLOCK)
		mmap_assert_locked(mm);
	else
		mmap_assert_write_locked(mm);
}

static inline void process_vma_walk_lock(struct vm_area_struct *vma,
					 enum page_walk_lock walk_lock)
{
#ifdef CONFIG_PER_VMA_LOCK
	switch (walk_lock) {
	case PGWALK_WRLOCK:
		vma_start_write(vma);
		break;
	case PGWALK_WRLOCK_VERIFY:
		vma_assert_write_locked(vma);
		break;
	case PGWALK_RDLOCK:
		/* PGWALK_RDLOCK is handled by process_mm_walk_lock */
		break;
	}
#endif
}

/**
 * walk_page_range - walk page table with caller specific callbacks
 * @mm:		mm_struct representing the target process of page table walk
 * @start:	start address of the virtual address range
 * @end:	end address of the virtual address range
 * @ops:	operation to call during the walk
 * @private:	private data for callbacks' usage
 *
 * Recursively walk the page table tree of the process represented by @mm
 * within the virtual address range [@start, @end). During walking, we can do
 * some caller-specific work for each entry, by setting up pmd_entry(),
 * pte_entry(), and/or hugetlb_entry(). If you don't set up some of these
 * callbacks, the associated entries/pages are just ignored.
 * The return values of these callbacks are commonly defined like below:
 *
 *  - 0  : succeeded in handling the current entry, and if you don't reach the
 *         end address yet, continue to walk.
 *  - >0 : succeeded in handling the current entry, and return to the caller
 *         with caller specific value.
 *  - <0 : failed to handle the current entry, and return to the caller
 *         with error code.
 *
 * Before starting to walk page table, some callers want to check whether
 * they really want to walk over the current vma, typically by checking
 * its vm_flags. walk_page_test() and @ops->test_walk() are used for this
 * purpose.
 *
 * If operations need to be staged before and committed after a vma is walked,
 * there are two callbacks, pre_vma() and post_vma(). Note that post_vma(),
 * since it is intended to handle commit-type operations, can't return any
 * errors.
 *
 * struct mm_walk keeps current values of some common data like vma and pmd,
 * which are useful for the access from callbacks. If you want to pass some
 * caller-specific data to callbacks, @private should be helpful.
 *
 * Locking:
 *   Callers of walk_page_range() and walk_page_vma() should hold @mm->mmap_lock,
 *   because these functions traverse the vma list and/or access the vma's data.
 */
int walk_page_range(struct mm_struct *mm, unsigned long start,
		    unsigned long end, const struct mm_walk_ops *ops,
		    void *private)
{
	int err = 0;
	unsigned long next;
	struct vm_area_struct *vma;
	struct mm_walk walk = {
		.ops		= ops,
		.mm		= mm,
		.private	= private,
	};

	if (start >= end)
		return -EINVAL;

	if (!walk.mm)
		return -EINVAL;

	process_mm_walk_lock(walk.mm, ops->walk_lock);

	vma = find_vma(walk.mm, start);
	do {
		if (!vma) { /* after the last vma */
			walk.vma = NULL;
			next = end;
			if (ops->pte_hole)
				err = ops->pte_hole(start, next, -1, &walk);
		} else if (start < vma->vm_start) { /* outside vma */
			walk.vma = NULL;
			next = min(end, vma->vm_start);
			if (ops->pte_hole)
				err = ops->pte_hole(start, next, -1, &walk);
		} else { /* inside vma */
			process_vma_walk_lock(vma, ops->walk_lock);
			walk.vma = vma;
			next = min(end, vma->vm_end);
			vma = find_vma(mm, vma->vm_end);

			err = walk_page_test(start, next, &walk);
			if (err > 0) {
				/*
				 * positive return values are purely for
				 * controlling the pagewalk, so should never
				 * be passed to the callers.
				 */
				err = 0;
				continue;
			}
			if (err < 0)
				break;
			err = __walk_page_range(start, next, &walk);
		}
		if (err)
			break;
	} while (start = next, start < end);
	return err;
}
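
/*
 * Typical usage (sketch; the ops and the count_present_pte() callback
 * sketched near walk_pte_range() above are hypothetical):
 *
 *	static const struct mm_walk_ops count_ops = {
 *		.pte_entry	= count_present_pte,
 *		.walk_lock	= PGWALK_RDLOCK,
 *	};
 *
 *	struct pte_stats stats = { 0 };
 *	int err;
 *
 *	mmap_read_lock(mm);
 *	err = walk_page_range(mm, start, end, &count_ops, &stats);
 *	mmap_read_unlock(mm);
 */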

/**
 * walk_page_range_novma - walk a range of pagetables not backed by a vma
 * @mm:		mm_struct representing the target process of page table walk
 * @start:	start address of the virtual address range
 * @end:	end address of the virtual address range
 * @ops:	operation to call during the walk
 * @pgd:	pgd to walk if different from mm->pgd
 * @private:	private data for callbacks' usage
 *
 * Similar to walk_page_range() but can walk any page tables even if they are
 * not backed by VMAs. Because 'unusual' entries may be walked, this function
 * will also not lock the PTEs for the pte_entry() callback. This is useful for
 * walking the kernel page tables or page tables for firmware.
 */
int walk_page_range_novma(struct mm_struct *mm, unsigned long start,
			  unsigned long end, const struct mm_walk_ops *ops,
			  pgd_t *pgd,
			  void *private)
{
	struct mm_walk walk = {
		.ops		= ops,
		.mm		= mm,
		.pgd		= pgd,
		.private	= private,
		.no_vma		= true
	};

	if (start >= end || !walk.mm)
		return -EINVAL;

	mmap_assert_write_locked(walk.mm);

	return walk_pgd_range(start, end, &walk);
}
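
/*
 * Sketch of a no-vma walk over the kernel page tables, as a page-table
 * dumper might do (dump_ops and its callbacks are hypothetical); the
 * write lock satisfies the assertion above:
 *
 *	mmap_write_lock(&init_mm);
 *	err = walk_page_range_novma(&init_mm, start, end, &dump_ops,
 *				    NULL, private);
 *	mmap_write_unlock(&init_mm);
 */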

int walk_page_range_vma(struct vm_area_struct *vma, unsigned long start,
			unsigned long end, const struct mm_walk_ops *ops,
			void *private)
{
	struct mm_walk walk = {
		.ops		= ops,
		.mm		= vma->vm_mm,
		.vma		= vma,
		.private	= private,
	};

	if (start >= end || !walk.mm)
		return -EINVAL;
	if (start < vma->vm_start || end > vma->vm_end)
		return -EINVAL;

	process_mm_walk_lock(walk.mm, ops->walk_lock);
	process_vma_walk_lock(vma, ops->walk_lock);
	return __walk_page_range(start, end, &walk);
}

int walk_page_vma(struct vm_area_struct *vma, const struct mm_walk_ops *ops,
		  void *private)
{
	struct mm_walk walk = {
		.ops		= ops,
		.mm		= vma->vm_mm,
		.vma		= vma,
		.private	= private,
	};

	if (!walk.mm)
		return -EINVAL;

	process_mm_walk_lock(walk.mm, ops->walk_lock);
	process_vma_walk_lock(vma, ops->walk_lock);
	return __walk_page_range(vma->vm_start, vma->vm_end, &walk);
}

/**
 * walk_page_mapping - walk all memory areas mapped into a struct address_space.
 * @mapping:	Pointer to the struct address_space
 * @first_index: First page offset in the address_space
 * @nr:		Number of incremental page offsets to cover
 * @ops:	operation to call during the walk
 * @private:	private data for callbacks' usage
 *
 * This function walks all memory areas mapped into a struct address_space.
 * The walk is limited to only the given page-size index range, but if
 * the index boundaries cross a huge page-table entry, that entry will be
 * included.
 *
 * Also see walk_page_range() for additional information.
 *
 * Locking:
 *   This function can't require that the struct mm_struct::mmap_lock is held,
 *   since @mapping may be mapped by multiple processes. Instead
 *   @mapping->i_mmap_rwsem must be held. This might have implications in the
 *   callbacks, and it's up to the caller to ensure that the
 *   struct mm_struct::mmap_lock is not needed.
 *
 *   Also this means that a caller can't rely on the struct
 *   vm_area_struct::vm_flags to be constant across a call,
 *   except for immutable flags. Callers requiring this shouldn't use
 *   this function.
 *
 * Return: 0 on success, negative error code on failure, positive number on
 * caller defined premature termination.
 */
int walk_page_mapping(struct address_space *mapping, pgoff_t first_index,
		      pgoff_t nr, const struct mm_walk_ops *ops,
		      void *private)
{
	struct mm_walk walk = {
		.ops		= ops,
		.private	= private,
	};
	struct vm_area_struct *vma;
	pgoff_t vba, vea, cba, cea;
	unsigned long start_addr, end_addr;
	int err = 0;

	lockdep_assert_held(&mapping->i_mmap_rwsem);
	vma_interval_tree_foreach(vma, &mapping->i_mmap, first_index,
				  first_index + nr - 1) {
		/* Clip to the vma */
		vba = vma->vm_pgoff;
		vea = vba + vma_pages(vma);
		cba = first_index;
		cba = max(cba, vba);
		cea = first_index + nr;
		cea = min(cea, vea);

		start_addr = ((cba - vba) << PAGE_SHIFT) + vma->vm_start;
		end_addr = ((cea - vba) << PAGE_SHIFT) + vma->vm_start;
		if (start_addr >= end_addr)
			continue;

		walk.vma = vma;
		walk.mm = vma->vm_mm;

		err = walk_page_test(vma->vm_start, vma->vm_end, &walk);
		if (err > 0) {
			err = 0;
			break;
		} else if (err < 0)
			break;

		err = __walk_page_range(start_addr, end_addr, &walk);
		if (err)
			break;
	}

	return err;
}
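
/*
 * Sketch of usage (my_ops and its callbacks are hypothetical), covering
 * @nr pages starting at @first_index while holding the i_mmap lock the
 * function asserts:
 *
 *	i_mmap_lock_read(mapping);
 *	err = walk_page_mapping(mapping, first_index, nr, &my_ops, NULL);
 *	i_mmap_unlock_read(mapping);
 */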
679 | |