1 | /* SPDX-License-Identifier: GPL-2.0 */ |
2 | #ifndef _LINUX_PAGEWALK_H |
3 | #define _LINUX_PAGEWALK_H |
4 | |
5 | #include <linux/mm.h> |
6 | |
7 | struct mm_walk; |
8 | |
/**
 * enum page_walk_lock - locking requirement during a page walk
 * @PGWALK_RDLOCK:	  mmap_lock should be locked for read to stabilize
 *			  the vma tree
 * @PGWALK_WRLOCK:	  vma will be write-locked during the walk
 * @PGWALK_WRLOCK_VERIFY: vma is expected to be already write-locked during
 *			  the walk
 *
 * PGWALK_RDLOCK is anchored at 0, so it is the value held by a
 * zero-initialised field of this type; the remaining enumerators count up
 * from it (1, 2).
 */
enum page_walk_lock {
	PGWALK_RDLOCK = 0,
	PGWALK_WRLOCK,
	PGWALK_WRLOCK_VERIFY,
};
18 | |
/**
 * struct mm_walk_ops - callbacks for walk_page_range
 * @pgd_entry:		if set, called for each non-empty PGD (top-level) entry
 * @p4d_entry:		if set, called for each non-empty P4D entry
 * @pud_entry:		if set, called for each non-empty PUD entry
 * @pmd_entry:		if set, called for each non-empty PMD entry.
 *			This handler is required to be able to handle
 *			pmd_trans_huge() pmds. Handlers may simply choose to
 *			split_huge_page() instead of handling it explicitly.
 * @pte_entry:		if set, called for each PTE (lowest-level) entry,
 *			including empty ones
 * @pte_hole:		if set, called for each hole at all levels,
 *			depth is -1 if not known, 0:PGD, 1:P4D, 2:PUD, 3:PMD.
 *			Any folded depths (where PTRS_PER_P?D is equal to 1)
 *			are skipped.
 * @hugetlb_entry:	if set, called for each hugetlb entry. This hook
 *			function is called with the vma lock held, in order to
 *			protect against a concurrent freeing of the pte_t* or
 *			the ptl. In some cases, the hook function needs to drop
 *			and retake the vma lock in order to avoid deadlocks
 *			while calling other functions. In such cases the hook
 *			function must either refrain from accessing the pte or
 *			ptl after dropping the vma lock, or else revalidate
 *			those items after re-acquiring the vma lock and before
 *			accessing them.
 * @test_walk:		caller specific callback function to determine whether
 *			we walk over the current vma or not. Returning 0 means
 *			"do page table walk over the current vma", returning
 *			a negative value means "abort current page table walk
 *			right now" and returning 1 means "skip the current vma".
 *			Note that this callback is not called when the caller
 *			passes in a single VMA as for walk_page_vma().
 * @pre_vma:		if set, called before starting walk on a non-null vma.
 * @post_vma:		if set, called after a walk on a non-null vma, provided
 *			that @pre_vma and the vma walk succeeded.
 * @walk_lock:		locking requirement during the walk, one of
 *			enum page_walk_lock. PGWALK_RDLOCK is 0, so that is
 *			the value of this field in an ops table that does not
 *			initialise it explicitly.
 *
 * p?d_entry callbacks are called even if those levels are folded on a
 * particular architecture/configuration.
 */
struct mm_walk_ops {
	int (*pgd_entry)(pgd_t *pgd, unsigned long addr,
			 unsigned long next, struct mm_walk *walk);
	int (*p4d_entry)(p4d_t *p4d, unsigned long addr,
			 unsigned long next, struct mm_walk *walk);
	int (*pud_entry)(pud_t *pud, unsigned long addr,
			 unsigned long next, struct mm_walk *walk);
	int (*pmd_entry)(pmd_t *pmd, unsigned long addr,
			 unsigned long next, struct mm_walk *walk);
	int (*pte_entry)(pte_t *pte, unsigned long addr,
			 unsigned long next, struct mm_walk *walk);
	int (*pte_hole)(unsigned long addr, unsigned long next,
			int depth, struct mm_walk *walk);
	int (*hugetlb_entry)(pte_t *pte, unsigned long hmask,
			     unsigned long addr, unsigned long next,
			     struct mm_walk *walk);
	int (*test_walk)(unsigned long addr, unsigned long next,
			struct mm_walk *walk);
	int (*pre_vma)(unsigned long start, unsigned long end,
		       struct mm_walk *walk);
	void (*post_vma)(struct mm_walk *walk);
	enum page_walk_lock walk_lock;
};
81 | |
/**
 * enum page_walk_action - action for pud_entry / pmd_entry callbacks
 * @ACTION_SUBTREE:  descend to next level, splitting huge pages if needed
 *		     and possible; this is the default
 * @ACTION_CONTINUE: continue to next entry at this level (ignoring any
 *		     subtree)
 * @ACTION_AGAIN:    call again for this entry
 *
 * The default, ACTION_SUBTREE, is anchored at 0; the remaining enumerators
 * count up from it (1, 2).
 */
enum page_walk_action {
	ACTION_SUBTREE = 0,
	ACTION_CONTINUE,
	ACTION_AGAIN,
};
94 | |
/**
 * struct mm_walk - walk_page_range data
 * @ops:	operation to call during the walk
 * @mm:		mm_struct representing the target process of page table walk
 * @pgd:	pointer to PGD; only valid with no_vma (otherwise set to NULL)
 * @vma:	vma currently walked (NULL if walking outside vmas)
 * @action:	next action to perform (see enum page_walk_action);
 *		ACTION_SUBTREE is the default
 * @no_vma:	walk ignoring vmas (vma will always be NULL)
 * @private:	private data for callbacks' usage
 *
 * A pointer to this structure is the last argument of every callback in
 * struct mm_walk_ops.
 *
 * (see the comment on walk_page_range() for more details)
 */
struct mm_walk {
	const struct mm_walk_ops *ops;
	struct mm_struct *mm;
	pgd_t *pgd;
	struct vm_area_struct *vma;
	enum page_walk_action action;
	bool no_vma;
	void *private;
};
116 | |
/*
 * Page table walk entry points.
 *
 * Only the prototypes live in this header; the authoritative description is
 * the comment on walk_page_range() next to its definition. Each entry point
 * takes a callback table (@ops) and an opaque @private pointer; these share
 * their names with the ->ops and ->private members of struct mm_walk.
 * NOTE(review): presumably they are what the callbacks later see through
 * their struct mm_walk argument -- confirm against the definitions.
 */

/* Walk an address range (@start, @end) of @mm. */
int walk_page_range(struct mm_struct *mm, unsigned long start,
		unsigned long end, const struct mm_walk_ops *ops,
		void *private);
/*
 * Like walk_page_range() but additionally takes an explicit @pgd.
 * struct mm_walk documents ->pgd as only valid with ->no_vma set, so this is
 * presumably the "walk ignoring vmas" variant -- TODO confirm.
 */
int walk_page_range_novma(struct mm_struct *mm, unsigned long start,
			  unsigned long end, const struct mm_walk_ops *ops,
			  pgd_t *pgd,
			  void *private);
/*
 * Walk an address range (@start, @end) given a single @vma rather than an mm.
 * NOTE(review): presumably ->test_walk is skipped here as it is for
 * walk_page_vma() -- confirm.
 */
int walk_page_range_vma(struct vm_area_struct *vma, unsigned long start,
			unsigned long end, const struct mm_walk_ops *ops,
			void *private);
/*
 * Walk a single caller-supplied @vma. Per the struct mm_walk_ops
 * documentation, ->test_walk is not called in this case.
 */
int walk_page_vma(struct vm_area_struct *vma, const struct mm_walk_ops *ops,
		void *private);
/*
 * Walk driven by an address_space: takes @mapping plus a page-offset window
 * (@first_index, @nr) instead of an mm and virtual address range.
 * NOTE(review): exact semantics are not visible from this header -- confirm.
 */
int walk_page_mapping(struct address_space *mapping, pgoff_t first_index,
		      pgoff_t nr, const struct mm_walk_ops *ops,
		      void *private);
132 | |
133 | #endif /* _LINUX_PAGEWALK_H */ |
134 | |