1 | // SPDX-License-Identifier: GPL-2.0 |
2 | #include <linux/kernel.h> |
3 | #include <linux/errno.h> |
4 | #include <linux/err.h> |
5 | #include <linux/mm.h> |
6 | #include <linux/slab.h> |
7 | #include <linux/vmalloc.h> |
8 | #include <linux/pagemap.h> |
9 | #include <linux/sched.h> |
10 | |
11 | /** |
12 | * get_vaddr_frames() - map virtual addresses to pfns |
13 | * @start: starting user address |
14 | * @nr_frames: number of pages / pfns from start to map |
15 | * @gup_flags: flags modifying lookup behaviour |
16 | * @vec: structure which receives pages / pfns of the addresses mapped. |
17 | * It should have space for at least nr_frames entries. |
18 | * |
19 | * This function maps virtual addresses from @start and fills @vec structure |
20 | * with page frame numbers or page pointers to corresponding pages (choice |
21 | * depends on the type of the vma underlying the virtual address). If @start |
22 | * belongs to a normal vma, the function grabs reference to each of the pages |
23 | * to pin them in memory. If @start belongs to VM_IO | VM_PFNMAP vma, we don't |
24 | * touch page structures and the caller must make sure pfns aren't reused for |
25 | * anything else while he is using them. |
26 | * |
27 | * The function returns number of pages mapped which may be less than |
28 | * @nr_frames. In particular we stop mapping if there are more vmas of |
29 | * different type underlying the specified range of virtual addresses. |
30 | * When the function isn't able to map a single page, it returns error. |
31 | * |
32 | * This function takes care of grabbing mmap_sem as necessary. |
33 | */ |
34 | int get_vaddr_frames(unsigned long start, unsigned int nr_frames, |
35 | unsigned int gup_flags, struct frame_vector *vec) |
36 | { |
37 | struct mm_struct *mm = current->mm; |
38 | struct vm_area_struct *vma; |
39 | int ret = 0; |
40 | int err; |
41 | int locked; |
42 | |
43 | if (nr_frames == 0) |
44 | return 0; |
45 | |
46 | if (WARN_ON_ONCE(nr_frames > vec->nr_allocated)) |
47 | nr_frames = vec->nr_allocated; |
48 | |
49 | down_read(&mm->mmap_sem); |
50 | locked = 1; |
51 | vma = find_vma_intersection(mm, start, start + 1); |
52 | if (!vma) { |
53 | ret = -EFAULT; |
54 | goto out; |
55 | } |
56 | |
57 | /* |
58 | * While get_vaddr_frames() could be used for transient (kernel |
59 | * controlled lifetime) pinning of memory pages all current |
60 | * users establish long term (userspace controlled lifetime) |
61 | * page pinning. Treat get_vaddr_frames() like |
62 | * get_user_pages_longterm() and disallow it for filesystem-dax |
63 | * mappings. |
64 | */ |
65 | if (vma_is_fsdax(vma)) { |
66 | ret = -EOPNOTSUPP; |
67 | goto out; |
68 | } |
69 | |
70 | if (!(vma->vm_flags & (VM_IO | VM_PFNMAP))) { |
71 | vec->got_ref = true; |
72 | vec->is_pfns = false; |
73 | ret = get_user_pages_locked(start, nr_frames, |
74 | gup_flags, (struct page **)(vec->ptrs), &locked); |
75 | goto out; |
76 | } |
77 | |
78 | vec->got_ref = false; |
79 | vec->is_pfns = true; |
80 | do { |
81 | unsigned long *nums = frame_vector_pfns(vec); |
82 | |
83 | while (ret < nr_frames && start + PAGE_SIZE <= vma->vm_end) { |
84 | err = follow_pfn(vma, start, &nums[ret]); |
85 | if (err) { |
86 | if (ret == 0) |
87 | ret = err; |
88 | goto out; |
89 | } |
90 | start += PAGE_SIZE; |
91 | ret++; |
92 | } |
93 | /* |
94 | * We stop if we have enough pages or if VMA doesn't completely |
95 | * cover the tail page. |
96 | */ |
97 | if (ret >= nr_frames || start < vma->vm_end) |
98 | break; |
99 | vma = find_vma_intersection(mm, start, start + 1); |
100 | } while (vma && vma->vm_flags & (VM_IO | VM_PFNMAP)); |
101 | out: |
102 | if (locked) |
103 | up_read(&mm->mmap_sem); |
104 | if (!ret) |
105 | ret = -EFAULT; |
106 | if (ret > 0) |
107 | vec->nr_frames = ret; |
108 | return ret; |
109 | } |
110 | EXPORT_SYMBOL(get_vaddr_frames); |
111 | |
112 | /** |
113 | * put_vaddr_frames() - drop references to pages if get_vaddr_frames() acquired |
114 | * them |
115 | * @vec: frame vector to put |
116 | * |
117 | * Drop references to pages if get_vaddr_frames() acquired them. We also |
118 | * invalidate the frame vector so that it is prepared for the next call into |
119 | * get_vaddr_frames(). |
120 | */ |
121 | void put_vaddr_frames(struct frame_vector *vec) |
122 | { |
123 | int i; |
124 | struct page **pages; |
125 | |
126 | if (!vec->got_ref) |
127 | goto out; |
128 | pages = frame_vector_pages(vec); |
129 | /* |
130 | * frame_vector_pages() might needed to do a conversion when |
131 | * get_vaddr_frames() got pages but vec was later converted to pfns. |
132 | * But it shouldn't really fail to convert pfns back... |
133 | */ |
134 | if (WARN_ON(IS_ERR(pages))) |
135 | goto out; |
136 | for (i = 0; i < vec->nr_frames; i++) |
137 | put_page(pages[i]); |
138 | vec->got_ref = false; |
139 | out: |
140 | vec->nr_frames = 0; |
141 | } |
142 | EXPORT_SYMBOL(put_vaddr_frames); |
143 | |
144 | /** |
145 | * frame_vector_to_pages - convert frame vector to contain page pointers |
146 | * @vec: frame vector to convert |
147 | * |
148 | * Convert @vec to contain array of page pointers. If the conversion is |
149 | * successful, return 0. Otherwise return an error. Note that we do not grab |
150 | * page references for the page structures. |
151 | */ |
152 | int frame_vector_to_pages(struct frame_vector *vec) |
153 | { |
154 | int i; |
155 | unsigned long *nums; |
156 | struct page **pages; |
157 | |
158 | if (!vec->is_pfns) |
159 | return 0; |
160 | nums = frame_vector_pfns(vec); |
161 | for (i = 0; i < vec->nr_frames; i++) |
162 | if (!pfn_valid(nums[i])) |
163 | return -EINVAL; |
164 | pages = (struct page **)nums; |
165 | for (i = 0; i < vec->nr_frames; i++) |
166 | pages[i] = pfn_to_page(nums[i]); |
167 | vec->is_pfns = false; |
168 | return 0; |
169 | } |
170 | EXPORT_SYMBOL(frame_vector_to_pages); |
171 | |
172 | /** |
173 | * frame_vector_to_pfns - convert frame vector to contain pfns |
174 | * @vec: frame vector to convert |
175 | * |
176 | * Convert @vec to contain array of pfns. |
177 | */ |
178 | void frame_vector_to_pfns(struct frame_vector *vec) |
179 | { |
180 | int i; |
181 | unsigned long *nums; |
182 | struct page **pages; |
183 | |
184 | if (vec->is_pfns) |
185 | return; |
186 | pages = (struct page **)(vec->ptrs); |
187 | nums = (unsigned long *)pages; |
188 | for (i = 0; i < vec->nr_frames; i++) |
189 | nums[i] = page_to_pfn(pages[i]); |
190 | vec->is_pfns = true; |
191 | } |
192 | EXPORT_SYMBOL(frame_vector_to_pfns); |
193 | |
194 | /** |
195 | * frame_vector_create() - allocate & initialize structure for pinned pfns |
196 | * @nr_frames: number of pfns slots we should reserve |
197 | * |
198 | * Allocate and initialize struct pinned_pfns to be able to hold @nr_pfns |
199 | * pfns. |
200 | */ |
201 | struct frame_vector *frame_vector_create(unsigned int nr_frames) |
202 | { |
203 | struct frame_vector *vec; |
204 | int size = sizeof(struct frame_vector) + sizeof(void *) * nr_frames; |
205 | |
206 | if (WARN_ON_ONCE(nr_frames == 0)) |
207 | return NULL; |
208 | /* |
209 | * This is absurdly high. It's here just to avoid strange effects when |
210 | * arithmetics overflows. |
211 | */ |
212 | if (WARN_ON_ONCE(nr_frames > INT_MAX / sizeof(void *) / 2)) |
213 | return NULL; |
214 | /* |
215 | * Avoid higher order allocations, use vmalloc instead. It should |
216 | * be rare anyway. |
217 | */ |
218 | vec = kvmalloc(size, GFP_KERNEL); |
219 | if (!vec) |
220 | return NULL; |
221 | vec->nr_allocated = nr_frames; |
222 | vec->nr_frames = 0; |
223 | return vec; |
224 | } |
225 | EXPORT_SYMBOL(frame_vector_create); |
226 | |
227 | /** |
228 | * frame_vector_destroy() - free memory allocated to carry frame vector |
229 | * @vec: Frame vector to free |
230 | * |
231 | * Free structure allocated by frame_vector_create() to carry frames. |
232 | */ |
233 | void frame_vector_destroy(struct frame_vector *vec) |
234 | { |
235 | /* Make sure put_vaddr_frames() got called properly... */ |
236 | VM_BUG_ON(vec->nr_frames > 0); |
237 | kvfree(vec); |
238 | } |
239 | EXPORT_SYMBOL(frame_vector_destroy); |
240 | |