/*
 * Copyright 2007 Andi Kleen, SUSE Labs.
 * Subject to the GPL, v.2
 *
 * This contains most of the x86 vDSO kernel-side code.
 */
#include <linux/mm.h>
#include <linux/err.h>
#include <linux/sched.h>
#include <linux/sched/task_stack.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/random.h>
#include <linux/elf.h>
#include <linux/cpu.h>
#include <linux/ptrace.h>
#include <asm/pvclock.h>
#include <asm/vgtod.h>
#include <asm/proto.h>
#include <asm/vdso.h>
#include <asm/vvar.h>
#include <asm/page.h>
#include <asm/desc.h>
#include <asm/cpufeature.h>
#include <asm/mshyperv.h>

#if defined(CONFIG_X86_64)
unsigned int __read_mostly vdso64_enabled = 1;
#endif

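/*
 * Sanity-check a vDSO image blob at boot and patch in the alternative
 * instructions selected for this CPU.
 */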
void __init init_vdso_image(const struct vdso_image *image)
{
	BUG_ON(image->size % PAGE_SIZE != 0);

	apply_alternatives((struct alt_instr *)(image->data + image->alt),
			   (struct alt_instr *)(image->data + image->alt +
						image->alt_len));
}

struct linux_binprm;

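/*
 * Fault handler for the vDSO text mapping: return the page of the
 * kernel-resident image blob that corresponds to the faulting offset.
 */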
static vm_fault_t vdso_fault(const struct vm_special_mapping *sm,
		      struct vm_area_struct *vma, struct vm_fault *vmf)
{
	const struct vdso_image *image = vma->vm_mm->context.vdso_image;

	if (!image || (vmf->pgoff << PAGE_SHIFT) >= image->size)
		return VM_FAULT_SIGBUS;

	vmf->page = virt_to_page(image->data + (vmf->pgoff << PAGE_SHIFT));
	get_page(vmf->page);
	return 0;
}

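/*
 * If the task entered the kernel through the 32-bit fast syscall path,
 * its saved user IP points at the int80 landing pad in the old vDSO
 * mapping; move it to the same offset in the new mapping
 * (see do_fast_syscall_32()).
 */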
static void vdso_fix_landing(const struct vdso_image *image,
		struct vm_area_struct *new_vma)
{
#if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION
	if (in_ia32_syscall() && image == &vdso_image_32) {
		struct pt_regs *regs = current_pt_regs();
		unsigned long vdso_land = image->sym_int80_landing_pad;
		unsigned long old_land_addr = vdso_land +
			(unsigned long)current->mm->context.vdso;

		/* Fixing userspace landing: see do_fast_syscall_32() */
		if (regs->ip == old_land_addr)
			regs->ip = new_vma->vm_start + vdso_land;
	}
#endif
}

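/*
 * mremap() hook for the vDSO mapping: refuse size changes, fix up the
 * 32-bit syscall landing pad if needed and record the new base address
 * in the mm context.
 */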
static int vdso_mremap(const struct vm_special_mapping *sm,
		struct vm_area_struct *new_vma)
{
	unsigned long new_size = new_vma->vm_end - new_vma->vm_start;
	const struct vdso_image *image = current->mm->context.vdso_image;

	if (image->size != new_size)
		return -EINVAL;

	vdso_fix_landing(image, new_vma);
	current->mm->context.vdso = (void __user *)new_vma->vm_start;

	return 0;
}

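/*
 * Fault handler for the vvar mapping: insert the shared vvar page and,
 * when the corresponding clocksources have been used, the pvclock and
 * Hyper-V TSC pages.
 */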
static vm_fault_t vvar_fault(const struct vm_special_mapping *sm,
		      struct vm_area_struct *vma, struct vm_fault *vmf)
{
	const struct vdso_image *image = vma->vm_mm->context.vdso_image;
	long sym_offset;

	if (!image)
		return VM_FAULT_SIGBUS;

	sym_offset = (long)(vmf->pgoff << PAGE_SHIFT) +
		image->sym_vvar_start;

	/*
	 * Sanity check: a symbol offset of zero means that the page
	 * does not exist for this vdso image, not that the page is at
	 * offset zero relative to the text mapping.  This should be
	 * impossible here, because sym_offset should only be zero for
	 * the page past the end of the vvar mapping.
	 */
	if (sym_offset == 0)
		return VM_FAULT_SIGBUS;

	if (sym_offset == image->sym_vvar_page) {
		return vmf_insert_pfn(vma, vmf->address,
				__pa_symbol(&__vvar_page) >> PAGE_SHIFT);
	} else if (sym_offset == image->sym_pvclock_page) {
		struct pvclock_vsyscall_time_info *pvti =
			pvclock_get_pvti_cpu0_va();

		if (pvti && vclock_was_used(VCLOCK_PVCLOCK)) {
			return vmf_insert_pfn_prot(vma, vmf->address,
					__pa(pvti) >> PAGE_SHIFT,
					pgprot_decrypted(vma->vm_page_prot));
		}
	} else if (sym_offset == image->sym_hvclock_page) {
		struct ms_hyperv_tsc_page *tsc_pg = hv_get_tsc_page();

		if (tsc_pg && vclock_was_used(VCLOCK_HVCLOCK))
			return vmf_insert_pfn(vma, vmf->address,
					vmalloc_to_pfn(tsc_pg));
	}

	return VM_FAULT_SIGBUS;
}

static const struct vm_special_mapping vdso_mapping = {
	.name = "[vdso]",
	.fault = vdso_fault,
	.mremap = vdso_mremap,
};
static const struct vm_special_mapping vvar_mapping = {
	.name = "[vvar]",
	.fault = vvar_fault,
};

/*
 * Add vdso and vvar mappings to the current process.
 * @image - blob to map
 * @addr  - request a specific address (zero to map at a free address)
 */
static int map_vdso(const struct vdso_image *image, unsigned long addr)
{
	struct mm_struct *mm = current->mm;
	struct vm_area_struct *vma;
	unsigned long text_start;
	int ret = 0;

	if (down_write_killable(&mm->mmap_sem))
		return -EINTR;

	addr = get_unmapped_area(NULL, addr,
				 image->size - image->sym_vvar_start, 0, 0);
	if (IS_ERR_VALUE(addr)) {
		ret = addr;
		goto up_fail;
	}

	text_start = addr - image->sym_vvar_start;

	/*
	 * MAYWRITE to allow gdb to COW and set breakpoints
	 */
	vma = _install_special_mapping(mm,
				       text_start,
				       image->size,
				       VM_READ|VM_EXEC|
				       VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC,
				       &vdso_mapping);

	if (IS_ERR(vma)) {
		ret = PTR_ERR(vma);
		goto up_fail;
	}

	vma = _install_special_mapping(mm,
				       addr,
				       -image->sym_vvar_start,
				       VM_READ|VM_MAYREAD|VM_IO|VM_DONTDUMP|
				       VM_PFNMAP,
				       &vvar_mapping);

	if (IS_ERR(vma)) {
		ret = PTR_ERR(vma);
		do_munmap(mm, text_start, image->size, NULL);
	} else {
		current->mm->context.vdso = (void __user *)text_start;
		current->mm->context.vdso_image = image;
	}

up_fail:
	up_write(&mm->mmap_sem);
	return ret;
}

#ifdef CONFIG_X86_64
/*
 * Put the vdso above the (randomized) stack with another randomized
 * offset.  This way there is no hole in the middle of address space.
 * To save memory make sure it is still in the same PTE as the stack
 * top.  This doesn't give that many random bits.
 *
 * Note that this algorithm is imperfect: the distribution of the vdso
 * start address within a PMD is biased toward the end.
 *
 * Only used for the 64-bit and x32 vdsos.
 */
static unsigned long vdso_addr(unsigned long start, unsigned len)
{
	unsigned long addr, end;
	unsigned offset;

	/*
	 * Round up the start address.  It can start out unaligned as a result
	 * of stack start randomization.
	 */
	start = PAGE_ALIGN(start);

	/* Round the lowest possible end address up to a PMD boundary. */
	end = (start + len + PMD_SIZE - 1) & PMD_MASK;
	if (end >= TASK_SIZE_MAX)
		end = TASK_SIZE_MAX;
	end -= len;

	if (end > start) {
		offset = get_random_int() % (((end - start) >> PAGE_SHIFT) + 1);
		addr = start + (offset << PAGE_SHIFT);
	} else {
		addr = start;
	}

	/*
	 * Forcibly align the final address in case we have a hardware
	 * issue that requires alignment for performance reasons.
	 */
	addr = align_vdso_addr(addr);

	return addr;
}

static int map_vdso_randomized(const struct vdso_image *image)
{
	unsigned long addr = vdso_addr(current->mm->start_stack,
				       image->size - image->sym_vvar_start);

	return map_vdso(image, addr);
}
#endif

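/*
 * Map the vDSO at a caller-chosen address, but only if no vDSO/vvar
 * mapping exists in this mm yet (used, for instance, by the
 * ARCH_MAP_VDSO_* arch_prctl() requests).
 */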
int map_vdso_once(const struct vdso_image *image, unsigned long addr)
{
	struct mm_struct *mm = current->mm;
	struct vm_area_struct *vma;

	down_write(&mm->mmap_sem);
	/*
	 * Check if we have already mapped the vdso blob - fail to prevent
	 * abuse from userspace of install_special_mapping, which may not
	 * do accounting and rlimits right.
	 * We could search the vma near context.vdso, but it's a slowpath,
	 * so let's explicitly check all VMAs to be completely sure.
	 */
	for (vma = mm->mmap; vma; vma = vma->vm_next) {
		if (vma_is_special_mapping(vma, &vdso_mapping) ||
				vma_is_special_mapping(vma, &vvar_mapping)) {
			up_write(&mm->mmap_sem);
			return -EEXIST;
		}
	}
	up_write(&mm->mmap_sem);

	return map_vdso(image, addr);
}

#if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION)
static int load_vdso32(void)
{
	if (vdso32_enabled != 1)  /* Other values all mean "disabled" */
		return 0;

	return map_vdso(&vdso_image_32, 0);
}
#endif

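/*
 * Hooks called by the ELF loader at exec time: map the appropriate vDSO
 * (64-bit, x32 or ia32) into the new process image.
 */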
#ifdef CONFIG_X86_64
int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
{
	if (!vdso64_enabled)
		return 0;

	return map_vdso_randomized(&vdso_image_64);
}

#ifdef CONFIG_COMPAT
int compat_arch_setup_additional_pages(struct linux_binprm *bprm,
				       int uses_interp)
{
#ifdef CONFIG_X86_X32_ABI
	if (test_thread_flag(TIF_X32)) {
		if (!vdso64_enabled)
			return 0;
		return map_vdso_randomized(&vdso_image_x32);
	}
#endif
#ifdef CONFIG_IA32_EMULATION
	return load_vdso32();
#else
	return 0;
#endif
}
#endif
#else
int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
{
	return load_vdso32();
}
#endif

#ifdef CONFIG_X86_64
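/*
 * "vdso=" boot parameter: booting with vdso=0 disables mapping the
 * 64-bit vDSO into new processes.
 */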
static __init int vdso_setup(char *s)
{
	vdso64_enabled = simple_strtoul(s, NULL, 0);
	return 0;
}
__setup("vdso=", vdso_setup);

static int __init init_vdso(void)
{
	init_vdso_image(&vdso_image_64);

#ifdef CONFIG_X86_X32_ABI
	init_vdso_image(&vdso_image_x32);
#endif

	return 0;
}
subsys_initcall(init_vdso);
#endif /* CONFIG_X86_64 */