// SPDX-License-Identifier: GPL-2.0
/*
 * mm/mremap.c
 *
 * (C) Copyright 1996 Linus Torvalds
 *
 * Address space accounting code <alan@lxorguk.ukuu.org.uk>
 * (C) Copyright 2002 Red Hat Inc, All Rights Reserved
 */

#include <linux/mm.h>
#include <linux/mm_inline.h>
#include <linux/hugetlb.h>
#include <linux/shm.h>
#include <linux/ksm.h>
#include <linux/mman.h>
#include <linux/swap.h>
#include <linux/capability.h>
#include <linux/fs.h>
#include <linux/swapops.h>
#include <linux/highmem.h>
#include <linux/security.h>
#include <linux/syscalls.h>
#include <linux/mmu_notifier.h>
#include <linux/uaccess.h>
#include <linux/userfaultfd_k.h>
#include <linux/mempolicy.h>

#include <asm/cacheflush.h>
#include <asm/tlb.h>
#include <asm/pgalloc.h>

#include "internal.h"

static pud_t *get_old_pud(struct mm_struct *mm, unsigned long addr)
{
	pgd_t *pgd;
	p4d_t *p4d;
	pud_t *pud;

	pgd = pgd_offset(mm, addr);
	if (pgd_none_or_clear_bad(pgd))
		return NULL;

	p4d = p4d_offset(pgd, addr);
	if (p4d_none_or_clear_bad(p4d))
		return NULL;

	pud = pud_offset(p4d, addr);
	if (pud_none_or_clear_bad(pud))
		return NULL;

	return pud;
}
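
/*
 * A note on the walk above (informational): on kernels configured with
 * fewer than five page-table levels, the p4d (and, where applicable,
 * pud) accessors are folded into the level above at compile time, so
 * the checks on folded levels compile away to nothing.
 */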

static pmd_t *get_old_pmd(struct mm_struct *mm, unsigned long addr)
{
	pud_t *pud;
	pmd_t *pmd;

	pud = get_old_pud(mm, addr);
	if (!pud)
		return NULL;

	pmd = pmd_offset(pud, addr);
	if (pmd_none(*pmd))
		return NULL;

	return pmd;
}

static pud_t *alloc_new_pud(struct mm_struct *mm, struct vm_area_struct *vma,
			    unsigned long addr)
{
	pgd_t *pgd;
	p4d_t *p4d;

	pgd = pgd_offset(mm, addr);
	p4d = p4d_alloc(mm, pgd, addr);
	if (!p4d)
		return NULL;

	return pud_alloc(mm, p4d, addr);
}

static pmd_t *alloc_new_pmd(struct mm_struct *mm, struct vm_area_struct *vma,
			    unsigned long addr)
{
	pud_t *pud;
	pmd_t *pmd;

	pud = alloc_new_pud(mm, vma, addr);
	if (!pud)
		return NULL;

	pmd = pmd_alloc(mm, pud, addr);
	if (!pmd)
		return NULL;

	VM_BUG_ON(pmd_trans_huge(*pmd));

	return pmd;
}

static void take_rmap_locks(struct vm_area_struct *vma)
{
	if (vma->vm_file)
		i_mmap_lock_write(vma->vm_file->f_mapping);
	if (vma->anon_vma)
		anon_vma_lock_write(vma->anon_vma);
}

static void drop_rmap_locks(struct vm_area_struct *vma)
{
	if (vma->anon_vma)
		anon_vma_unlock_write(vma->anon_vma);
	if (vma->vm_file)
		i_mmap_unlock_write(vma->vm_file->f_mapping);
}

static pte_t move_soft_dirty_pte(pte_t pte)
{
	/*
	 * Set the soft dirty bit so we can notice
	 * in userspace that the ptes were moved.
	 */
#ifdef CONFIG_MEM_SOFT_DIRTY
	if (pte_present(pte))
		pte = pte_mksoft_dirty(pte);
	else if (is_swap_pte(pte))
		pte = pte_swp_mksoft_dirty(pte);
#endif
	return pte;
}
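
/*
 * For reference (a userspace-facing note, assuming the usual soft-dirty
 * interface): with CONFIG_MEM_SOFT_DIRTY, a monitor can clear soft-dirty
 * state by writing "4" to /proc/<pid>/clear_refs and afterwards detect
 * pages relocated by mremap() via the soft-dirty bit (bit 55) in the
 * corresponding /proc/<pid>/pagemap entries.
 */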

static int move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd,
		unsigned long old_addr, unsigned long old_end,
		struct vm_area_struct *new_vma, pmd_t *new_pmd,
		unsigned long new_addr, bool need_rmap_locks)
{
	struct mm_struct *mm = vma->vm_mm;
	pte_t *old_pte, *new_pte, pte;
	spinlock_t *old_ptl, *new_ptl;
	bool force_flush = false;
	unsigned long len = old_end - old_addr;
	int err = 0;

	/*
	 * When need_rmap_locks is true, we take the i_mmap_rwsem and anon_vma
	 * locks to ensure that rmap will always observe either the old or the
	 * new ptes. This is the easiest way to avoid races with
	 * truncate_pagecache(), page migration, etc...
	 *
	 * When need_rmap_locks is false, we use other ways to avoid
	 * such races:
	 *
	 * - During exec() shift_arg_pages(), we use a specially tagged vma
	 *   which rmap call sites look for using vma_is_temporary_stack().
	 *
	 * - During mremap(), new_vma is often known to be placed after vma
	 *   in rmap traversal order. This ensures rmap will always observe
	 *   either the old pte, or the new pte, or both (the page table locks
	 *   serialize access to individual ptes, but only rmap traversal
	 *   order guarantees that we won't miss both the old and new ptes).
	 */
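	/*
	 * Concretely (an illustrative scenario, not new behaviour): a
	 * walker that reaches vma after the ptes have moved finds them
	 * gone, but because new_vma comes later in the traversal it
	 * still finds them at the new location; a walker that passed
	 * vma before the move already saw the old location. Either way
	 * at least one of the two is observed.
	 */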
	if (need_rmap_locks)
		take_rmap_locks(vma);

	/*
	 * We don't have to worry about the ordering of src and dst
	 * pte locks because exclusive mmap_lock prevents deadlock.
	 */
	old_pte = pte_offset_map_lock(mm, old_pmd, old_addr, &old_ptl);
	if (!old_pte) {
		err = -EAGAIN;
		goto out;
	}
	new_pte = pte_offset_map_nolock(mm, new_pmd, new_addr, &new_ptl);
	if (!new_pte) {
		pte_unmap_unlock(old_pte, old_ptl);
		err = -EAGAIN;
		goto out;
	}
	if (new_ptl != old_ptl)
		spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING);
	flush_tlb_batched_pending(vma->vm_mm);
	arch_enter_lazy_mmu_mode();

	for (; old_addr < old_end; old_pte++, old_addr += PAGE_SIZE,
				   new_pte++, new_addr += PAGE_SIZE) {
		if (pte_none(ptep_get(old_pte)))
			continue;

		pte = ptep_get_and_clear(mm, old_addr, old_pte);
		/*
		 * If we are remapping a valid PTE, make sure
		 * to flush TLB before we drop the PTL for the
		 * PTE.
		 *
		 * NOTE! Both old and new PTL matter: the old one
		 * for racing with page_mkclean(), the new one to
		 * make sure the physical page stays valid until
		 * the TLB entry for the old mapping has been
		 * flushed.
		 */
		if (pte_present(pte))
			force_flush = true;
		pte = move_pte(pte, new_vma->vm_page_prot, old_addr, new_addr);
		pte = move_soft_dirty_pte(pte);
		set_pte_at(mm, new_addr, new_pte, pte);
	}

	arch_leave_lazy_mmu_mode();
	if (force_flush)
		flush_tlb_range(vma, old_end - len, old_end);
	if (new_ptl != old_ptl)
		spin_unlock(new_ptl);
	pte_unmap(new_pte - 1);
	pte_unmap_unlock(old_pte - 1, old_ptl);
out:
	if (need_rmap_locks)
		drop_rmap_locks(vma);
	return err;
}

#ifndef arch_supports_page_table_move
#define arch_supports_page_table_move arch_supports_page_table_move
static inline bool arch_supports_page_table_move(void)
{
	return IS_ENABLED(CONFIG_HAVE_MOVE_PMD) ||
		IS_ENABLED(CONFIG_HAVE_MOVE_PUD);
}
#endif

#ifdef CONFIG_HAVE_MOVE_PMD
static bool move_normal_pmd(struct vm_area_struct *vma, unsigned long old_addr,
		unsigned long new_addr, pmd_t *old_pmd, pmd_t *new_pmd)
{
	spinlock_t *old_ptl, *new_ptl;
	struct mm_struct *mm = vma->vm_mm;
	pmd_t pmd;

	if (!arch_supports_page_table_move())
		return false;
	/*
	 * The destination pmd shouldn't be established, free_pgtables()
	 * should have released it.
	 *
	 * However, there's a case during execve() where we use mremap
	 * to move the initial stack, and in that case the target area
	 * may overlap the source area (always moving down).
	 *
	 * If everything is PMD-aligned, that works fine, as moving
	 * each pmd down will clear the source pmd. But if we first
	 * have a few 4kB-only pages that get moved down, and then
	 * hit the "now the rest is PMD-aligned, let's do everything
	 * one pmd at a time", we will still have the old (now empty
	 * of any 4kB pages, but still there) PMD in the page table
	 * tree.
	 *
	 * Warn on it once - because we really should try to figure
	 * out how to do this better - but then say "I won't move
	 * this pmd".
	 *
	 * One alternative might be to just unmap the target pmd at
	 * this point, and verify that it really is empty. We'll see.
	 */
	if (WARN_ON_ONCE(!pmd_none(*new_pmd)))
		return false;

	/*
	 * We don't have to worry about the ordering of src and dst
	 * ptlocks because exclusive mmap_lock prevents deadlock.
	 */
	old_ptl = pmd_lock(vma->vm_mm, old_pmd);
	new_ptl = pmd_lockptr(mm, new_pmd);
	if (new_ptl != old_ptl)
		spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING);

	/* Clear the pmd */
	pmd = *old_pmd;
	pmd_clear(old_pmd);

	VM_BUG_ON(!pmd_none(*new_pmd));

	pmd_populate(mm, new_pmd, pmd_pgtable(pmd));
	flush_tlb_range(vma, old_addr, old_addr + PMD_SIZE);
	if (new_ptl != old_ptl)
		spin_unlock(new_ptl);
	spin_unlock(old_ptl);

	return true;
}
#else
static inline bool move_normal_pmd(struct vm_area_struct *vma,
		unsigned long old_addr, unsigned long new_addr, pmd_t *old_pmd,
		pmd_t *new_pmd)
{
	return false;
}
#endif
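
/*
 * A sketch of what move_normal_pmd() buys us (illustration only; sizes
 * assume x86-64 with 4 KiB base pages, where one PMD entry maps 2 MiB
 * via a 512-entry PTE page):
 *
 *	before:	old_pmd -> [PTE page, 512 entries]	new_pmd -> (none)
 *	after:	old_pmd -> (none)			new_pmd -> [same PTE page]
 *
 * Instead of up to 512 ptep_get_and_clear()/set_pte_at() pairs in
 * move_ptes(), the whole PTE page is re-hooked with one pmd_clear() and
 * one pmd_populate(), followed by a single TLB range flush.
 */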

#if CONFIG_PGTABLE_LEVELS > 2 && defined(CONFIG_HAVE_MOVE_PUD)
static bool move_normal_pud(struct vm_area_struct *vma, unsigned long old_addr,
		unsigned long new_addr, pud_t *old_pud, pud_t *new_pud)
{
	spinlock_t *old_ptl, *new_ptl;
	struct mm_struct *mm = vma->vm_mm;
	pud_t pud;

	if (!arch_supports_page_table_move())
		return false;
	/*
	 * The destination pud shouldn't be established, free_pgtables()
	 * should have released it.
	 */
	if (WARN_ON_ONCE(!pud_none(*new_pud)))
		return false;

	/*
	 * We don't have to worry about the ordering of src and dst
	 * ptlocks because exclusive mmap_lock prevents deadlock.
	 */
	old_ptl = pud_lock(vma->vm_mm, old_pud);
	new_ptl = pud_lockptr(mm, new_pud);
	if (new_ptl != old_ptl)
		spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING);

	/* Clear the pud */
	pud = *old_pud;
	pud_clear(old_pud);

	VM_BUG_ON(!pud_none(*new_pud));

	pud_populate(mm, new_pud, pud_pgtable(pud));
	flush_tlb_range(vma, old_addr, old_addr + PUD_SIZE);
	if (new_ptl != old_ptl)
		spin_unlock(new_ptl);
	spin_unlock(old_ptl);

	return true;
}
#else
static inline bool move_normal_pud(struct vm_area_struct *vma,
		unsigned long old_addr, unsigned long new_addr, pud_t *old_pud,
		pud_t *new_pud)
{
	return false;
}
#endif

#if defined(CONFIG_TRANSPARENT_HUGEPAGE) && defined(CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD)
static bool move_huge_pud(struct vm_area_struct *vma, unsigned long old_addr,
			  unsigned long new_addr, pud_t *old_pud, pud_t *new_pud)
{
	spinlock_t *old_ptl, *new_ptl;
	struct mm_struct *mm = vma->vm_mm;
	pud_t pud;

	/*
	 * The destination pud shouldn't be established, free_pgtables()
	 * should have released it.
	 */
	if (WARN_ON_ONCE(!pud_none(*new_pud)))
		return false;

	/*
	 * We don't have to worry about the ordering of src and dst
	 * ptlocks because exclusive mmap_lock prevents deadlock.
	 */
	old_ptl = pud_lock(vma->vm_mm, old_pud);
	new_ptl = pud_lockptr(mm, new_pud);
	if (new_ptl != old_ptl)
		spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING);

	/* Clear the pud */
	pud = *old_pud;
	pud_clear(old_pud);

	VM_BUG_ON(!pud_none(*new_pud));

	/* Set the new pud */
	/* mark soft_dirty when we add pud level soft dirty support */
	set_pud_at(mm, new_addr, new_pud, pud);
	flush_pud_tlb_range(vma, old_addr, old_addr + HPAGE_PUD_SIZE);
	if (new_ptl != old_ptl)
		spin_unlock(new_ptl);
	spin_unlock(old_ptl);

	return true;
}
#else
static bool move_huge_pud(struct vm_area_struct *vma, unsigned long old_addr,
			  unsigned long new_addr, pud_t *old_pud, pud_t *new_pud)
{
	WARN_ON_ONCE(1);
	return false;
}
#endif

enum pgt_entry {
	NORMAL_PMD,
	HPAGE_PMD,
	NORMAL_PUD,
	HPAGE_PUD,
};

/*
 * Returns an extent of the corresponding size for the pgt_entry specified if
 * valid. Else returns a smaller extent bounded by the end of the source and
 * destination pgt_entry.
 */
static __always_inline unsigned long get_extent(enum pgt_entry entry,
			unsigned long old_addr, unsigned long old_end,
			unsigned long new_addr)
{
	unsigned long next, extent, mask, size;

	switch (entry) {
	case HPAGE_PMD:
	case NORMAL_PMD:
		mask = PMD_MASK;
		size = PMD_SIZE;
		break;
	case HPAGE_PUD:
	case NORMAL_PUD:
		mask = PUD_MASK;
		size = PUD_SIZE;
		break;
	default:
		BUILD_BUG();
		break;
	}

	next = (old_addr + size) & mask;
	/* even if next overflowed, extent below will be ok */
	extent = next - old_addr;
	if (extent > old_end - old_addr)
		extent = old_end - old_addr;
	next = (new_addr + size) & mask;
	if (extent > next - new_addr)
		extent = next - new_addr;
	return extent;
}
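
/*
 * A worked example (illustrative numbers, assuming a 2 MiB PMD_SIZE):
 * at the NORMAL_PMD level with old_addr = 0x1ff000, old_end = 0x600000
 * and new_addr = 0x3ff000:
 *
 *	next   = (0x1ff000 + 0x200000) & PMD_MASK = 0x200000
 *	extent = 0x200000 - 0x1ff000              = 0x1000
 *
 * i.e. one 4 KiB page, up to the source's next PMD boundary. The clamp
 * against the destination (0x400000 - 0x3ff000 = 0x1000) leaves it
 * unchanged here, since both addresses share the same in-PMD offset.
 */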

/*
 * Attempts to speedup the move by moving entry at the level corresponding to
 * pgt_entry. Returns true if the move was successful, else false.
 */
static bool move_pgt_entry(enum pgt_entry entry, struct vm_area_struct *vma,
			unsigned long old_addr, unsigned long new_addr,
			void *old_entry, void *new_entry, bool need_rmap_locks)
{
	bool moved = false;

	/* See comment in move_ptes() */
	if (need_rmap_locks)
		take_rmap_locks(vma);

	switch (entry) {
	case NORMAL_PMD:
		moved = move_normal_pmd(vma, old_addr, new_addr, old_entry,
					new_entry);
		break;
	case NORMAL_PUD:
		moved = move_normal_pud(vma, old_addr, new_addr, old_entry,
					new_entry);
		break;
	case HPAGE_PMD:
		moved = IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) &&
			move_huge_pmd(vma, old_addr, new_addr, old_entry,
				      new_entry);
		break;
	case HPAGE_PUD:
		moved = IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) &&
			move_huge_pud(vma, old_addr, new_addr, old_entry,
				      new_entry);
		break;
	default:
		WARN_ON_ONCE(1);
		break;
	}

	if (need_rmap_locks)
		drop_rmap_locks(vma);

	return moved;
}

/*
 * A helper to check if aligning down is OK. The aligned address should fall
 * on *no mapping*. For the stack moving down, that's a special move within
 * the VMA that is created to span the source and destination of the move,
 * so we make an exception for it.
 */
static bool can_align_down(struct vm_area_struct *vma, unsigned long addr_to_align,
			   unsigned long mask, bool for_stack)
{
	unsigned long addr_masked = addr_to_align & mask;

	/*
	 * If @addr_to_align of either source or destination is not the beginning
	 * of the corresponding VMA, we can't align down or we will destroy part
	 * of the current mapping.
	 */
	if (!for_stack && vma->vm_start != addr_to_align)
		return false;

	/* In the stack case we explicitly permit in-VMA alignment. */
	if (for_stack && addr_masked >= vma->vm_start)
		return true;

	/*
	 * Make sure the realignment doesn't cause the address to fall on an
	 * existing mapping.
	 */
	return find_vma_intersection(vma->vm_mm, addr_masked, vma->vm_start) == NULL;
}

/* Opportunistically realign to specified boundary for faster copy. */
static void try_realign_addr(unsigned long *old_addr, struct vm_area_struct *old_vma,
			     unsigned long *new_addr, struct vm_area_struct *new_vma,
			     unsigned long mask, bool for_stack)
{
	/* Skip if the addresses are already aligned. */
	if ((*old_addr & ~mask) == 0)
		return;

	/* Only realign if the new and old addresses are mutually aligned. */
	if ((*old_addr & ~mask) != (*new_addr & ~mask))
		return;

	/* Ensure realignment doesn't cause overlap with existing mappings. */
	if (!can_align_down(old_vma, *old_addr, mask, for_stack) ||
	    !can_align_down(new_vma, *new_addr, mask, for_stack))
		return;

	*old_addr = *old_addr & mask;
	*new_addr = *new_addr & mask;
}
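
/*
 * Example (illustrative addresses, 2 MiB PMD_SIZE): moving from
 * old_addr = 0x2e5000 to new_addr = 0x4e5000. Both share the in-PMD
 * offset 0xe5000, so provided both VMAs start at those addresses and
 * nothing else is mapped in [0x200000, 0x2e5000) or [0x400000,
 * 0x4e5000), the addresses are realigned down to 0x200000 and 0x400000,
 * letting move_page_tables() use move_normal_pmd() rather than a
 * pte-by-pte copy.
 */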

unsigned long move_page_tables(struct vm_area_struct *vma,
	unsigned long old_addr, struct vm_area_struct *new_vma,
	unsigned long new_addr, unsigned long len,
	bool need_rmap_locks, bool for_stack)
{
	unsigned long extent, old_end;
	struct mmu_notifier_range range;
	pmd_t *old_pmd, *new_pmd;
	pud_t *old_pud, *new_pud;

	if (!len)
		return 0;

	old_end = old_addr + len;

	if (is_vm_hugetlb_page(vma))
		return move_hugetlb_page_tables(vma, new_vma, old_addr,
						new_addr, len);

	/*
	 * If possible, realign addresses to PMD boundary for faster copy.
	 * Only realign if the mremap copying hits a PMD boundary.
	 */
	if (len >= PMD_SIZE - (old_addr & ~PMD_MASK))
		try_realign_addr(&old_addr, vma, &new_addr, new_vma, PMD_MASK,
				 for_stack);

	flush_cache_range(vma, old_addr, old_end);
	mmu_notifier_range_init(&range, MMU_NOTIFY_UNMAP, 0, vma->vm_mm,
				old_addr, old_end);
	mmu_notifier_invalidate_range_start(&range);

	for (; old_addr < old_end; old_addr += extent, new_addr += extent) {
		cond_resched();
		/*
		 * If extent is PUD-sized try to speed up the move by moving at the
		 * PUD level if possible.
		 */
		extent = get_extent(NORMAL_PUD, old_addr, old_end, new_addr);

		old_pud = get_old_pud(vma->vm_mm, old_addr);
		if (!old_pud)
			continue;
		new_pud = alloc_new_pud(vma->vm_mm, vma, new_addr);
		if (!new_pud)
			break;
		if (pud_trans_huge(*old_pud) || pud_devmap(*old_pud)) {
			if (extent == HPAGE_PUD_SIZE) {
				move_pgt_entry(HPAGE_PUD, vma, old_addr, new_addr,
					       old_pud, new_pud, need_rmap_locks);
				/* We ignore and continue on error? */
				continue;
			}
		} else if (IS_ENABLED(CONFIG_HAVE_MOVE_PUD) && extent == PUD_SIZE) {
			if (move_pgt_entry(NORMAL_PUD, vma, old_addr, new_addr,
					   old_pud, new_pud, true))
				continue;
		}

		extent = get_extent(NORMAL_PMD, old_addr, old_end, new_addr);
		old_pmd = get_old_pmd(vma->vm_mm, old_addr);
		if (!old_pmd)
			continue;
		new_pmd = alloc_new_pmd(vma->vm_mm, vma, new_addr);
		if (!new_pmd)
			break;
again:
		if (is_swap_pmd(*old_pmd) || pmd_trans_huge(*old_pmd) ||
		    pmd_devmap(*old_pmd)) {
			if (extent == HPAGE_PMD_SIZE &&
			    move_pgt_entry(HPAGE_PMD, vma, old_addr, new_addr,
					   old_pmd, new_pmd, need_rmap_locks))
				continue;
			split_huge_pmd(vma, old_pmd, old_addr);
		} else if (IS_ENABLED(CONFIG_HAVE_MOVE_PMD) &&
			   extent == PMD_SIZE) {
			/*
			 * If the extent is PMD-sized, try to speed the move by
			 * moving at the PMD level if possible.
			 */
			if (move_pgt_entry(NORMAL_PMD, vma, old_addr, new_addr,
					   old_pmd, new_pmd, true))
				continue;
		}
		if (pmd_none(*old_pmd))
			continue;
		if (pte_alloc(new_vma->vm_mm, new_pmd))
			break;
		if (move_ptes(vma, old_pmd, old_addr, old_addr + extent,
			      new_vma, new_pmd, new_addr, need_rmap_locks) < 0)
			goto again;
	}

	mmu_notifier_invalidate_range_end(&range);

	/*
	 * Prevent negative return values when {old,new}_addr was realigned
	 * but we broke out of the above loop for the first PMD itself.
	 */
	if (len + old_addr < old_end)
		return 0;

	return len + old_addr - old_end;	/* how much done */
}
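
/*
 * To illustrate the descent strategy above (hypothetical layout, x86-64
 * sizes: PUD = 1 GiB, PMD = 2 MiB): a mutually PMD-aligned 10 MiB move
 * that crosses no 1 GiB boundary never sees a PUD-sized extent, so every
 * iteration falls through to the PMD path and the move completes as five
 * move_normal_pmd() calls; move_ptes() is only needed for ranges or
 * offsets that don't line up with a PMD boundary.
 */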

static unsigned long move_vma(struct vm_area_struct *vma,
		unsigned long old_addr, unsigned long old_len,
		unsigned long new_len, unsigned long new_addr,
		bool *locked, unsigned long flags,
		struct vm_userfaultfd_ctx *uf, struct list_head *uf_unmap)
{
	long to_account = new_len - old_len;
	struct mm_struct *mm = vma->vm_mm;
	struct vm_area_struct *new_vma;
	unsigned long vm_flags = vma->vm_flags;
	unsigned long new_pgoff;
	unsigned long moved_len;
	unsigned long account_start = 0;
	unsigned long account_end = 0;
	unsigned long hiwater_vm;
	int err = 0;
	bool need_rmap_locks;
	struct vma_iterator vmi;

	/*
	 * We'd prefer to avoid failure later on in do_munmap:
	 * which may split one vma into three before unmapping.
	 */
	if (mm->map_count >= sysctl_max_map_count - 3)
		return -ENOMEM;

	if (unlikely(flags & MREMAP_DONTUNMAP))
		to_account = new_len;

	if (vma->vm_ops && vma->vm_ops->may_split) {
		if (vma->vm_start != old_addr)
			err = vma->vm_ops->may_split(vma, old_addr);
		if (!err && vma->vm_end != old_addr + old_len)
			err = vma->vm_ops->may_split(vma, old_addr + old_len);
		if (err)
			return err;
	}

	/*
	 * Advise KSM to break any KSM pages in the area to be moved:
	 * it would be confusing if they were to turn up at the new
	 * location, where they happen to coincide with different KSM
	 * pages recently unmapped. But leave vma->vm_flags as it was,
	 * so KSM can come around to merge on vma and new_vma afterwards.
	 */
	err = ksm_madvise(vma, old_addr, old_addr + old_len,
			  MADV_UNMERGEABLE, &vm_flags);
	if (err)
		return err;

	if (vm_flags & VM_ACCOUNT) {
		if (security_vm_enough_memory_mm(mm, to_account >> PAGE_SHIFT))
			return -ENOMEM;
	}

	vma_start_write(vma);
	new_pgoff = vma->vm_pgoff + ((old_addr - vma->vm_start) >> PAGE_SHIFT);
	new_vma = copy_vma(&vma, new_addr, new_len, new_pgoff,
			   &need_rmap_locks);
	if (!new_vma) {
		if (vm_flags & VM_ACCOUNT)
			vm_unacct_memory(to_account >> PAGE_SHIFT);
		return -ENOMEM;
	}

	moved_len = move_page_tables(vma, old_addr, new_vma, new_addr, old_len,
				     need_rmap_locks, false);
	if (moved_len < old_len) {
		err = -ENOMEM;
	} else if (vma->vm_ops && vma->vm_ops->mremap) {
		err = vma->vm_ops->mremap(new_vma);
	}

	if (unlikely(err)) {
		/*
		 * On error, move entries back from new area to old,
		 * which will succeed since the page tables are still there,
		 * and then proceed to unmap new area instead of old.
		 */
		move_page_tables(new_vma, new_addr, vma, old_addr, moved_len,
				 true, false);
		vma = new_vma;
		old_len = new_len;
		old_addr = new_addr;
		new_addr = err;
	} else {
		mremap_userfaultfd_prep(new_vma, uf);
	}

	if (is_vm_hugetlb_page(vma)) {
		clear_vma_resv_huge_pages(vma);
	}

	/* Conceal VM_ACCOUNT so old reservation is not undone */
	if (vm_flags & VM_ACCOUNT && !(flags & MREMAP_DONTUNMAP)) {
		vm_flags_clear(vma, VM_ACCOUNT);
		if (vma->vm_start < old_addr)
			account_start = vma->vm_start;
		if (vma->vm_end > old_addr + old_len)
			account_end = vma->vm_end;
	}

	/*
	 * If we failed to move page tables we still do total_vm increment
	 * since do_munmap() will decrement it by old_len == new_len.
	 *
	 * Since total_vm is about to be raised artificially high for a
	 * moment, we need to restore high watermark afterwards: if stats
	 * are taken meanwhile, total_vm and hiwater_vm appear too high.
	 * If this were a serious issue, we'd add a flag to do_munmap().
	 */
	hiwater_vm = mm->hiwater_vm;
	vm_stat_account(mm, vma->vm_flags, new_len >> PAGE_SHIFT);

	/* Tell pfnmap has moved from this vma */
	if (unlikely(vma->vm_flags & VM_PFNMAP))
		untrack_pfn_clear(vma);

	if (unlikely(!err && (flags & MREMAP_DONTUNMAP))) {
		/* We always clear VM_LOCKED[ONFAULT] on the old vma */
		vm_flags_clear(vma, VM_LOCKED_MASK);

		/*
		 * anon_vma links of the old vma are no longer needed after its
		 * page table has been moved.
		 */
		if (new_vma != vma && vma->vm_start == old_addr &&
			vma->vm_end == (old_addr + old_len))
			unlink_anon_vmas(vma);

		/* Because we won't unmap we don't need to touch locked_vm */
		return new_addr;
	}

	vma_iter_init(&vmi, mm, old_addr);
	if (do_vmi_munmap(&vmi, mm, old_addr, old_len, uf_unmap, false) < 0) {
		/* OOM: unable to split vma, just get accounts right */
		if (vm_flags & VM_ACCOUNT && !(flags & MREMAP_DONTUNMAP))
			vm_acct_memory(old_len >> PAGE_SHIFT);
		account_start = account_end = 0;
	}

	if (vm_flags & VM_LOCKED) {
		mm->locked_vm += new_len >> PAGE_SHIFT;
		*locked = true;
	}

	mm->hiwater_vm = hiwater_vm;

	/* Restore VM_ACCOUNT if one or two pieces of vma left */
	if (account_start) {
		vma = vma_prev(&vmi);
		vm_flags_set(vma, VM_ACCOUNT);
	}

	if (account_end) {
		vma = vma_next(&vmi);
		vm_flags_set(vma, VM_ACCOUNT);
	}

	return new_addr;
}

static struct vm_area_struct *vma_to_resize(unsigned long addr,
	unsigned long old_len, unsigned long new_len, unsigned long flags)
{
	struct mm_struct *mm = current->mm;
	struct vm_area_struct *vma;
	unsigned long pgoff;

	vma = vma_lookup(mm, addr);
	if (!vma)
		return ERR_PTR(-EFAULT);

	/*
	 * !old_len is a special case where an attempt is made to 'duplicate'
	 * a mapping. This makes no sense for private mappings as it will
	 * instead create a fresh/new mapping unrelated to the original. This
	 * is contrary to the basic idea of mremap which creates new mappings
	 * based on the original. There are no known use cases for this
	 * behavior. As a result, fail such attempts.
	 */
	if (!old_len && !(vma->vm_flags & (VM_SHARED | VM_MAYSHARE))) {
		pr_warn_once("%s (%d): attempted to duplicate a private mapping with mremap. This is not supported.\n", current->comm, current->pid);
		return ERR_PTR(-EINVAL);
	}

	if ((flags & MREMAP_DONTUNMAP) &&
			(vma->vm_flags & (VM_DONTEXPAND | VM_PFNMAP)))
		return ERR_PTR(-EINVAL);

	/* We can't remap across vm area boundaries */
	if (old_len > vma->vm_end - addr)
		return ERR_PTR(-EFAULT);

	if (new_len == old_len)
		return vma;

	/* Need to be careful about a growing mapping */
	pgoff = (addr - vma->vm_start) >> PAGE_SHIFT;
	pgoff += vma->vm_pgoff;
	if (pgoff + (new_len >> PAGE_SHIFT) < pgoff)
		return ERR_PTR(-EINVAL);

	if (vma->vm_flags & (VM_DONTEXPAND | VM_PFNMAP))
		return ERR_PTR(-EFAULT);

	if (!mlock_future_ok(mm, vma->vm_flags, new_len - old_len))
		return ERR_PTR(-EAGAIN);

	if (!may_expand_vm(mm, vma->vm_flags,
			   (new_len - old_len) >> PAGE_SHIFT))
		return ERR_PTR(-ENOMEM);

	return vma;
}

static unsigned long mremap_to(unsigned long addr, unsigned long old_len,
		unsigned long new_addr, unsigned long new_len, bool *locked,
		unsigned long flags, struct vm_userfaultfd_ctx *uf,
		struct list_head *uf_unmap_early,
		struct list_head *uf_unmap)
{
	struct mm_struct *mm = current->mm;
	struct vm_area_struct *vma;
	unsigned long ret = -EINVAL;
	unsigned long map_flags = 0;

	if (offset_in_page(new_addr))
		goto out;

	if (new_len > TASK_SIZE || new_addr > TASK_SIZE - new_len)
		goto out;

	/* Ensure the old/new locations do not overlap */
	if (addr + old_len > new_addr && new_addr + new_len > addr)
		goto out;

	/*
	 * move_vma() needs us to stay 4 maps below the threshold, otherwise
	 * it will bail out at the very beginning.
	 * That is a problem if we have already unmapped the regions here
	 * (new_addr, and old_addr), because userspace will not know the
	 * state of the vmas after it gets -ENOMEM.
	 * So, to avoid such a scenario we can pre-compute whether the whole
	 * operation has a high chance of succeeding map-wise.
	 * The worst-case scenario is when both vmas (new_addr and old_addr)
	 * get split in 3 before unmapping.
	 * That means 2 more maps (1 for each) to the ones we already hold.
	 * Check whether the current map count plus 2 still leads us to 4 maps
	 * below the threshold, otherwise return -ENOMEM here to be more safe.
	 */
	if ((mm->map_count + 2) >= sysctl_max_map_count - 3)
		return -ENOMEM;

	if (flags & MREMAP_FIXED) {
		ret = do_munmap(mm, new_addr, new_len, uf_unmap_early);
		if (ret)
			goto out;
	}

	if (old_len > new_len) {
		ret = do_munmap(mm, addr+new_len, old_len - new_len, uf_unmap);
		if (ret)
			goto out;
		old_len = new_len;
	}

	vma = vma_to_resize(addr, old_len, new_len, flags);
	if (IS_ERR(vma)) {
		ret = PTR_ERR(vma);
		goto out;
	}

	/* MREMAP_DONTUNMAP expands by old_len since old_len == new_len */
	if (flags & MREMAP_DONTUNMAP &&
		!may_expand_vm(mm, vma->vm_flags, old_len >> PAGE_SHIFT)) {
		ret = -ENOMEM;
		goto out;
	}

	if (flags & MREMAP_FIXED)
		map_flags |= MAP_FIXED;

	if (vma->vm_flags & VM_MAYSHARE)
		map_flags |= MAP_SHARED;

	ret = get_unmapped_area(vma->vm_file, new_addr, new_len, vma->vm_pgoff +
				((addr - vma->vm_start) >> PAGE_SHIFT),
				map_flags);
	if (IS_ERR_VALUE(ret))
		goto out;

	/* We got a new mapping */
	if (!(flags & MREMAP_FIXED))
		new_addr = ret;

	ret = move_vma(vma, addr, old_len, new_len, new_addr, locked, flags, uf,
		       uf_unmap);

out:
	return ret;
}

static int vma_expandable(struct vm_area_struct *vma, unsigned long delta)
{
	unsigned long end = vma->vm_end + delta;

	if (end < vma->vm_end) /* overflow */
		return 0;
	if (find_vma_intersection(vma->vm_mm, vma->vm_end, end))
		return 0;
	if (get_unmapped_area(NULL, vma->vm_start, end - vma->vm_start,
			      0, MAP_FIXED) & ~PAGE_MASK)
		return 0;
	return 1;
}

/*
 * Expand (or shrink) an existing mapping, potentially moving it at the
 * same time (controlled by the MREMAP_MAYMOVE flag and available VM space)
 *
 * MREMAP_FIXED option added 5-Dec-1999 by Benjamin LaHaise
 * This option implies MREMAP_MAYMOVE.
 */
SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len,
		unsigned long, new_len, unsigned long, flags,
		unsigned long, new_addr)
{
	struct mm_struct *mm = current->mm;
	struct vm_area_struct *vma;
	unsigned long ret = -EINVAL;
	bool locked = false;
	struct vm_userfaultfd_ctx uf = NULL_VM_UFFD_CTX;
	LIST_HEAD(uf_unmap_early);
	LIST_HEAD(uf_unmap);

	/*
	 * There is a deliberate asymmetry here: we strip the pointer tag
	 * from the old address but leave the new address alone. This is
	 * for consistency with mmap(), where we prevent the creation of
	 * aliasing mappings in userspace by leaving the tag bits of the
	 * mapping address intact. A non-zero tag will cause the subsequent
	 * range checks to reject the address as invalid.
	 *
	 * See Documentation/arch/arm64/tagged-address-abi.rst for more
	 * information.
	 */
	addr = untagged_addr(addr);

	if (flags & ~(MREMAP_FIXED | MREMAP_MAYMOVE | MREMAP_DONTUNMAP))
		return ret;

	if (flags & MREMAP_FIXED && !(flags & MREMAP_MAYMOVE))
		return ret;

	/*
	 * MREMAP_DONTUNMAP is always a move and it does not allow resizing
	 * in the process.
	 */
	if (flags & MREMAP_DONTUNMAP &&
			(!(flags & MREMAP_MAYMOVE) || old_len != new_len))
		return ret;

	if (offset_in_page(addr))
		return ret;

	old_len = PAGE_ALIGN(old_len);
	new_len = PAGE_ALIGN(new_len);

	/*
	 * We allow a zero old-len as a special case
	 * for DOS-emu "duplicate shm area" thing. But
	 * a zero new-len is nonsensical.
	 */
	if (!new_len)
		return ret;

	if (mmap_write_lock_killable(current->mm))
		return -EINTR;
	vma = vma_lookup(mm, addr);
	if (!vma) {
		ret = -EFAULT;
		goto out;
	}

	if (is_vm_hugetlb_page(vma)) {
		struct hstate *h __maybe_unused = hstate_vma(vma);

		old_len = ALIGN(old_len, huge_page_size(h));
		new_len = ALIGN(new_len, huge_page_size(h));

		/* addrs must be huge page aligned */
		if (addr & ~huge_page_mask(h))
			goto out;
		if (new_addr & ~huge_page_mask(h))
			goto out;

		/*
		 * Don't allow remap expansion, because the underlying hugetlb
		 * reservation is not yet capable of handling split reservations.
		 */
		if (new_len > old_len)
			goto out;
	}

	if (flags & (MREMAP_FIXED | MREMAP_DONTUNMAP)) {
		ret = mremap_to(addr, old_len, new_addr, new_len,
				&locked, flags, &uf, &uf_unmap_early,
				&uf_unmap);
		goto out;
	}

	/*
	 * Always allow a shrinking remap: that just unmaps
	 * the unnecessary pages..
	 * do_vmi_munmap does all the needed commit accounting, and
	 * unlocks the mmap_lock if so directed.
	 */
	if (old_len >= new_len) {
		VMA_ITERATOR(vmi, mm, addr + new_len);

		if (old_len == new_len) {
			ret = addr;
			goto out;
		}

		ret = do_vmi_munmap(&vmi, mm, addr + new_len, old_len - new_len,
				    &uf_unmap, true);
		if (ret)
			goto out;

		ret = addr;
		goto out_unlocked;
	}

	/*
	 * Ok, we need to grow..
	 */
	vma = vma_to_resize(addr, old_len, new_len, flags);
	if (IS_ERR(vma)) {
		ret = PTR_ERR(vma);
		goto out;
	}

	/* old_len exactly to the end of the area..
	 */
	if (old_len == vma->vm_end - addr) {
		unsigned long delta = new_len - old_len;

		/* can we just expand the current mapping? */
		if (vma_expandable(vma, delta)) {
			long pages = delta >> PAGE_SHIFT;
			VMA_ITERATOR(vmi, mm, vma->vm_end);
			long charged = 0;

			if (vma->vm_flags & VM_ACCOUNT) {
				if (security_vm_enough_memory_mm(mm, pages)) {
					ret = -ENOMEM;
					goto out;
				}
				charged = pages;
			}

			/*
			 * Function vma_merge_extend() is called on the
			 * extension we are adding to the already existing vma,
			 * vma_merge_extend() will merge this extension with the
			 * already existing vma (expand operation itself) and
			 * possibly also with the next vma if it becomes
			 * adjacent to the expanded vma and otherwise
			 * compatible.
			 */
			vma = vma_merge_extend(&vmi, vma, delta);
			if (!vma) {
				vm_unacct_memory(charged);
				ret = -ENOMEM;
				goto out;
			}

			vm_stat_account(mm, vma->vm_flags, pages);
			if (vma->vm_flags & VM_LOCKED) {
				mm->locked_vm += pages;
				locked = true;
				new_addr = addr;
			}
			ret = addr;
			goto out;
		}
	}

	/*
	 * We weren't able to just expand or shrink the area,
	 * we need to create a new one and move it..
	 */
	ret = -ENOMEM;
	if (flags & MREMAP_MAYMOVE) {
		unsigned long map_flags = 0;

		if (vma->vm_flags & VM_MAYSHARE)
			map_flags |= MAP_SHARED;

		new_addr = get_unmapped_area(vma->vm_file, 0, new_len,
					vma->vm_pgoff +
					((addr - vma->vm_start) >> PAGE_SHIFT),
					map_flags);
		if (IS_ERR_VALUE(new_addr)) {
			ret = new_addr;
			goto out;
		}

		ret = move_vma(vma, addr, old_len, new_len, new_addr,
			       &locked, flags, &uf, &uf_unmap);
	}
out:
	if (offset_in_page(ret))
		locked = false;
	mmap_write_unlock(current->mm);
	if (locked && new_len > old_len)
		mm_populate(new_addr + old_len, new_len - old_len);
out_unlocked:
	userfaultfd_unmap_complete(mm, &uf_unmap_early);
	mremap_userfaultfd_complete(&uf, addr, ret, old_len);
	userfaultfd_unmap_complete(mm, &uf_unmap);
	return ret;
}
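
/*
 * For reference, a minimal userspace sketch (not part of this file)
 * exercising the flag combinations handled above; error handling is
 * elided, and fixed_addr stands for a suitably aligned, unmapped
 * address chosen by the caller:
 *
 *	#include <sys/mman.h>
 *
 *	void *p = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
 *		       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
 *
 *	p = mremap(p, 4096, 8192, MREMAP_MAYMOVE);	// grow, may move
 *	p = mremap(p, 8192, 8192,
 *		   MREMAP_MAYMOVE | MREMAP_FIXED, fixed_addr);	// move
 *
 * The first call takes the "grow" path above (or move_vma() if it must
 * relocate); the second goes through mremap_to().
 */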