// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 1992 Krishna Balasubramanian and Linus Torvalds
 * Copyright (C) 1999 Ingo Molnar <mingo@redhat.com>
 * Copyright (C) 2002 Andi Kleen
 *
 * This handles calls from both 32bit and 64bit mode.
 *
 * Lock order:
 *   context.ldt_usr_sem
 *     mmap_lock
 *       context.lock
 */

#include <linux/errno.h>
#include <linux/gfp.h>
#include <linux/sched.h>
#include <linux/string.h>
#include <linux/mm.h>
#include <linux/smp.h>
#include <linux/syscalls.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/uaccess.h>

#include <asm/ldt.h>
#include <asm/tlb.h>
#include <asm/desc.h>
#include <asm/mmu_context.h>
#include <asm/pgtable_areas.h>

#include <xen/xen.h>

/* This is a multiple of PAGE_SIZE. */
#define LDT_SLOT_STRIDE (LDT_ENTRIES * LDT_ENTRY_SIZE)

static inline void *ldt_slot_va(int slot)
{
        return (void *)(LDT_BASE_ADDR + LDT_SLOT_STRIDE * slot);
}
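
/*
 * The two PTI slots below alias the LDT at LDT_BASE_ADDR (slot 0) and at
 * LDT_BASE_ADDR + LDT_SLOT_STRIDE (slot 1), i.e. one stride of
 * LDT_ENTRIES * LDT_ENTRY_SIZE bytes apart, so an mm can keep its old LDT
 * visible while a replacement is being mapped into the other slot.
 */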

void load_mm_ldt(struct mm_struct *mm)
{
        struct ldt_struct *ldt;

        /* READ_ONCE synchronizes with smp_store_release */
        ldt = READ_ONCE(mm->context.ldt);

        /*
         * Any change to mm->context.ldt is followed by an IPI to all
         * CPUs with the mm active. The LDT will not be freed until
         * after the IPI is handled by all such CPUs. This means that
         * if the ldt_struct changes before we return, the values we see
         * will be safe, and the new values will be loaded before we run
         * any user code.
         *
         * NB: don't try to convert this to use RCU without extreme care.
         * We would still need IRQs off, because we don't want to change
         * the local LDT after an IPI loaded a newer value than the one
         * that we can see.
         */

        if (unlikely(ldt)) {
                if (static_cpu_has(X86_FEATURE_PTI)) {
                        if (WARN_ON_ONCE((unsigned long)ldt->slot > 1)) {
                                /*
                                 * Whoops -- either the new LDT isn't mapped
                                 * (if slot == -1) or is mapped into a bogus
                                 * slot (if slot > 1).
                                 */
                                clear_LDT();
                                return;
                        }

                        /*
                         * If page table isolation is enabled, ldt->entries
                         * will not be mapped in the userspace pagetables.
                         * Tell the CPU to access the LDT through the alias
                         * at ldt_slot_va(ldt->slot).
                         */
                        set_ldt(ldt_slot_va(ldt->slot), ldt->nr_entries);
                } else {
                        set_ldt(ldt->entries, ldt->nr_entries);
                }
        } else {
                clear_LDT();
        }
}

void switch_ldt(struct mm_struct *prev, struct mm_struct *next)
{
        /*
         * Load the LDT if either the old or new mm had an LDT.
         *
         * An mm will never go from having an LDT to not having an LDT. Two
         * mms never share an LDT, so we don't gain anything by checking to
         * see whether the LDT changed. There's also no guarantee that
         * prev->context.ldt actually matches LDTR, but, if LDTR is non-NULL,
         * then prev->context.ldt will also be non-NULL.
         *
         * If we really cared, we could optimize the case where prev == next
         * and we're exiting lazy mode. Most of the time, if this happens,
         * we don't actually need to reload LDTR, but modify_ldt() is mostly
         * used by legacy code and emulators where we don't need this level of
         * performance.
         *
         * This uses | instead of || because it generates better code.
         */
        if (unlikely((unsigned long)prev->context.ldt |
                     (unsigned long)next->context.ldt))
                load_mm_ldt(next);

        DEBUG_LOCKS_WARN_ON(preemptible());
}

static void refresh_ldt_segments(void)
{
#ifdef CONFIG_X86_64
        unsigned short sel;

        /*
         * Make sure that the cached DS and ES descriptors match the updated
         * LDT.
         */
        savesegment(ds, sel);
        if ((sel & SEGMENT_TI_MASK) == SEGMENT_LDT)
                loadsegment(ds, sel);

        savesegment(es, sel);
        if ((sel & SEGMENT_TI_MASK) == SEGMENT_LDT)
                loadsegment(es, sel);
#endif
}

/* context.lock is held by the task which issued the smp function call */
static void flush_ldt(void *__mm)
{
        struct mm_struct *mm = __mm;

        if (this_cpu_read(cpu_tlbstate.loaded_mm) != mm)
                return;

        load_mm_ldt(mm);

        refresh_ldt_segments();
}

/* The caller must call finalize_ldt_struct on the result. LDT starts zeroed. */
static struct ldt_struct *alloc_ldt_struct(unsigned int num_entries)
{
        struct ldt_struct *new_ldt;
        unsigned int alloc_size;

        if (num_entries > LDT_ENTRIES)
                return NULL;

        new_ldt = kmalloc(sizeof(struct ldt_struct), GFP_KERNEL_ACCOUNT);
        if (!new_ldt)
                return NULL;

        BUILD_BUG_ON(LDT_ENTRY_SIZE != sizeof(struct desc_struct));
        alloc_size = num_entries * LDT_ENTRY_SIZE;

        /*
         * Xen is very picky: it requires a page-aligned LDT that has no
         * trailing nonzero bytes in any page that contains LDT descriptors.
         * Keep it simple: zero the whole allocation and never allocate less
         * than PAGE_SIZE.
         */
        if (alloc_size > PAGE_SIZE)
                new_ldt->entries = __vmalloc(alloc_size, GFP_KERNEL_ACCOUNT | __GFP_ZERO);
        else
                new_ldt->entries = (void *)get_zeroed_page(GFP_KERNEL_ACCOUNT);

        if (!new_ldt->entries) {
                kfree(new_ldt);
                return NULL;
        }

        /* The new LDT isn't aliased for PTI yet. */
        new_ldt->slot = -1;

        new_ldt->nr_entries = num_entries;
        return new_ldt;
}
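
/*
 * Sizing note: LDT_ENTRIES is 8192 and LDT_ENTRY_SIZE is 8 bytes, so a
 * fully populated LDT is 64 KiB; that is why alloc_ldt_struct() has to
 * fall back to __vmalloc() once the table outgrows a single page.
 */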

#ifdef CONFIG_MITIGATION_PAGE_TABLE_ISOLATION

static void do_sanity_check(struct mm_struct *mm,
                            bool had_kernel_mapping,
                            bool had_user_mapping)
{
        if (mm->context.ldt) {
                /*
                 * We already had an LDT. The top-level entry should already
                 * have been allocated and synchronized with the usermode
                 * tables.
                 */
                WARN_ON(!had_kernel_mapping);
                if (boot_cpu_has(X86_FEATURE_PTI))
                        WARN_ON(!had_user_mapping);
        } else {
                /*
                 * This is the first time we're mapping an LDT for this process.
                 * Sync the pgd to the usermode tables.
                 */
                WARN_ON(had_kernel_mapping);
                if (boot_cpu_has(X86_FEATURE_PTI))
                        WARN_ON(had_user_mapping);
        }
}

#ifdef CONFIG_X86_PAE

static pmd_t *pgd_to_pmd_walk(pgd_t *pgd, unsigned long va)
{
        p4d_t *p4d;
        pud_t *pud;

        if (pgd->pgd == 0)
                return NULL;

        p4d = p4d_offset(pgd, va);
        if (p4d_none(*p4d))
                return NULL;

        pud = pud_offset(p4d, va);
        if (pud_none(*pud))
                return NULL;

        return pmd_offset(pud, va);
}

static void map_ldt_struct_to_user(struct mm_struct *mm)
{
        pgd_t *k_pgd = pgd_offset(mm, LDT_BASE_ADDR);
        pgd_t *u_pgd = kernel_to_user_pgdp(k_pgd);
        pmd_t *k_pmd, *u_pmd;

        k_pmd = pgd_to_pmd_walk(k_pgd, LDT_BASE_ADDR);
        u_pmd = pgd_to_pmd_walk(u_pgd, LDT_BASE_ADDR);

        if (boot_cpu_has(X86_FEATURE_PTI) && !mm->context.ldt)
                set_pmd(u_pmd, *k_pmd);
}

static void sanity_check_ldt_mapping(struct mm_struct *mm)
{
        pgd_t *k_pgd = pgd_offset(mm, LDT_BASE_ADDR);
        pgd_t *u_pgd = kernel_to_user_pgdp(k_pgd);
        bool had_kernel, had_user;
        pmd_t *k_pmd, *u_pmd;

        k_pmd = pgd_to_pmd_walk(k_pgd, LDT_BASE_ADDR);
        u_pmd = pgd_to_pmd_walk(u_pgd, LDT_BASE_ADDR);
        had_kernel = (k_pmd->pmd != 0);
        had_user = (u_pmd->pmd != 0);

        do_sanity_check(mm, had_kernel, had_user);
}

#else /* !CONFIG_X86_PAE */

static void map_ldt_struct_to_user(struct mm_struct *mm)
{
        pgd_t *pgd = pgd_offset(mm, LDT_BASE_ADDR);

        if (boot_cpu_has(X86_FEATURE_PTI) && !mm->context.ldt)
                set_pgd(kernel_to_user_pgdp(pgd), *pgd);
}

static void sanity_check_ldt_mapping(struct mm_struct *mm)
{
        pgd_t *pgd = pgd_offset(mm, LDT_BASE_ADDR);
        bool had_kernel = (pgd->pgd != 0);
        bool had_user = (kernel_to_user_pgdp(pgd)->pgd != 0);

        do_sanity_check(mm, had_kernel, had_user);
}

#endif /* CONFIG_X86_PAE */

/*
 * If PTI is enabled, this maps the LDT into the kernelmode and
 * usermode tables for the given mm.
 */
static int
map_ldt_struct(struct mm_struct *mm, struct ldt_struct *ldt, int slot)
{
        unsigned long va;
        bool is_vmalloc;
        spinlock_t *ptl;
        int i, nr_pages;

        if (!boot_cpu_has(X86_FEATURE_PTI))
                return 0;

        /*
         * Any given ldt_struct should have map_ldt_struct() called at most
         * once.
         */
        WARN_ON(ldt->slot != -1);

        /* Check if the current mappings are sane */
        sanity_check_ldt_mapping(mm);

        is_vmalloc = is_vmalloc_addr(ldt->entries);

        nr_pages = DIV_ROUND_UP(ldt->nr_entries * LDT_ENTRY_SIZE, PAGE_SIZE);

        for (i = 0; i < nr_pages; i++) {
                unsigned long offset = i << PAGE_SHIFT;
                const void *src = (char *)ldt->entries + offset;
                unsigned long pfn;
                pgprot_t pte_prot;
                pte_t pte, *ptep;

                va = (unsigned long)ldt_slot_va(slot) + offset;
                pfn = is_vmalloc ? vmalloc_to_pfn(src) :
                        page_to_pfn(virt_to_page(src));
                /*
                 * Treat the PTI LDT range as a *userspace* range.
                 * get_locked_pte() will allocate all needed pagetables
                 * and account for them in this mm.
                 */
                ptep = get_locked_pte(mm, va, &ptl);
                if (!ptep)
                        return -ENOMEM;
                /*
                 * Map it RO so the easy-to-find address is not a primary
                 * target via some kernel interface which misses a
                 * permission check.
                 */
                pte_prot = __pgprot(__PAGE_KERNEL_RO & ~_PAGE_GLOBAL);
                /* Filter out unsupported __PAGE_KERNEL* bits: */
                pgprot_val(pte_prot) &= __supported_pte_mask;
                pte = pfn_pte(pfn, pte_prot);
                set_pte_at(mm, va, ptep, pte);
                pte_unmap_unlock(ptep, ptl);
        }

        /* Propagate LDT mapping to the user page-table */
        map_ldt_struct_to_user(mm);

        ldt->slot = slot;
        return 0;
}

static void unmap_ldt_struct(struct mm_struct *mm, struct ldt_struct *ldt)
{
        unsigned long va;
        int i, nr_pages;

        if (!ldt)
                return;

        /* LDT map/unmap is only required for PTI */
        if (!boot_cpu_has(X86_FEATURE_PTI))
                return;

        nr_pages = DIV_ROUND_UP(ldt->nr_entries * LDT_ENTRY_SIZE, PAGE_SIZE);

        for (i = 0; i < nr_pages; i++) {
                unsigned long offset = i << PAGE_SHIFT;
                spinlock_t *ptl;
                pte_t *ptep;

                va = (unsigned long)ldt_slot_va(ldt->slot) + offset;
                ptep = get_locked_pte(mm, va, &ptl);
                if (!WARN_ON_ONCE(!ptep)) {
                        pte_clear(mm, va, ptep);
                        pte_unmap_unlock(ptep, ptl);
                }
        }

        va = (unsigned long)ldt_slot_va(ldt->slot);
        flush_tlb_mm_range(mm, va, va + nr_pages * PAGE_SIZE, PAGE_SHIFT, false);
}

#else /* !CONFIG_MITIGATION_PAGE_TABLE_ISOLATION */

static int
map_ldt_struct(struct mm_struct *mm, struct ldt_struct *ldt, int slot)
{
        return 0;
}

static void unmap_ldt_struct(struct mm_struct *mm, struct ldt_struct *ldt)
{
}
#endif /* CONFIG_MITIGATION_PAGE_TABLE_ISOLATION */

static void free_ldt_pgtables(struct mm_struct *mm)
{
#ifdef CONFIG_MITIGATION_PAGE_TABLE_ISOLATION
        struct mmu_gather tlb;
        unsigned long start = LDT_BASE_ADDR;
        unsigned long end = LDT_END_ADDR;

        if (!boot_cpu_has(X86_FEATURE_PTI))
                return;

        /*
         * Although free_pgd_range() is intended for freeing user
         * page-tables, it also works out for kernel mappings on x86.
         * We use tlb_gather_mmu_fullmm() to avoid confusing the
         * range-tracking logic in __tlb_adjust_range().
         */
        tlb_gather_mmu_fullmm(&tlb, mm);
        free_pgd_range(&tlb, start, end, start, end);
        tlb_finish_mmu(&tlb);
#endif
}

/* After calling this, the LDT is immutable. */
static void finalize_ldt_struct(struct ldt_struct *ldt)
{
        paravirt_alloc_ldt(ldt->entries, ldt->nr_entries);
}

static void install_ldt(struct mm_struct *mm, struct ldt_struct *ldt)
{
        mutex_lock(&mm->context.lock);

        /* Synchronizes with READ_ONCE in load_mm_ldt. */
        smp_store_release(&mm->context.ldt, ldt);

        /* Activate the LDT for all CPUs using current's mm. */
        on_each_cpu_mask(mm_cpumask(mm), flush_ldt, mm, true);

        mutex_unlock(&mm->context.lock);
}

static void free_ldt_struct(struct ldt_struct *ldt)
{
        if (likely(!ldt))
                return;

        paravirt_free_ldt(ldt->entries, ldt->nr_entries);
        if (ldt->nr_entries * LDT_ENTRY_SIZE > PAGE_SIZE)
                vfree_atomic(ldt->entries);
        else
                free_page((unsigned long)ldt->entries);
        kfree(ldt);
}

/*
 * Called on fork from arch_dup_mmap(). Just copy the current LDT state;
 * the new task is not running yet, so nothing can be installed on it.
 */
int ldt_dup_context(struct mm_struct *old_mm, struct mm_struct *mm)
{
        struct ldt_struct *new_ldt;
        int retval = 0;

        if (!old_mm)
                return 0;

        mutex_lock(&old_mm->context.lock);
        if (!old_mm->context.ldt)
                goto out_unlock;

        new_ldt = alloc_ldt_struct(old_mm->context.ldt->nr_entries);
        if (!new_ldt) {
                retval = -ENOMEM;
                goto out_unlock;
        }

        memcpy(new_ldt->entries, old_mm->context.ldt->entries,
               new_ldt->nr_entries * LDT_ENTRY_SIZE);
        finalize_ldt_struct(new_ldt);

        retval = map_ldt_struct(mm, new_ldt, 0);
        if (retval) {
                free_ldt_pgtables(mm);
                free_ldt_struct(new_ldt);
                goto out_unlock;
        }
        mm->context.ldt = new_ldt;

out_unlock:
        mutex_unlock(&old_mm->context.lock);
        return retval;
}

/*
 * No need to lock the MM as we are the last user.
 *
 * 64bit: Don't touch the LDT register - we're already in the next thread.
 */
void destroy_context_ldt(struct mm_struct *mm)
{
        free_ldt_struct(mm->context.ldt);
        mm->context.ldt = NULL;
}

void ldt_arch_exit_mmap(struct mm_struct *mm)
{
        free_ldt_pgtables(mm);
}

static int read_ldt(void __user *ptr, unsigned long bytecount)
{
        struct mm_struct *mm = current->mm;
        unsigned long entries_size;
        int retval;

        down_read(&mm->context.ldt_usr_sem);

        if (!mm->context.ldt) {
                retval = 0;
                goto out_unlock;
        }

        if (bytecount > LDT_ENTRY_SIZE * LDT_ENTRIES)
                bytecount = LDT_ENTRY_SIZE * LDT_ENTRIES;

        entries_size = mm->context.ldt->nr_entries * LDT_ENTRY_SIZE;
        if (entries_size > bytecount)
                entries_size = bytecount;

        if (copy_to_user(ptr, mm->context.ldt->entries, entries_size)) {
                retval = -EFAULT;
                goto out_unlock;
        }

        if (entries_size != bytecount) {
                /* Zero-fill the rest and pretend we read bytecount bytes. */
                if (clear_user(ptr + entries_size, bytecount - entries_size)) {
                        retval = -EFAULT;
                        goto out_unlock;
                }
        }
        retval = bytecount;

out_unlock:
        up_read(&mm->context.ldt_usr_sem);
        return retval;
}

static int read_default_ldt(void __user *ptr, unsigned long bytecount)
{
        /* CHECKME: Can we use _one_ random number? */
#ifdef CONFIG_X86_32
        unsigned long size = 5 * sizeof(struct desc_struct);
#else
        unsigned long size = 128;
#endif
        if (bytecount > size)
                bytecount = size;
        if (clear_user(ptr, bytecount))
                return -EFAULT;
        return bytecount;
}

static bool allow_16bit_segments(void)
{
        if (!IS_ENABLED(CONFIG_X86_16BIT))
                return false;

#ifdef CONFIG_XEN_PV
        /*
         * Xen PV does not implement ESPFIX64, which means that 16-bit
         * segments will not work correctly. Until Xen PV implements
         * ESPFIX64 and can signal this fact to the guest, or someone
         * provides compelling evidence that allowing broken 16-bit segments
         * is worthwhile, disallow 16-bit segments under Xen PV.
         */
        if (xen_pv_domain()) {
                pr_info_once("Warning: 16-bit segments do not work correctly in a Xen PV guest\n");
                return false;
        }
#endif

        return true;
}
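
/*
 * For context: ESPFIX64 works around the fact that IRET restores only the
 * low 16 bits of the stack pointer when returning to a 16-bit stack
 * segment, which both leaks the kernel's upper %rsp bits and breaks
 * programs that rely on the full stack pointer; without that workaround,
 * 16-bit segments are refused above.
 */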

static int write_ldt(void __user *ptr, unsigned long bytecount, int oldmode)
{
        struct mm_struct *mm = current->mm;
        struct ldt_struct *new_ldt, *old_ldt;
        unsigned int old_nr_entries, new_nr_entries;
        struct user_desc ldt_info;
        struct desc_struct ldt;
        int error;

        error = -EINVAL;
        if (bytecount != sizeof(ldt_info))
                goto out;
        error = -EFAULT;
        if (copy_from_user(&ldt_info, ptr, sizeof(ldt_info)))
                goto out;

        error = -EINVAL;
        if (ldt_info.entry_number >= LDT_ENTRIES)
                goto out;
        if (ldt_info.contents == 3) {
                if (oldmode)
                        goto out;
                if (ldt_info.seg_not_present == 0)
                        goto out;
        }

        if ((oldmode && !ldt_info.base_addr && !ldt_info.limit) ||
            LDT_empty(&ldt_info)) {
                /* The user wants to clear the entry. */
                memset(&ldt, 0, sizeof(ldt));
        } else {
                if (!ldt_info.seg_32bit && !allow_16bit_segments()) {
                        error = -EINVAL;
                        goto out;
                }

                fill_ldt(&ldt, &ldt_info);
                if (oldmode)
                        ldt.avl = 0;
        }

        if (down_write_killable(&mm->context.ldt_usr_sem))
                return -EINTR;

        old_ldt = mm->context.ldt;
        old_nr_entries = old_ldt ? old_ldt->nr_entries : 0;
        new_nr_entries = max(ldt_info.entry_number + 1, old_nr_entries);

        error = -ENOMEM;
        new_ldt = alloc_ldt_struct(new_nr_entries);
        if (!new_ldt)
                goto out_unlock;

        if (old_ldt)
                memcpy(new_ldt->entries, old_ldt->entries, old_nr_entries * LDT_ENTRY_SIZE);

        new_ldt->entries[ldt_info.entry_number] = ldt;
        finalize_ldt_struct(new_ldt);

        /*
         * If we are using PTI, map the new LDT into the userspace pagetables.
         * If there is already an LDT, use the other slot so that other CPUs
         * will continue to use the old LDT until install_ldt() switches
         * them over to the new LDT.
         */
        error = map_ldt_struct(mm, new_ldt, old_ldt ? !old_ldt->slot : 0);
        if (error) {
                /*
                 * This can only fail for the first LDT setup. If an LDT is
                 * already installed then the PTE page is already
                 * populated. Mop up a half-populated page table.
                 */
                if (!WARN_ON_ONCE(old_ldt))
                        free_ldt_pgtables(mm);
                free_ldt_struct(new_ldt);
                goto out_unlock;
        }

        install_ldt(mm, new_ldt);
        unmap_ldt_struct(mm, old_ldt);
        free_ldt_struct(old_ldt);
        error = 0;

out_unlock:
        up_write(&mm->context.ldt_usr_sem);
out:
        return error;
}
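
/*
 * Note on the two PTI slots: a later write takes the path above with an
 * existing LDT in, say, slot 0, maps the replacement into slot 1,
 * install_ldt() then IPIs every CPU running this mm so they reload from
 * the new alias, and only afterwards is the old LDT unmapped and freed.
 */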

SYSCALL_DEFINE3(modify_ldt, int , func , void __user * , ptr ,
                unsigned long , bytecount)
{
        int ret = -ENOSYS;

        switch (func) {
        case 0:
                ret = read_ldt(ptr, bytecount);
                break;
        case 1:
                ret = write_ldt(ptr, bytecount, 1);
                break;
        case 2:
                ret = read_default_ldt(ptr, bytecount);
                break;
        case 0x11:
                ret = write_ldt(ptr, bytecount, 0);
                break;
        }
        /*
         * The SYSCALL_DEFINE() macros give us an 'unsigned long'
         * return type, but the ABI for sys_modify_ldt() expects
         * 'int'. This cast gives us an int-sized value in %rax
         * for the return code. The 'unsigned' is necessary so
         * the compiler does not try to sign-extend the negative
         * return codes into the high half of the register when
         * taking the value from int->long.
         */
        return (unsigned int)ret;
}
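
/*
 * A minimal userspace sketch of driving this syscall (illustrative only,
 * not part of the kernel build; the helper name is made up): func 0x11
 * installs one descriptor via struct user_desc, func 0 reads the table
 * back, and func 1 is the legacy write variant selected by oldmode above.
 *
 *      #include <asm/ldt.h>            // struct user_desc
 *      #include <string.h>
 *      #include <sys/syscall.h>
 *      #include <unistd.h>
 *
 *      static int install_flat_data_segment(void)
 *      {
 *              struct user_desc d;
 *
 *              memset(&d, 0, sizeof(d));
 *              d.entry_number = 0;     // LDT slot to fill
 *              d.base_addr = 0;        // flat base
 *              d.limit = 0xfffff;      // 4 GiB limit with...
 *              d.limit_in_pages = 1;   // ...page granularity
 *              d.seg_32bit = 1;
 *              d.useable = 1;
 *
 *              return syscall(SYS_modify_ldt, 0x11, &d, sizeof(d));
 *      }
 */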