This is a note to let you know that I've just added the patch titled

    x86/ldt: Rework locking

to the 4.14-stable tree which can be found at:
    http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=summary

The filename of the patch is:
     x86-ldt-rework-locking.patch
and it can be found in the queue-4.14 subdirectory.

If you, or anyone else, feels it should not be added to the stable
tree, please let <stable@xxxxxxxxxxxxxxx> know about it.


>From c2b3496bb30bd159e9de42e5c952e1f1f33c9a77 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@xxxxxxxxxxxxx>
Date: Thu, 14 Dec 2017 12:27:30 +0100
Subject: x86/ldt: Rework locking

From: Peter Zijlstra <peterz@xxxxxxxxxxxxx>

commit c2b3496bb30bd159e9de42e5c952e1f1f33c9a77 upstream.

The LDT is duplicated on fork() and on exec(), which is wrong as exec()
should start from a clean state, i.e. without LDT. To fix this the LDT
duplication code will be moved into arch_dup_mmap() which is only called
for fork().

This introduces a locking problem. arch_dup_mmap() holds mmap_sem of the
parent process, but the LDT duplication code needs to acquire
mm->context.lock to access the LDT data safely, which is the reverse lock
order of write_ldt() where mmap_sem nests into context.lock.

Solve this by introducing a new rw semaphore which serializes the
read/write_ldt() syscall operations and use context.lock to protect the
actual installment of the LDT descriptor. So context.lock stabilizes
mm->context.ldt and can nest inside of the new semaphore or mmap_sem.
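To make the resulting nesting concrete, here is a minimal userspace
sketch of the lock order (an illustration only, not part of the patch):
pthread rwlocks stand in for the kernel rw_semaphores ldt_usr_sem and
mmap_sem, and a pthread mutex for context.lock. All model_* names are
invented for this sketch; nothing below is kernel API.

#include <pthread.h>
#include <stdio.h>

/* Models of the kernel locks, outermost to innermost. */
static pthread_rwlock_t ldt_usr_sem  = PTHREAD_RWLOCK_INITIALIZER;
static pthread_rwlock_t mmap_sem     = PTHREAD_RWLOCK_INITIALIZER;
static pthread_mutex_t  context_lock = PTHREAD_MUTEX_INITIALIZER;

/* context.lock only stabilizes the LDT pointer; it is the innermost lock. */
static void model_install_ldt(void)
{
	pthread_mutex_lock(&context_lock);
	puts("install_ldt: publish LDT under context.lock");
	pthread_mutex_unlock(&context_lock);
}

/* write_ldt(): ldt_usr_sem serializes the syscall; mmap_sem and
 * context.lock may both nest inside it. */
static void model_write_ldt(void)
{
	pthread_rwlock_wrlock(&ldt_usr_sem);
	pthread_rwlock_rdlock(&mmap_sem);
	model_install_ldt();
	pthread_rwlock_unlock(&mmap_sem);
	pthread_rwlock_unlock(&ldt_usr_sem);
}

/* arch_dup_mmap() on fork(): enters with mmap_sem held and takes only
 * context.lock, never ldt_usr_sem, so there is no inversion against
 * write_ldt(). */
static void model_arch_dup_mmap(void)
{
	pthread_rwlock_wrlock(&mmap_sem);
	pthread_mutex_lock(&context_lock);
	puts("arch_dup_mmap: duplicate LDT under context.lock");
	pthread_mutex_unlock(&context_lock);
	pthread_rwlock_unlock(&mmap_sem);
}

int main(void)
{
	model_write_ldt();
	model_arch_dup_mmap();
	return 0;
}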
Signed-off-by: Peter Zijlstra (Intel) <peterz@xxxxxxxxxxxxx>
Signed-off-by: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
Cc: Andy Lutomirski <luto@xxxxxxxxxx>
Cc: Andy Lutomirsky <luto@xxxxxxxxxx>
Cc: Boris Ostrovsky <boris.ostrovsky@xxxxxxxxxx>
Cc: Borislav Petkov <bp@xxxxxxxxx>
Cc: Borislav Petkov <bpetkov@xxxxxxx>
Cc: Brian Gerst <brgerst@xxxxxxxxx>
Cc: Dave Hansen <dave.hansen@xxxxxxxxx>
Cc: Dave Hansen <dave.hansen@xxxxxxxxxxxxxxx>
Cc: David Laight <David.Laight@xxxxxxxxxx>
Cc: Denys Vlasenko <dvlasenk@xxxxxxxxxx>
Cc: Eduardo Valentin <eduval@xxxxxxxxxx>
Cc: Greg KH <gregkh@xxxxxxxxxxxxxxxxxxx>
Cc: H. Peter Anvin <hpa@xxxxxxxxx>
Cc: Josh Poimboeuf <jpoimboe@xxxxxxxxxx>
Cc: Juergen Gross <jgross@xxxxxxxx>
Cc: Linus Torvalds <torvalds@xxxxxxxxxxxxxxxxxxxx>
Cc: Peter Zijlstra <peterz@xxxxxxxxxxxxx>
Cc: Will Deacon <will.deacon@xxxxxxx>
Cc: aliguori@xxxxxxxxxx
Cc: dan.j.williams@xxxxxxxxx
Cc: hughd@xxxxxxxxxx
Cc: keescook@xxxxxxxxxx
Cc: kirill.shutemov@xxxxxxxxxxxxxxx
Cc: linux-mm@xxxxxxxxx
Signed-off-by: Ingo Molnar <mingo@xxxxxxxxxx>
Signed-off-by: Greg Kroah-Hartman <gregkh@xxxxxxxxxxxxxxxxxxx>
---
 arch/x86/include/asm/mmu.h         |    4 +++-
 arch/x86/include/asm/mmu_context.h |    2 ++
 arch/x86/kernel/ldt.c              |   33 +++++++++++++++++++++------------
 3 files changed, 26 insertions(+), 13 deletions(-)

--- a/arch/x86/include/asm/mmu.h
+++ b/arch/x86/include/asm/mmu.h
@@ -3,6 +3,7 @@
 #define _ASM_X86_MMU_H
 
 #include <linux/spinlock.h>
+#include <linux/rwsem.h>
 #include <linux/mutex.h>
 #include <linux/atomic.h>
 
@@ -27,7 +28,8 @@ typedef struct {
 	atomic64_t tlb_gen;
 
 #ifdef CONFIG_MODIFY_LDT_SYSCALL
-	struct ldt_struct *ldt;
+	struct rw_semaphore	ldt_usr_sem;
+	struct ldt_struct	*ldt;
 #endif
 
 #ifdef CONFIG_X86_64
--- a/arch/x86/include/asm/mmu_context.h
+++ b/arch/x86/include/asm/mmu_context.h
@@ -132,6 +132,8 @@ void enter_lazy_tlb(struct mm_struct *mm
 static inline int init_new_context(struct task_struct *tsk,
 				   struct mm_struct *mm)
 {
+	mutex_init(&mm->context.lock);
+
 	mm->context.ctx_id = atomic64_inc_return(&last_mm_ctx_id);
 	atomic64_set(&mm->context.tlb_gen, 0);
 
--- a/arch/x86/kernel/ldt.c
+++ b/arch/x86/kernel/ldt.c
@@ -5,6 +5,11 @@
  * Copyright (C) 2002 Andi Kleen
  *
  * This handles calls from both 32bit and 64bit mode.
+ *
+ * Lock order:
+ *	context.ldt_usr_sem
+ *	  mmap_sem
+ *	    context.lock
  */
 
 #include <linux/errno.h>
@@ -42,7 +47,7 @@ static void refresh_ldt_segments(void)
 #endif
 }
 
-/* context.lock is held for us, so we don't need any locking. */
+/* context.lock is held by the task which issued the smp function call */
 static void flush_ldt(void *__mm)
 {
 	struct mm_struct *mm = __mm;
@@ -99,15 +104,17 @@ static void finalize_ldt_struct(struct l
 	paravirt_alloc_ldt(ldt->entries, ldt->nr_entries);
 }
 
-/* context.lock is held */
-static void install_ldt(struct mm_struct *current_mm,
-			struct ldt_struct *ldt)
+static void install_ldt(struct mm_struct *mm, struct ldt_struct *ldt)
 {
+	mutex_lock(&mm->context.lock);
+
 	/* Synchronizes with READ_ONCE in load_mm_ldt. */
-	smp_store_release(&current_mm->context.ldt, ldt);
+	smp_store_release(&mm->context.ldt, ldt);
 
-	/* Activate the LDT for all CPUs using current_mm. */
-	on_each_cpu_mask(mm_cpumask(current_mm), flush_ldt, current_mm, true);
+	/* Activate the LDT for all CPUs using current's mm. */
+	on_each_cpu_mask(mm_cpumask(mm), flush_ldt, mm, true);
+
+	mutex_unlock(&mm->context.lock);
 }
 
 static void free_ldt_struct(struct ldt_struct *ldt)
@@ -133,7 +140,8 @@ int init_new_context_ldt(struct task_str
 	struct mm_struct *old_mm;
 	int retval = 0;
 
-	mutex_init(&mm->context.lock);
+	init_rwsem(&mm->context.ldt_usr_sem);
+
 	old_mm = current->mm;
 	if (!old_mm) {
 		mm->context.ldt = NULL;
@@ -180,7 +188,7 @@ static int read_ldt(void __user *ptr, un
 	unsigned long entries_size;
 	int retval;
 
-	mutex_lock(&mm->context.lock);
+	down_read(&mm->context.ldt_usr_sem);
 
 	if (!mm->context.ldt) {
 		retval = 0;
@@ -209,7 +217,7 @@ static int read_ldt(void __user *ptr, un
 	retval = bytecount;
 
 out_unlock:
-	mutex_unlock(&mm->context.lock);
+	up_read(&mm->context.ldt_usr_sem);
 	return retval;
 }
 
@@ -269,7 +277,8 @@ static int write_ldt(void __user *ptr, u
 		ldt.avl = 0;
 	}
 
-	mutex_lock(&mm->context.lock);
+	if (down_write_killable(&mm->context.ldt_usr_sem))
+		return -EINTR;
 
 	old_ldt       = mm->context.ldt;
 	old_nr_entries = old_ldt ? old_ldt->nr_entries : 0;
@@ -291,7 +300,7 @@ static int write_ldt(void __user *ptr, u
 	error = 0;
 
 out_unlock:
-	mutex_unlock(&mm->context.lock);
+	up_write(&mm->context.ldt_usr_sem);
 out:
 	return error;
 }
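The smp_store_release()/READ_ONCE() pairing that install_ldt() keeps
relying on can be sketched in userspace with C11 atomics. This is an
illustration of the pointer-publication pattern only, not kernel code:
the struct layout and all function names are invented here, and the
acquire load is a conservative stand-in for the kernel's READ_ONCE().

#include <stdatomic.h>
#include <stdio.h>

struct ldt_struct { int nr_entries; };

/* Stand-in for mm->context.ldt. */
static _Atomic(struct ldt_struct *) context_ldt;

/* Writer: the release store guarantees that a fully initialized
 * ldt_struct is visible to any reader that observes the new pointer
 * (the role of smp_store_release in install_ldt). */
static void publish_ldt(struct ldt_struct *ldt)
{
	atomic_store_explicit(&context_ldt, ldt, memory_order_release);
}

/* Reader: a single load of the pointer (READ_ONCE in load_mm_ldt);
 * acquire ordering is the conservative userspace equivalent. */
static void load_ldt_once(void)
{
	struct ldt_struct *ldt =
		atomic_load_explicit(&context_ldt, memory_order_acquire);

	if (ldt)
		printf("loaded LDT with %d entries\n", ldt->nr_entries);
}

int main(void)
{
	static struct ldt_struct ldt = { .nr_entries = 5 };

	publish_ldt(&ldt);
	load_ldt_once();
	return 0;
}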
Patches currently in stable-queue which might be from peterz@xxxxxxxxxxxxx are

queue-4.14/x86-entry-rename-sysenter_stack-to-cpu_entry_area_entry_stack.patch
queue-4.14/x86-mm-put-mmu-to-hardware-asid-translation-in-one-place.patch
queue-4.14/x86-vsyscall-64-explicitly-set-_page_user-in-the-pagetable-hierarchy.patch
queue-4.14/x86-uv-use-the-right-tlb-flush-api.patch
queue-4.14/x86-decoder-fix-and-update-the-opcodes-map.patch
queue-4.14/x86-mm-dump_pagetables-check-page_present-for-real.patch
queue-4.14/x86-ldt-prevent-ldt-inheritance-on-exec.patch
queue-4.14/x86-microcode-dont-abuse-the-tlb-flush-interface.patch
queue-4.14/x86-doc-remove-obvious-weirdnesses-from-the-x86-mm-layout-documentation.patch
queue-4.14/init-invoke-init_espfix_bsp-from-mm_init.patch
queue-4.14/x86-cpu_entry_area-move-it-to-a-separate-unit.patch
queue-4.14/x86-vsyscall-64-warn-and-fail-vsyscall-emulation-in-native-mode.patch
queue-4.14/x86-mm-create-asm-invpcid.h.patch
queue-4.14/x86-mm-remove-superfluous-barriers.patch
queue-4.14/x86-ldt-rework-locking.patch
queue-4.14/arch-mm-allow-arch_dup_mmap-to-fail.patch
queue-4.14/x86-cpu_entry_area-move-it-out-of-the-fixmap.patch
queue-4.14/tools-headers-sync-objtool-uapi-header.patch
queue-4.14/x86-mm-remove-hard-coded-asid-limit-checks.patch
queue-4.14/x86-kconfig-limit-nr_cpus-on-32-bit-to-a-sane-amount.patch
queue-4.14/objtool-fix-64-bit-build-on-32-bit-host.patch
queue-4.14/x86-mm-add-comments-to-clarify-which-tlb-flush-functions-are-supposed-to-flush-what.patch
queue-4.14/x86-mm-move-the-cr3-construction-functions-to-tlbflush.h.patch
queue-4.14/x86-mm-dump_pagetables-make-the-address-hints-correct-and-readable.patch
queue-4.14/x86-insn-eval-add-utility-functions-to-get-segment-selector.patch
queue-4.14/objtool-move-synced-files-to-their-original-relative-locations.patch
queue-4.14/x86-mm-use-__flush_tlb_one-for-kernel-memory.patch
queue-4.14/objtool-move-kernel-headers-code-sync-check-to-a-script.patch
queue-4.14/x86-mm-64-improve-the-memory-map-documentation.patch
queue-4.14/objtool-fix-cross-build.patch