On Wed, Jul 22, 2015 at 12:23:46PM -0700, Andy Lutomirski wrote:
> modify_ldt has questionable locking and does not synchronize
> threads. Improve it: redesign the locking and synchronize all
> threads' LDTs using an IPI on all modifications.
>
> This will dramatically slow down modify_ldt in multithreaded
> programs, but there shouldn't be any multithreaded programs that
> care about modify_ldt's performance in the first place.
>
> Cc: stable@xxxxxxxxxxxxxxx
> Signed-off-by: Andy Lutomirski <luto@xxxxxxxxxx>

Just minor stylistic nitpicks below. Otherwise looks ok to me.

...

> diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c
> index c37886d759cc..3ae308029dee 100644
> --- a/arch/x86/kernel/ldt.c
> +++ b/arch/x86/kernel/ldt.c
> @@ -12,6 +12,7 @@
>  #include <linux/string.h>
>  #include <linux/mm.h>
>  #include <linux/smp.h>
> +#include <linux/slab.h>
>  #include <linux/vmalloc.h>
>  #include <linux/uaccess.h>
>
> @@ -20,82 +21,83 @@
>  #include <asm/mmu_context.h>
>  #include <asm/syscalls.h>
>
> -#ifdef CONFIG_SMP
>  static void flush_ldt(void *current_mm)
>  {
> -	if (current->active_mm == current_mm)
> -		load_LDT(&current->active_mm->context);
> +	if (current->active_mm == current_mm) {

Save an indentation level:

	if (current->active_mm != current_mm)
		return;

> +		/* context.lock is held for us, so we don't need any locking. */

Stick that comment above the function name.

> +		mm_context_t *pc = &current->active_mm->context;
> +		set_ldt(pc->ldt->entries, pc->ldt->size);
> +	}
>  }
> -#endif
>
> -static int alloc_ldt(mm_context_t *pc, int mincount, int reload)
> +/* The caller must call finalize_ldt_struct on the result. */
> +static struct ldt_struct *alloc_ldt_struct(int size)
>  {
> -	void *oldldt, *newldt;
> -	int oldsize;
> +	struct ldt_struct *new_ldt;
> +	int alloc_size;
>
> -	if (mincount <= pc->size)
> -		return 0;
> -	oldsize = pc->size;
> -	mincount = (mincount + (PAGE_SIZE / LDT_ENTRY_SIZE - 1)) &
> -			(~(PAGE_SIZE / LDT_ENTRY_SIZE - 1));
> -	if (mincount * LDT_ENTRY_SIZE > PAGE_SIZE)
> -		newldt = vmalloc(mincount * LDT_ENTRY_SIZE);
> +	if (size > LDT_ENTRIES)
> +		return NULL;
> +
> +	new_ldt = kmalloc(sizeof(struct ldt_struct), GFP_KERNEL);
> +	if (!new_ldt)
> +		return NULL;
> +
> +	BUILD_BUG_ON(LDT_ENTRY_SIZE != sizeof(struct desc_struct));
> +	alloc_size = size * LDT_ENTRY_SIZE;
> +
> +	if (alloc_size > PAGE_SIZE)
> +		new_ldt->entries = vmalloc(alloc_size);
>  	else
> -		newldt = (void *)__get_free_page(GFP_KERNEL);
> +		new_ldt->entries = (void *)__get_free_page(GFP_KERNEL);

newline here.

> +	if (!new_ldt->entries) {
> +		kfree(new_ldt);
> +		return NULL;
> +	}
>
> -	if (!newldt)
> -		return -ENOMEM;
> +	new_ldt->size = size;
> +	return new_ldt;
> +}
> +
> +/* After calling this, the LDT is immutable. */
> +static void finalize_ldt_struct(struct ldt_struct *ldt)
> +{
> +	paravirt_alloc_ldt(ldt->entries, ldt->size);
> +}
>
> -	if (oldsize)
> -		memcpy(newldt, pc->ldt, oldsize * LDT_ENTRY_SIZE);
> -	oldldt = pc->ldt;
> -	memset(newldt + oldsize * LDT_ENTRY_SIZE, 0,
> -	       (mincount - oldsize) * LDT_ENTRY_SIZE);
> +static void install_ldt(struct mm_struct *current_mm,
> +			struct ldt_struct *ldt)
> +{
> +	/* context.lock is held */
> +	preempt_disable();
>
> -	paravirt_alloc_ldt(newldt, mincount);
> +	/* Synchronizes with lockless_dereference in load_mm_ldt. */

Good.

> +	smp_store_release(&current_mm->context.ldt, ldt);
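
Btw, for readers following along: the consumer side isn't in this hunk,
but judging from that comment, the read side presumably looks something
like this (a sketch only, assuming the load_mm_ldt from the
mmu_context.h part of the series):

	static inline void load_mm_ldt(struct mm_struct *mm)
	{
		struct ldt_struct *ldt;

		/* Pairs with smp_store_release() in install_ldt(). */
		ldt = lockless_dereference(mm->context.ldt);

		if (ldt)
			set_ldt(ldt->entries, ldt->size);
		else
			clear_LDT();
	}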
>
> -#ifdef CONFIG_X86_64
> -	/* CHECKME: Do we really need this ? */
> -	wmb();
> -#endif
> -	pc->ldt = newldt;
> -	wmb();
> -	pc->size = mincount;
> -	wmb();
> +	/* Activate for this CPU. */
> +	flush_ldt(current->mm);
>
> -	if (reload) {
>  #ifdef CONFIG_SMP
> -		preempt_disable();
> -		load_LDT(pc);
> -		if (!cpumask_equal(mm_cpumask(current->mm),
> -				   cpumask_of(smp_processor_id())))
> -			smp_call_function(flush_ldt, current->mm, 1);
> -		preempt_enable();
> -#else
> -		load_LDT(pc);
> +	/* Synchronize with other CPUs. */
> +	if (!cpumask_equal(mm_cpumask(current_mm),
> +			   cpumask_of(smp_processor_id())))

Let it stick out:

	if (!cpumask_equal(mm_cpumask(current_mm), cpumask_of(smp_processor_id())))
		smp_call_function(flush_ldt, current_mm, 1);

>  #endif
> -	}
> -	if (oldsize) {
> -		paravirt_free_ldt(oldldt, oldsize);
> -		if (oldsize * LDT_ENTRY_SIZE > PAGE_SIZE)
> -			vfree(oldldt);
> -		else
> -			put_page(virt_to_page(oldldt));
> -	}
> -	return 0;
> +	preempt_enable();
>  }
>
> -static inline int copy_ldt(mm_context_t *new, mm_context_t *old)
> +static void free_ldt_struct(struct ldt_struct *ldt)
>  {
> -	int err = alloc_ldt(new, old->size, 0);
> -	int i;
> -
> -	if (err < 0)
> -		return err;
> -
> -	for (i = 0; i < old->size; i++)
> -		write_ldt_entry(new->ldt, i, old->ldt + i * LDT_ENTRY_SIZE);
> -	return 0;
> +	if (unlikely(ldt)) {

Save an indentation level:

	int alloc_size;

	if (!ldt)
		return;

	alloc_size = sizeof(struct ldt_struct) + ldt->size * LDT_ENTRY_SIZE;
	...

> +		paravirt_free_ldt(ldt->entries, ldt->size);
> +		if (alloc_size > PAGE_SIZE)
> +			vfree(ldt->entries);
> +		else
> +			put_page(virt_to_page(ldt->entries));
> +		kfree(ldt);
> +	}
>  }
>
>  /*
> @@ -108,13 +110,32 @@ int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
>  	int retval = 0;
>
>  	mutex_init(&mm->context.lock);
> -	mm->context.size = 0;
>  	old_mm = current->mm;
> -	if (old_mm && old_mm->context.size > 0) {
> -		mutex_lock(&old_mm->context.lock);
> -		retval = copy_ldt(&mm->context, &old_mm->context);
> -		mutex_unlock(&old_mm->context.lock);
> +	if (!old_mm) {
> +		mm->context.ldt = NULL;
> +		return 0;
> +	}
> +
> +	mutex_lock(&old_mm->context.lock);
> +	if (old_mm->context.ldt) {

Same here:

	if (!old_mm->context.ldt) {
		mm->context.ldt = NULL;
		goto out_unlock;
	}

	new_ldt = ...

> +		struct ldt_struct *new_ldt =
> +			alloc_ldt_struct(old_mm->context.ldt->size);
> +		if (!new_ldt) {
> +			retval = -ENOMEM;
> +			goto out_unlock;
> +		}
> +
> +		memcpy(new_ldt->entries, old_mm->context.ldt->entries,
> +		       new_ldt->size * LDT_ENTRY_SIZE);
> +		finalize_ldt_struct(new_ldt);
> +
> +		mm->context.ldt = new_ldt;
> +	} else {
> +		mm->context.ldt = NULL;
> +	}
> +
> +out_unlock:
> +	mutex_unlock(&old_mm->context.lock);
>  	return retval;
>  }
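
IOW, with both suggestions folded in, the locked section could read like
this (completely untested, just to show the flow, with new_ldt declared
at the top of the function):

	mutex_lock(&old_mm->context.lock);
	if (!old_mm->context.ldt) {
		mm->context.ldt = NULL;
		goto out_unlock;
	}

	new_ldt = alloc_ldt_struct(old_mm->context.ldt->size);
	if (!new_ldt) {
		retval = -ENOMEM;
		goto out_unlock;
	}

	memcpy(new_ldt->entries, old_mm->context.ldt->entries,
	       new_ldt->size * LDT_ENTRY_SIZE);
	finalize_ldt_struct(new_ldt);
	mm->context.ldt = new_ldt;

out_unlock:
	mutex_unlock(&old_mm->context.lock);
	return retval;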
>
> @@ -125,53 +146,47 @@ int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
>   */
>  void destroy_context(struct mm_struct *mm)
>  {
> -	if (mm->context.size) {
> -#ifdef CONFIG_X86_32
> -		/* CHECKME: Can this ever happen ? */
> -		if (mm == current->active_mm)
> -			clear_LDT();
> -#endif
> -		paravirt_free_ldt(mm->context.ldt, mm->context.size);
> -		if (mm->context.size * LDT_ENTRY_SIZE > PAGE_SIZE)
> -			vfree(mm->context.ldt);
> -		else
> -			put_page(virt_to_page(mm->context.ldt));
> -		mm->context.size = 0;
> -	}
> +	free_ldt_struct(mm->context.ldt);
> +	mm->context.ldt = NULL;
>  }
>
>  static int read_ldt(void __user *ptr, unsigned long bytecount)
>  {
> -	int err;
> +	int retval;
>  	unsigned long size;
>  	struct mm_struct *mm = current->mm;
>
> -	if (!mm->context.size)
> -		return 0;
> +	mutex_lock(&mm->context.lock);
> +
> +	if (!mm->context.ldt) {
> +		retval = 0;
> +		goto out_unlock;
> +	}
> +
>  	if (bytecount > LDT_ENTRY_SIZE * LDT_ENTRIES)
>  		bytecount = LDT_ENTRY_SIZE * LDT_ENTRIES;
>
> -	mutex_lock(&mm->context.lock);
> -	size = mm->context.size * LDT_ENTRY_SIZE;
> +	size = mm->context.ldt->size * LDT_ENTRY_SIZE;
>  	if (size > bytecount)
>  		size = bytecount;
>
> -	err = 0;
> -	if (copy_to_user(ptr, mm->context.ldt, size))
> -		err = -EFAULT;
> -	mutex_unlock(&mm->context.lock);
> -	if (err < 0)
> -		goto error_return;
> +	if (copy_to_user(ptr, mm->context.ldt->entries, size)) {
> +		retval = -EFAULT;
> +		goto out_unlock;
> +	}
> +
>  	if (size != bytecount) {
> -		/* zero-fill the rest */
> +		/* Zero-fill the rest and pretend we read bytecount bytes. */
>  		if (clear_user(ptr + size, bytecount - size) != 0) {

Make that:

	if (clear_user(ptr + size, bytecount - size))

> -			err = -EFAULT;
> -			goto error_return;
> +			retval = -EFAULT;
> +			goto out_unlock;
>  		}
>  	}
> -	return bytecount;
> -error_return:
> -	return err;
> +	retval = bytecount;
> +
> +out_unlock:
> +	mutex_unlock(&mm->context.lock);
> +	return retval;
>  }
>
>  static int read_default_ldt(void __user *ptr, unsigned long bytecount)
> @@ -195,6 +210,8 @@ static int write_ldt(void __user *ptr, unsigned long bytecount, int oldmode)
>  	struct desc_struct ldt;
>  	int error;
>  	struct user_desc ldt_info;
> +	int oldsize, newsize;
> +	struct ldt_struct *new_ldt, *old_ldt;
>
>  	error = -EINVAL;
>  	if (bytecount != sizeof(ldt_info))
> @@ -213,34 +230,43 @@ static int write_ldt(void __user *ptr, unsigned long bytecount, int oldmode)
>  		goto out;
>  	}
>
> -	mutex_lock(&mm->context.lock);
> -	if (ldt_info.entry_number >= mm->context.size) {
> -		error = alloc_ldt(&current->mm->context,
> -				  ldt_info.entry_number + 1, 1);
> -		if (error < 0)
> -			goto out_unlock;
> -	}
> -
> -	/* Allow LDTs to be cleared by the user. */
> -	if (ldt_info.base_addr == 0 && ldt_info.limit == 0) {
> -		if (oldmode || LDT_empty(&ldt_info)) {
> -			memset(&ldt, 0, sizeof(ldt));
> -			goto install;
> +	if ((oldmode && ldt_info.base_addr == 0 && ldt_info.limit == 0) ||

Shorten:

	if ((oldmode && !ldt_info.base_addr && !ldt_info.limit) ||

> +	    LDT_empty(&ldt_info)) {
> +		/* The user wants to clear the entry. */
> +		memset(&ldt, 0, sizeof(ldt));
> +	} else {
> +		if (!IS_ENABLED(CONFIG_X86_16BIT) && !ldt_info.seg_32bit) {
> +			error = -EINVAL;
> +			goto out;
>  		}
> +
> +		fill_ldt(&ldt, &ldt_info);
> +		if (oldmode)
> +			ldt.avl = 0;
>  	}
>
> -	if (!IS_ENABLED(CONFIG_X86_16BIT) && !ldt_info.seg_32bit) {
> -		error = -EINVAL;
> +	mutex_lock(&mm->context.lock);
> +
> +	old_ldt = mm->context.ldt;
> +	oldsize = old_ldt ? old_ldt->size : 0;
> +	newsize = max((int)(ldt_info.entry_number + 1), oldsize);
> +
> +	error = -ENOMEM;
> +	new_ldt = alloc_ldt_struct(newsize);
> +	if (!new_ldt)
>  		goto out_unlock;
> -	}
>
> -	fill_ldt(&ldt, &ldt_info);
> -	if (oldmode)
> -		ldt.avl = 0;
> +	if (old_ldt) {
> +		memcpy(new_ldt->entries, old_ldt->entries,
> +		       oldsize * LDT_ENTRY_SIZE);
> +	}

A single if-statement doesn't need the {}, and you don't absolutely need
to keep to 80 cols here. Just let it stick out.
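
One last aside: the "dramatically slow down" claim in the commit message
should be easy to poke at from userspace by hammering modify_ldt() from a
few threads and timing it. A minimal single-threaded starting point could
look something like this (untested sketch, minus the threading
boilerplate):

	#include <asm/ldt.h>
	#include <stdio.h>
	#include <string.h>
	#include <sys/syscall.h>
	#include <unistd.h>

	int main(void)
	{
		struct user_desc desc;

		memset(&desc, 0, sizeof(desc));
		desc.entry_number = 0;
		desc.base_addr	  = 0;
		desc.limit	  = 10;
		desc.seg_32bit	  = 1;
		desc.contents	  = MODIFY_LDT_CONTENTS_DATA;

		/* 0x11 == write_ldt, new mode; with this patch it IPIs
		 * all CPUs running this mm. */
		if (syscall(SYS_modify_ldt, 0x11, &desc, sizeof(desc))) {
			perror("modify_ldt");
			return 1;
		}

		return 0;
	}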
--
Regards/Gruss,
    Boris.

ECO tip #101: Trim your mails when you reply.
--