Thanks to everyone for the advice. I found that my previous patch introduces a deadlock:

exec_mmap
    task_lock(tsk);
    activate_mm(active_mm, mm);
        get_new_mmu_context
            clear_other_mmu_contexts
                read_lock(&tasklist_lock);
                t = find_lock_task_mm(p, &irqflags);
                    task_lock(t, irqflags);	/* Possible deadlock! */

exec_mmap() already holds task_lock(tsk) when it calls activate_mm(), and find_lock_task_mm() takes task_lock() on each candidate thread, so when the walk reaches tsk itself it tries to re-acquire a non-recursive spinlock the CPU already owns.
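To make the failure mode concrete, here is a minimal user-space sketch of the same AA locking pattern. It is only an illustration; the names and the use of an error-checking pthread mutex as a stand-in for the spinlock behind task_lock() are mine, not part of the patch:

#include <errno.h>
#include <pthread.h>
#include <stdio.h>

int main(void)
{
	pthread_mutexattr_t attr;
	pthread_mutex_t alloc_lock;	/* stand-in for tsk->alloc_lock */

	pthread_mutexattr_init(&attr);
	pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_ERRORCHECK);
	pthread_mutex_init(&alloc_lock, &attr);

	/* exec_mmap: task_lock(tsk) */
	pthread_mutex_lock(&alloc_lock);

	/*
	 * activate_mm -> get_new_mmu_context -> clear_other_mmu_contexts
	 * -> find_lock_task_mm -> task_lock(t), with t == tsk.
	 * A real spinlock would spin forever here; the error-checking
	 * mutex reports the self-deadlock instead of hanging.
	 */
	if (pthread_mutex_lock(&alloc_lock) == EDEADLK)
		printf("AA deadlock: task_lock() taken twice on the same task\n");

	return 0;
}

Built with "gcc -pthread", it prints the deadlock message immediately instead of wedging the way the kernel path would.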
I came up with another solution: link all mm's into a global list, protected by a new mmlink_lock, and walk that list instead of the task list:

static inline void clear_other_mmu_contexts(struct mm_struct *mm,
					    unsigned long cpu)
{
	unsigned long flags;
	struct mm_struct *p;

	spin_lock_irqsave(&mmlink_lock, flags);
	list_for_each_entry(p, &mmlist, mmlink) {
		if ((p != mm) && cpu_context(cpu, p))
			cpu_context(cpu, p) = 0;
	}
	spin_unlock_irqrestore(&mmlink_lock, flags);
}

This path takes only mmlink_lock, so the task_lock() ordering problem above disappears. Here is the new patch.

Signed-off-by: Yu Huabing <yhb@xxxxxxxxxxxxx>
---
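A note for reviewers: the two wrap-around cases described in the new comment in mmu_context.h below reduce to the version arithmetic sketched here. This is a user-space sketch under assumptions of my own: an 8-bit ASID field (ASID_MASK == 0xff) and a 32-bit counter so the wrap is quick to show; the kernel uses an unsigned long and a CPU-dependent mask, and the final test is roughly the staleness check switch_mm() performs:

#include <stdint.h>
#include <stdio.h>

#define ASID_MASK		0xffu
#define ASID_VERSION_MASK	(~(uint32_t)(ASID_MASK | (ASID_MASK - 1)))
#define ASID_FIRST_VERSION	((uint32_t)(~ASID_VERSION_MASK) + 1)	/* 0x100 */

int main(void)
{
	uint32_t sleeper = 0x101;		/* cpu_context() of a long-sleeping mm */
	uint32_t asid_cache = 0xffffffffu;	/* n days later, about to wrap */

	/* get_new_mmu_context(), condensed: */
	uint32_t asid = asid_cache + 1;		/* wraps to 0 */
	if (!(asid & ASID_MASK) && !asid)	/* new cycle and full wrap */
		asid = ASID_FIRST_VERSION;
	asid_cache = asid;			/* 0x100 */

	uint32_t proc2 = ++asid_cache;		/* next allocation: 0x101 again */

	/*
	 * Version test for the sleeper: the versions now match, so no
	 * new ASID is allocated and both mm's share hardware ASID 0x01
	 * -- unless the stale contexts are cleared at wrap time, which
	 * is what clear_other_mmu_contexts() does.
	 */
	printf("sleeper stale? %s (sleeper=%#x, proc2=%#x)\n",
	       ((sleeper ^ asid_cache) & ASID_VERSION_MASK) ? "yes" : "no",
	       sleeper, proc2);
	return 0;
}

It prints "no": the sleeper's old context passes the version check, which is exactly cases 1 and 2 in the comment. The lock in clear_other_mmu_contexts() is irqsave because, as the comment notes, the function can be reached from hardirq context as well as process context.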
 arch/mips/include/asm/mmu_context.h | 41 +++++++++++++++++++++++++++++++++++--
 arch/x86/kernel/tboot.c             |  1 +
 drivers/firmware/efi/arm-runtime.c  |  1 +
 include/linux/mm_types.h            |  4 ++++
 kernel/fork.c                       | 12 +++++++++++
 mm/init-mm.c                        |  1 +
 mm/memory.c                         |  4 ++++
 7 files changed, 62 insertions(+), 2 deletions(-)

diff --git a/arch/mips/include/asm/mmu_context.h b/arch/mips/include/asm/mmu_context.h
index 45914b5..5767a08 100644
--- a/arch/mips/include/asm/mmu_context.h
+++ b/arch/mips/include/asm/mmu_context.h
@@ -97,6 +97,41 @@ static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
 #define ASID_VERSION_MASK  ((unsigned long)~(ASID_MASK|(ASID_MASK-1)))
 #define ASID_FIRST_VERSION ((unsigned long)(~ASID_VERSION_MASK) + 1)
 
+/*
+ * Yu Huabing
+ * Suppose that asid_cache(cpu) wraps to 0 every n days.
+ * case 1:
+ * (1)Process 1 got ASID 0x101.
+ * (2)Process 1 slept for n days.
+ * (3)asid_cache(cpu) wrapped to 0x101, and process 2 got ASID 0x101.
+ * (4)Process 1 is woken, and ASID of process 1 is same as ASID of process 2.
+ *
+ * case 2:
+ * (1)Process 1 got ASID 0x101 on CPU 1.
+ * (2)Process 1 migrated to CPU 2.
+ * (3)Process 1 migrated to CPU 1 after n days.
+ * (4)asid_cache on CPU 1 wrapped to 0x101, and process 2 got ASID 0x101.
+ * (5)Process 1 is scheduled, and ASID of process 1 is same as ASID of process 2.
+ *
+ * So we need to clear MMU contexts of all other processes when asid_cache(cpu)
+ * wraps to 0.
+ *
+ * This function might be called from hardirq context or process context.
+ */
+static inline void clear_other_mmu_contexts(struct mm_struct *mm,
+					    unsigned long cpu)
+{
+	unsigned long flags;
+	struct mm_struct *p;
+
+	spin_lock_irqsave(&mmlink_lock, flags);
+	list_for_each_entry(p, &mmlist, mmlink) {
+		if ((p != mm) && cpu_context(cpu, p))
+			cpu_context(cpu, p) = 0;
+	}
+	spin_unlock_irqrestore(&mmlink_lock, flags);
+}
+
 /* Normal, classic MIPS get_new_mmu_context */
 static inline void
 get_new_mmu_context(struct mm_struct *mm, unsigned long cpu)
@@ -112,8 +147,10 @@ get_new_mmu_context(struct mm_struct *mm, unsigned long cpu)
 #else
 		local_flush_tlb_all();	/* start new asid cycle */
 #endif
-		if (!asid)		/* fix version if needed */
-			asid = ASID_FIRST_VERSION;
+		if (!asid) {
+			asid = ASID_FIRST_VERSION; /* fix version if needed */
+			clear_other_mmu_contexts(mm, cpu);
+		}
 	}
 
 	cpu_context(cpu, mm) = asid_cache(cpu) = asid;
diff --git a/arch/x86/kernel/tboot.c b/arch/x86/kernel/tboot.c
index e72a07f..4e2eb68 100644
--- a/arch/x86/kernel/tboot.c
+++ b/arch/x86/kernel/tboot.c
@@ -112,6 +112,7 @@ static struct mm_struct tboot_mm = {
 	.mm_count       = ATOMIC_INIT(1),
 	.mmap_sem       = __RWSEM_INITIALIZER(init_mm.mmap_sem),
 	.page_table_lock =  __SPIN_LOCK_UNLOCKED(init_mm.page_table_lock),
+	.mmlink         = LIST_HEAD_INIT(tboot_mm.mmlink),
 	.mmlist         = LIST_HEAD_INIT(init_mm.mmlist),
 };
 
diff --git a/drivers/firmware/efi/arm-runtime.c b/drivers/firmware/efi/arm-runtime.c
index 6ae21e4..7b1b250 100644
--- a/drivers/firmware/efi/arm-runtime.c
+++ b/drivers/firmware/efi/arm-runtime.c
@@ -36,6 +36,7 @@ static struct mm_struct efi_mm = {
 	.mm_count		= ATOMIC_INIT(1),
 	.mmap_sem		= __RWSEM_INITIALIZER(efi_mm.mmap_sem),
 	.page_table_lock	= __SPIN_LOCK_UNLOCKED(efi_mm.page_table_lock),
+	.mmlink			= LIST_HEAD_INIT(efi_mm.mmlink),
 	.mmlist			= LIST_HEAD_INIT(efi_mm.mmlist),
 };
 
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index c2d75b4..bde0c79 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -412,6 +412,7 @@ struct mm_struct {
 	spinlock_t page_table_lock;	/* Protects page tables and some counters */
 
 	struct rw_semaphore mmap_sem;
+	struct list_head mmlink;	/* List of all mm's, and head is mmlist. */
 	struct list_head mmlist;	/* List of maybe swapped mm's.	These are globally strung
 					 * together off init_mm.mmlist, and are protected
 					 * by mmlist_lock
@@ -511,6 +512,9 @@ struct mm_struct {
 #endif
 };
 
+extern struct list_head mmlist;
+extern spinlock_t mmlink_lock;
+
 static inline void mm_init_cpumask(struct mm_struct *mm)
 {
 #ifdef CONFIG_CPUMASK_OFFSTACK
diff --git a/kernel/fork.c b/kernel/fork.c
index d277e83..813f5df 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -603,6 +603,7 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p)
 	atomic_set(&mm->mm_users, 1);
 	atomic_set(&mm->mm_count, 1);
 	init_rwsem(&mm->mmap_sem);
+	INIT_LIST_HEAD(&mm->mmlink);
 	INIT_LIST_HEAD(&mm->mmlist);
 	mm->core_state = NULL;
 	atomic_long_set(&mm->nr_ptes, 0);
@@ -707,6 +708,12 @@ void mmput(struct mm_struct *mm)
 	might_sleep();
 
 	if (atomic_dec_and_test(&mm->mm_users)) {
+		unsigned long flags;
+
+		spin_lock_irqsave(&mmlink_lock, flags);
+		list_del(&mm->mmlink);
+		spin_unlock_irqrestore(&mmlink_lock, flags);
+
 		uprobe_clear_state(mm);
 		exit_aio(mm);
 		ksm_exit(mm);
@@ -924,6 +931,7 @@ static struct mm_struct *dup_mm(struct task_struct *tsk)
 {
 	struct mm_struct *mm, *oldmm = current->mm;
 	int err;
+	unsigned long flags;
 
 	mm = allocate_mm();
 	if (!mm)
@@ -944,6 +952,10 @@ static struct mm_struct *dup_mm(struct task_struct *tsk)
 	if (mm->binfmt && !try_module_get(mm->binfmt->module))
 		goto free_pt;
 
+	spin_lock_irqsave(&mmlink_lock, flags);
+	list_add_tail(&mm->mmlink, &mmlist);
+	spin_unlock_irqrestore(&mmlink_lock, flags);
+
 	return mm;
 
 free_pt:
diff --git a/mm/init-mm.c b/mm/init-mm.c
index a56a851..30d554e 100644
--- a/mm/init-mm.c
+++ b/mm/init-mm.c
@@ -20,6 +20,7 @@ struct mm_struct init_mm = {
 	.mm_count	= ATOMIC_INIT(1),
 	.mmap_sem	= __RWSEM_INITIALIZER(init_mm.mmap_sem),
 	.page_table_lock =  __SPIN_LOCK_UNLOCKED(init_mm.page_table_lock),
+	.mmlink		= LIST_HEAD_INIT(init_mm.mmlink),
 	.mmlist		= LIST_HEAD_INIT(init_mm.mmlist),
 	INIT_MM_CONTEXT(init_mm)
 };
diff --git a/mm/memory.c b/mm/memory.c
index 07493e3..eb9693e 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -78,6 +78,10 @@
 #warning Unfortunate NUMA and NUMA Balancing config, growing page-frame for last_cpupid.
 #endif
 
+/* Link all mm's in a linked list, which is protected by mmlink_lock. */
+LIST_HEAD(mmlist);
+DEFINE_SPINLOCK(mmlink_lock);
+
 #ifndef CONFIG_NEED_MULTIPLE_NODES
 /* use the per-pgdat data instead for discontigmem - mbligh */
 unsigned long max_mapnr;
--