From: Jason Gunthorpe <jgg@xxxxxxxxxxxx> As coded this function can false-fail in various racy situations. Make it reliable by running only under the write side of the mmap_sem and avoiding the false-failing compare/exchange pattern. Also make the locking very easy to understand by only ever reading or writing mm->hmm while holding the write side of the mmap_sem. Signed-off-by: Jason Gunthorpe <jgg@xxxxxxxxxxxx> --- v2: - Fix error unwind of mmgrab (Jerome) - Use hmm local instead of 2nd container_of (Jerome) --- mm/hmm.c | 80 ++++++++++++++++++++------------------------------------ 1 file changed, 29 insertions(+), 51 deletions(-) diff --git a/mm/hmm.c b/mm/hmm.c index cc7c26fda3300e..dc30edad9a8a02 100644 --- a/mm/hmm.c +++ b/mm/hmm.c @@ -40,16 +40,6 @@ #if IS_ENABLED(CONFIG_HMM_MIRROR) static const struct mmu_notifier_ops hmm_mmu_notifier_ops; -static inline struct hmm *mm_get_hmm(struct mm_struct *mm) -{ - struct hmm *hmm = READ_ONCE(mm->hmm); - - if (hmm && kref_get_unless_zero(&hmm->kref)) - return hmm; - - return NULL; -} - /** * hmm_get_or_create - register HMM against an mm (HMM internal) * @@ -64,11 +54,20 @@ static inline struct hmm *mm_get_hmm(struct mm_struct *mm) */ static struct hmm *hmm_get_or_create(struct mm_struct *mm) { - struct hmm *hmm = mm_get_hmm(mm); - bool cleanup = false; + struct hmm *hmm; - if (hmm) - return hmm; + lockdep_assert_held_exclusive(&mm->mmap_sem); + + if (mm->hmm) { + if (kref_get_unless_zero(&mm->hmm->kref)) + return mm->hmm; + /* + * The hmm is being freed by some other CPU and is pending a + * RCU grace period, but this CPU can NULL now it since we + * have the mmap_sem. + */ + mm->hmm = NULL; + } hmm = kmalloc(sizeof(*hmm), GFP_KERNEL); if (!hmm) @@ -83,57 +82,36 @@ static struct hmm *hmm_get_or_create(struct mm_struct *mm) hmm->notifiers = 0; hmm->dead = false; hmm->mm = mm; - mmgrab(hmm->mm); - - spin_lock(&mm->page_table_lock); - if (!mm->hmm) - mm->hmm = hmm; - else - cleanup = true; - spin_unlock(&mm->page_table_lock); - if (cleanup) - goto error; - - /* - * We should only get here if hold the mmap_sem in write mode ie on - * registration of first mirror through hmm_mirror_register() - */ hmm->mmu_notifier.ops = &hmm_mmu_notifier_ops; - if (__mmu_notifier_register(&hmm->mmu_notifier, mm)) - goto error_mm; + if (__mmu_notifier_register(&hmm->mmu_notifier, mm)) { + kfree(hmm); + return NULL; + } + mmgrab(hmm->mm); + mm->hmm = hmm; return hmm; - -error_mm: - spin_lock(&mm->page_table_lock); - if (mm->hmm == hmm) - mm->hmm = NULL; - spin_unlock(&mm->page_table_lock); -error: - mmdrop(hmm->mm); - kfree(hmm); - return NULL; } static void hmm_free_rcu(struct rcu_head *rcu) { - kfree(container_of(rcu, struct hmm, rcu)); + struct hmm *hmm = container_of(rcu, struct hmm, rcu); + + down_write(&hmm->mm->mmap_sem); + if (hmm->mm->hmm == hmm) + hmm->mm->hmm = NULL; + up_write(&hmm->mm->mmap_sem); + mmdrop(hmm->mm); + + kfree(hmm); } static void hmm_free(struct kref *kref) { struct hmm *hmm = container_of(kref, struct hmm, kref); - struct mm_struct *mm = hmm->mm; - - mmu_notifier_unregister_no_release(&hmm->mmu_notifier, mm); - spin_lock(&mm->page_table_lock); - if (mm->hmm == hmm) - mm->hmm = NULL; - spin_unlock(&mm->page_table_lock); - - mmdrop(hmm->mm); + mmu_notifier_unregister_no_release(&hmm->mmu_notifier, hmm->mm); mmu_notifier_call_srcu(&hmm->rcu, hmm_free_rcu); } -- 2.21.0