On 6/6/19 11:44 AM, Jason Gunthorpe wrote:
From: Jason Gunthorpe <jgg@xxxxxxxxxxxx>
As coded this function can false-fail in various racy situations. Make it
reliable by running only under the write side of the mmap_sem and avoiding
the false-failing compare/exchange pattern.
Also make the locking very easy to understand by only ever reading or
writing mm->hmm while holding the write side of the mmap_sem.
Signed-off-by: Jason Gunthorpe <jgg@xxxxxxxxxxxx>
Reviewed-by: Ralph Campbell <rcampbell@xxxxxxxxxx>
---
v2:
- Fix error unwind of mmgrab (Jerome)
- Use hmm local instead of 2nd container_of (Jerome)
---
mm/hmm.c | 80 ++++++++++++++++++++------------------------------------
1 file changed, 29 insertions(+), 51 deletions(-)
diff --git a/mm/hmm.c b/mm/hmm.c
index cc7c26fda3300e..dc30edad9a8a02 100644
--- a/mm/hmm.c
+++ b/mm/hmm.c
@@ -40,16 +40,6 @@
#if IS_ENABLED(CONFIG_HMM_MIRROR)
static const struct mmu_notifier_ops hmm_mmu_notifier_ops;
-static inline struct hmm *mm_get_hmm(struct mm_struct *mm)
-{
- struct hmm *hmm = READ_ONCE(mm->hmm);
-
- if (hmm && kref_get_unless_zero(&hmm->kref))
- return hmm;
-
- return NULL;
-}
-
/**
* hmm_get_or_create - register HMM against an mm (HMM internal)
*
@@ -64,11 +54,20 @@ static inline struct hmm *mm_get_hmm(struct mm_struct *mm)
*/
static struct hmm *hmm_get_or_create(struct mm_struct *mm)
{
- struct hmm *hmm = mm_get_hmm(mm);
- bool cleanup = false;
+ struct hmm *hmm;
- if (hmm)
- return hmm;
+ lockdep_assert_held_exclusive(&mm->mmap_sem);
+
+ if (mm->hmm) {
+ if (kref_get_unless_zero(&mm->hmm->kref))
+ return mm->hmm;
+ /*
+ * The hmm is being freed by some other CPU and is pending a
+ * RCU grace period, but this CPU can NULL now it since we
+ * have the mmap_sem.
+ */
+ mm->hmm = NULL;
+ }
hmm = kmalloc(sizeof(*hmm), GFP_KERNEL);
if (!hmm)
@@ -83,57 +82,36 @@ static struct hmm *hmm_get_or_create(struct mm_struct *mm)
hmm->notifiers = 0;
hmm->dead = false;
hmm->mm = mm;
- mmgrab(hmm->mm);
-
- spin_lock(&mm->page_table_lock);
- if (!mm->hmm)
- mm->hmm = hmm;
- else
- cleanup = true;
- spin_unlock(&mm->page_table_lock);
- if (cleanup)
- goto error;
-
- /*
- * We should only get here if hold the mmap_sem in write mode ie on
- * registration of first mirror through hmm_mirror_register()
- */
hmm->mmu_notifier.ops = &hmm_mmu_notifier_ops;
- if (__mmu_notifier_register(&hmm->mmu_notifier, mm))
- goto error_mm;
+ if (__mmu_notifier_register(&hmm->mmu_notifier, mm)) {
+ kfree(hmm);
+ return NULL;
+ }
+ mmgrab(hmm->mm);
+ mm->hmm = hmm;
return hmm;
-
-error_mm:
- spin_lock(&mm->page_table_lock);
- if (mm->hmm == hmm)
- mm->hmm = NULL;
- spin_unlock(&mm->page_table_lock);
-error:
- mmdrop(hmm->mm);
- kfree(hmm);
- return NULL;
}
static void hmm_free_rcu(struct rcu_head *rcu)
{
- kfree(container_of(rcu, struct hmm, rcu));
+ struct hmm *hmm = container_of(rcu, struct hmm, rcu);
+
+ down_write(&hmm->mm->mmap_sem);
+ if (hmm->mm->hmm == hmm)
+ hmm->mm->hmm = NULL;
+ up_write(&hmm->mm->mmap_sem);
+ mmdrop(hmm->mm);
+
+ kfree(hmm);
}
static void hmm_free(struct kref *kref)
{
struct hmm *hmm = container_of(kref, struct hmm, kref);
- struct mm_struct *mm = hmm->mm;
-
- mmu_notifier_unregister_no_release(&hmm->mmu_notifier, mm);
- spin_lock(&mm->page_table_lock);
- if (mm->hmm == hmm)
- mm->hmm = NULL;
- spin_unlock(&mm->page_table_lock);
-
- mmdrop(hmm->mm);
+ mmu_notifier_unregister_no_release(&hmm->mmu_notifier, hmm->mm);
mmu_notifier_call_srcu(&hmm->rcu, hmm_free_rcu);
}