Re: [RFC PATCH 04/11] mm/hmm: Simplify hmm_get_or_create and make it reliable

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 




On 5/23/19 8:34 AM, Jason Gunthorpe wrote:
From: Jason Gunthorpe <jgg@xxxxxxxxxxxx>

As coded this function can false-fail in various racy situations. Make it
reliable by running only under the write side of the mmap_sem and avoiding
the false-failing compare/exchange pattern.

Also make the locking very easy to understand by only ever reading or
writing mm->hmm while holding the write side of the mmap_sem.

Signed-off-by: Jason Gunthorpe <jgg@xxxxxxxxxxxx>
---
  mm/hmm.c | 75 ++++++++++++++++++++------------------------------------
  1 file changed, 27 insertions(+), 48 deletions(-)

diff --git a/mm/hmm.c b/mm/hmm.c
index e27058e92508b9..ec54be54d81135 100644
--- a/mm/hmm.c
+++ b/mm/hmm.c
@@ -40,16 +40,6 @@
  #if IS_ENABLED(CONFIG_HMM_MIRROR)
  static const struct mmu_notifier_ops hmm_mmu_notifier_ops;
-static inline struct hmm *mm_get_hmm(struct mm_struct *mm)
-{
-	struct hmm *hmm = READ_ONCE(mm->hmm);
-
-	if (hmm && kref_get_unless_zero(&hmm->kref))
-		return hmm;
-
-	return NULL;
-}
-
  /**
   * hmm_get_or_create - register HMM against an mm (HMM internal)
   *
@@ -64,11 +54,20 @@ static inline struct hmm *mm_get_hmm(struct mm_struct *mm)
   */
  static struct hmm *hmm_get_or_create(struct mm_struct *mm)
  {
-	struct hmm *hmm = mm_get_hmm(mm);
-	bool cleanup = false;
+	struct hmm *hmm;
- if (hmm)
-		return hmm;
+	lockdep_assert_held_exclusive(mm->mmap_sem);
+
+	if (mm->hmm) {
+		if (kref_get_unless_zero(&mm->hmm->kref))
+			return mm->hmm;
+		/*
+		 * The hmm is being freed by some other CPU and is pending a
+		 * RCU grace period, but this CPU can NULL now it since we
+		 * have the mmap_sem.
+		 */
+		mm->hmm = NULL;

Shouldn't there be a "return NULL;" here so it doesn't fall through and
allocate a struct hmm below?

+	}
hmm = kmalloc(sizeof(*hmm), GFP_KERNEL);
  	if (!hmm)
@@ -85,54 +84,34 @@ static struct hmm *hmm_get_or_create(struct mm_struct *mm)
  	hmm->mm = mm;
  	mmgrab(hmm->mm);
- spin_lock(&mm->page_table_lock);
-	if (!mm->hmm)
-		mm->hmm = hmm;
-	else
-		cleanup = true;
-	spin_unlock(&mm->page_table_lock);
-
-	if (cleanup)
-		goto error;
-
-	/*
-	 * We should only get here if hold the mmap_sem in write mode ie on
-	 * registration of first mirror through hmm_mirror_register()
-	 */
  	hmm->mmu_notifier.ops = &hmm_mmu_notifier_ops;
-	if (__mmu_notifier_register(&hmm->mmu_notifier, mm))
-		goto error_mm;
+	if (__mmu_notifier_register(&hmm->mmu_notifier, mm)) {
+		kfree(hmm);
+		return NULL;
+	}
+ mm->hmm = hmm;
  	return hmm;
-
-error_mm:
-	spin_lock(&mm->page_table_lock);
-	if (mm->hmm == hmm)
-		mm->hmm = NULL;
-	spin_unlock(&mm->page_table_lock);
-error:
-	kfree(hmm);
-	return NULL;
  }
static void hmm_fee_rcu(struct rcu_head *rcu)

I see Jerome already saw and named this hmm_free_rcu()
which I agree with.

  {
+	struct hmm *hmm = container_of(rcu, struct hmm, rcu);
+
+	down_write(&hmm->mm->mmap_sem);
+	if (hmm->mm->hmm == hmm)
+		hmm->mm->hmm = NULL;
+	up_write(&hmm->mm->mmap_sem);
+	mmdrop(hmm->mm);
+
  	kfree(container_of(rcu, struct hmm, rcu));
  }
static void hmm_free(struct kref *kref)
  {
  	struct hmm *hmm = container_of(kref, struct hmm, kref);
-	struct mm_struct *mm = hmm->mm;
-
-	mmu_notifier_unregister_no_release(&hmm->mmu_notifier, mm);
- spin_lock(&mm->page_table_lock);
-	if (mm->hmm == hmm)
-		mm->hmm = NULL;
-	spin_unlock(&mm->page_table_lock);
-
-	mmdrop(hmm->mm);
+	mmu_notifier_unregister_no_release(&hmm->mmu_notifier, hmm->mm);
  	mmu_notifier_call_srcu(&hmm->rcu, hmm_fee_rcu);
  }

-----------------------------------------------------------------------------------
This email message is for the sole use of the intended recipient(s) and may contain
confidential information.  Any unauthorized review, use, disclosure or distribution
is prohibited.  If you are not the intended recipient, please contact the sender by
reply email and destroy all copies of the original message.
-----------------------------------------------------------------------------------



[Index of Archives]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Photo]     [Yosemite News]     [Yosemite Photos]     [Linux Kernel]     [Linux SCSI]     [XFree86]

  Powered by Linux