On Thu, Jun 06, 2019 at 03:44:31PM -0300, Jason Gunthorpe wrote: > From: Jason Gunthorpe <jgg@xxxxxxxxxxxx> > > As coded this function can false-fail in various racy situations. Make it > reliable by running only under the write side of the mmap_sem and avoiding > the false-failing compare/exchange pattern. > > Also make the locking very easy to understand by only ever reading or > writing mm->hmm while holding the write side of the mmap_sem. > > Signed-off-by: Jason Gunthorpe <jgg@xxxxxxxxxxxx> Reviewed-by: Ira Weiny <ira.weiny@xxxxxxxxx> > --- > v2: > - Fix error unwind of mmgrab (Jerome) > - Use hmm local instead of 2nd container_of (Jerome) > --- > mm/hmm.c | 80 ++++++++++++++++++++------------------------------------ > 1 file changed, 29 insertions(+), 51 deletions(-) > > diff --git a/mm/hmm.c b/mm/hmm.c > index cc7c26fda3300e..dc30edad9a8a02 100644 > --- a/mm/hmm.c > +++ b/mm/hmm.c > @@ -40,16 +40,6 @@ > #if IS_ENABLED(CONFIG_HMM_MIRROR) > static const struct mmu_notifier_ops hmm_mmu_notifier_ops; > > -static inline struct hmm *mm_get_hmm(struct mm_struct *mm) > -{ > - struct hmm *hmm = READ_ONCE(mm->hmm); > - > - if (hmm && kref_get_unless_zero(&hmm->kref)) > - return hmm; > - > - return NULL; > -} > - > /** > * hmm_get_or_create - register HMM against an mm (HMM internal) > * > @@ -64,11 +54,20 @@ static inline struct hmm *mm_get_hmm(struct mm_struct *mm) > */ > static struct hmm *hmm_get_or_create(struct mm_struct *mm) > { > - struct hmm *hmm = mm_get_hmm(mm); > - bool cleanup = false; > + struct hmm *hmm; > > - if (hmm) > - return hmm; > + lockdep_assert_held_exclusive(&mm->mmap_sem); > + > + if (mm->hmm) { > + if (kref_get_unless_zero(&mm->hmm->kref)) > + return mm->hmm; > + /* > + * The hmm is being freed by some other CPU and is pending an > + * RCU grace period, but this CPU can NULL it now since we > + * have the mmap_sem. 
> + */ > + mm->hmm = NULL; > + } > > hmm = kmalloc(sizeof(*hmm), GFP_KERNEL); > if (!hmm) > @@ -83,57 +82,36 @@ static struct hmm *hmm_get_or_create(struct mm_struct *mm) > hmm->notifiers = 0; > hmm->dead = false; > hmm->mm = mm; > - mmgrab(hmm->mm); > - > - spin_lock(&mm->page_table_lock); > - if (!mm->hmm) > - mm->hmm = hmm; > - else > - cleanup = true; > - spin_unlock(&mm->page_table_lock); > > - if (cleanup) > - goto error; > - > - /* > - * We should only get here if hold the mmap_sem in write mode ie on > - * registration of first mirror through hmm_mirror_register() > - */ > hmm->mmu_notifier.ops = &hmm_mmu_notifier_ops; > - if (__mmu_notifier_register(&hmm->mmu_notifier, mm)) > - goto error_mm; > + if (__mmu_notifier_register(&hmm->mmu_notifier, mm)) { > + kfree(hmm); > + return NULL; > + } > > + mmgrab(hmm->mm); > + mm->hmm = hmm; > return hmm; > - > -error_mm: > - spin_lock(&mm->page_table_lock); > - if (mm->hmm == hmm) > - mm->hmm = NULL; > - spin_unlock(&mm->page_table_lock); > -error: > - mmdrop(hmm->mm); > - kfree(hmm); > - return NULL; > } > > static void hmm_free_rcu(struct rcu_head *rcu) > { > - kfree(container_of(rcu, struct hmm, rcu)); > + struct hmm *hmm = container_of(rcu, struct hmm, rcu); > + > + down_write(&hmm->mm->mmap_sem); > + if (hmm->mm->hmm == hmm) > + hmm->mm->hmm = NULL; > + up_write(&hmm->mm->mmap_sem); > + mmdrop(hmm->mm); > + > + kfree(hmm); > } > > static void hmm_free(struct kref *kref) > { > struct hmm *hmm = container_of(kref, struct hmm, kref); > - struct mm_struct *mm = hmm->mm; > - > - mmu_notifier_unregister_no_release(&hmm->mmu_notifier, mm); > > - spin_lock(&mm->page_table_lock); > - if (mm->hmm == hmm) > - mm->hmm = NULL; > - spin_unlock(&mm->page_table_lock); > - > - mmdrop(hmm->mm); > + mmu_notifier_unregister_no_release(&hmm->mmu_notifier, hmm->mm); > mmu_notifier_call_srcu(&hmm->rcu, hmm_free_rcu); > } > > -- > 2.21.0 >