On Thu, Jun 17, 2021 at 11:08:03AM +0200, Peter Zijlstra wrote:
> diff --git a/kernel/fork.c b/kernel/fork.c
> index e595e77913eb..57415cca088c 100644
> --- a/kernel/fork.c
> +++ b/kernel/fork.c
> @@ -1104,6 +1104,8 @@ static inline void __mmput(struct mm_struct *mm)
>  	}
>  	if (mm->binfmt)
>  		module_put(mm->binfmt->module);
> +
> +	mm_unlazy_mm_count(mm);
>  	mmdrop(mm);
>  }
> 
> diff --git a/kernel/sched/core.c b/kernel/sched/core.c
> index 8ac693d542f6..e102ec53c2f6 100644
> --- a/kernel/sched/core.c
> +++ b/kernel/sched/core.c
> @@ -19,6 +19,7 @@
> +/*
> + * This converts all lazy_mm references to mm to mm_count refcounts.  Our
> + * caller holds an mm_count reference, so we don't need to worry about mm
> + * being freed out from under us.
> + */
> +void mm_unlazy_mm_count(struct mm_struct *mm)
> +{
> +	unsigned int drop_count = num_possible_cpus();
> +	int cpu;
> +
> +	/*
> +	 * mm_users is zero, so no cpu will set its rq->lazy_mm to mm.
> +	 */
> +	WARN_ON_ONCE(atomic_read(&mm->mm_users) != 0);
> +
> +	/* Grab enough references for the rest of this function. */
> +	atomic_add(drop_count, &mm->mm_count);

So that had me puzzled for a little while. Would something like this be
a better comment?

	/*
	 * Because this can race with mmdrop_lazy(), mm_count must be
	 * incremented before setting any rq->drop_mm value, otherwise
	 * it is possible to free mm early.
	 */

> +
> +	for_each_possible_lazymm_cpu(cpu, mm) {
> +		struct rq *rq = cpu_rq(cpu);
> +		struct mm_struct *old_mm;
> +
> +		if (smp_load_acquire(&rq->lazy_mm) != mm)
> +			continue;
> +
> +		drop_count--;	/* grab a reference; cpu will drop it later. */

Totally confusing comment, that :-)

> +

And with that, we rely on xchg() here to be at least RELEASE, such that
the mm_count increment must be visible when drop_mm is seen.

> +		old_mm = xchg(&rq->drop_mm, mm);

Similarly, we rely on it being at least ACQUIRE for the !NULL return
case, I think.

> +
> +		/*
> +		 * We know that old_mm != mm: when we did the xchg(), we were
> +		 * the only cpu to be putting mm into any drop_mm variable.
> +		 */
> +		WARN_ON_ONCE(old_mm == mm);
> +		if (unlikely(old_mm)) {
> +			/*
> +			 * We just stole an mm reference from the target CPU.
> +			 *
> +			 * drop_mm was set to old by another call to
> +			 * mm_unlazy_mm_count().  After that call xchg'd old
> +			 * into drop_mm, the target CPU did:
> +			 *
> +			 *  smp_store_release(&rq->lazy_mm, mm);
> +			 *
> +			 * which synchronized with our smp_load_acquire()
> +			 * above, so we know that the target CPU is done with
> +			 * old.  Drop old on its behalf.
> +			 */
> +			mmdrop(old_mm);
> +		}
> +	}
> +
> +	atomic_sub(drop_count, &mm->mm_count);
> +}
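
To make the pairing explicit, here is roughly how I imagine the consumer
side looks (a hypothetical sketch for illustration only, not the actual
mmdrop_lazy() from the patch; the helper name and its body are my
invention, only rq->drop_mm and mmdrop() come from the hunks above):

	/*
	 * Sketch of the CPU that consumes rq->drop_mm and drops the
	 * reference parked there by mm_unlazy_mm_count().
	 */
	static void mmdrop_lazy_sketch(struct rq *rq)
	{
		/*
		 * Pairs with the (at least) RELEASE xchg() in
		 * mm_unlazy_mm_count().
		 */
		struct mm_struct *old = xchg(&rq->drop_mm, NULL);

		if (old)
			mmdrop(old);	/* may drop the final reference */
	}

If the xchg() in mm_unlazy_mm_count() is not at least RELEASE, this side
can observe mm in rq->drop_mm without yet observing the atomic_add() to
mm_count, and the mmdrop() here drops what looks like the final
reference and frees mm while mm_unlazy_mm_count() still owns the
references it accounted in drop_count -- exactly the "free mm early"
case the comment I'm suggesting above is trying to capture.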