* Suren Baghdasaryan <surenb@xxxxxxxxxx> [241121 11:28]: > Convert mm_lock_seq to be seqcount_t and change all mmap_write_lock > variants to increment it, in line with the usual seqcount usage pattern. > This lets us check whether the mmap_lock is write-locked by checking > mm_lock_seq.sequence counter (odd=locked, even=unlocked). This will be > used when implementing mmap_lock speculation functions. > As a result vm_lock_seq is also changed to be unsigned to match the type > of mm_lock_seq.sequence. > > Suggested-by: Peter Zijlstra <peterz@xxxxxxxxxxxxx> > Signed-off-by: Suren Baghdasaryan <surenb@xxxxxxxxxx> Reviewed-by: Liam R. Howlett <Liam.Howlett@xxxxxxxxxx> > --- > Changes since v1 [1] > - Added ASSERT_EXCLUSIVE_WRITER() instead of a comment in > vma_end_write_all, per Peter Zijlstra > > [1] https://lore.kernel.org/all/20241024205231.1944747-1-surenb@xxxxxxxxxx/ > > include/linux/mm.h | 12 +++---- > include/linux/mm_types.h | 7 ++-- > include/linux/mmap_lock.h | 55 +++++++++++++++++++++----------- > kernel/fork.c | 5 +-- > mm/init-mm.c | 2 +- > tools/testing/vma/vma.c | 4 +-- > tools/testing/vma/vma_internal.h | 4 +-- > 7 files changed, 53 insertions(+), 36 deletions(-) > > diff --git a/include/linux/mm.h b/include/linux/mm.h > index feb5c8021bef..e6de22738ee1 100644 > --- a/include/linux/mm.h > +++ b/include/linux/mm.h > @@ -710,7 +710,7 @@ static inline bool vma_start_read(struct vm_area_struct *vma) > * we don't rely on for anything - the mm_lock_seq read against which we > * need ordering is below. > */ > - if (READ_ONCE(vma->vm_lock_seq) == READ_ONCE(vma->vm_mm->mm_lock_seq)) > + if (READ_ONCE(vma->vm_lock_seq) == READ_ONCE(vma->vm_mm->mm_lock_seq.sequence)) > return false; > > if (unlikely(down_read_trylock(&vma->vm_lock->lock) == 0)) > @@ -727,7 +727,7 @@ static inline bool vma_start_read(struct vm_area_struct *vma) > * after it has been unlocked. > * This pairs with RELEASE semantics in vma_end_write_all(). 
> */ > - if (unlikely(vma->vm_lock_seq == smp_load_acquire(&vma->vm_mm->mm_lock_seq))) { > + if (unlikely(vma->vm_lock_seq == raw_read_seqcount(&vma->vm_mm->mm_lock_seq))) { > up_read(&vma->vm_lock->lock); > return false; > } > @@ -742,7 +742,7 @@ static inline void vma_end_read(struct vm_area_struct *vma) > } > > /* WARNING! Can only be used if mmap_lock is expected to be write-locked */ > -static bool __is_vma_write_locked(struct vm_area_struct *vma, int *mm_lock_seq) > +static bool __is_vma_write_locked(struct vm_area_struct *vma, unsigned int *mm_lock_seq) > { > mmap_assert_write_locked(vma->vm_mm); > > @@ -750,7 +750,7 @@ static bool __is_vma_write_locked(struct vm_area_struct *vma, int *mm_lock_seq) > * current task is holding mmap_write_lock, both vma->vm_lock_seq and > * mm->mm_lock_seq can't be concurrently modified. > */ > - *mm_lock_seq = vma->vm_mm->mm_lock_seq; > + *mm_lock_seq = vma->vm_mm->mm_lock_seq.sequence; > return (vma->vm_lock_seq == *mm_lock_seq); > } > > @@ -761,7 +761,7 @@ static bool __is_vma_write_locked(struct vm_area_struct *vma, int *mm_lock_seq) > */ > static inline void vma_start_write(struct vm_area_struct *vma) > { > - int mm_lock_seq; > + unsigned int mm_lock_seq; > > if (__is_vma_write_locked(vma, &mm_lock_seq)) > return; > @@ -779,7 +779,7 @@ static inline void vma_start_write(struct vm_area_struct *vma) > > static inline void vma_assert_write_locked(struct vm_area_struct *vma) > { > - int mm_lock_seq; > + unsigned int mm_lock_seq; > > VM_BUG_ON_VMA(!__is_vma_write_locked(vma, &mm_lock_seq), vma); > } > diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h > index 381d22eba088..ac72888a54b8 100644 > --- a/include/linux/mm_types.h > +++ b/include/linux/mm_types.h > @@ -715,7 +715,7 @@ struct vm_area_struct { > * counter reuse can only lead to occasional unnecessary use of the > * slowpath. > */ > - int vm_lock_seq; > + unsigned int vm_lock_seq; > /* Unstable RCU readers are allowed to read this. 
*/ > struct vma_lock *vm_lock; > #endif > @@ -909,6 +909,9 @@ struct mm_struct { > * Roughly speaking, incrementing the sequence number is > * equivalent to releasing locks on VMAs; reading the sequence > * number can be part of taking a read lock on a VMA. > + * Incremented every time mmap_lock is write-locked/unlocked. > + * Initialized to 0, therefore odd values indicate mmap_lock > + * is write-locked and even values that it's released. > * > * Can be modified under write mmap_lock using RELEASE > * semantics. > @@ -917,7 +920,7 @@ struct mm_struct { > * Can be read with ACQUIRE semantics if not holding write > * mmap_lock. > */ > - int mm_lock_seq; > + seqcount_t mm_lock_seq; > #endif > > > diff --git a/include/linux/mmap_lock.h b/include/linux/mmap_lock.h > index de9dc20b01ba..083b7fa2588e 100644 > --- a/include/linux/mmap_lock.h > +++ b/include/linux/mmap_lock.h > @@ -71,39 +71,38 @@ static inline void mmap_assert_write_locked(const struct mm_struct *mm) > } > > #ifdef CONFIG_PER_VMA_LOCK > -/* > - * Drop all currently-held per-VMA locks. > - * This is called from the mmap_lock implementation directly before releasing > - * a write-locked mmap_lock (or downgrading it to read-locked). > - * This should normally NOT be called manually from other places. > - * If you want to call this manually anyway, keep in mind that this will release > - * *all* VMA write locks, including ones from further up the stack. > - */ > -static inline void vma_end_write_all(struct mm_struct *mm) > +static inline void mm_lock_seqcount_init(struct mm_struct *mm) > { > - mmap_assert_write_locked(mm); > - /* > - * Nobody can concurrently modify mm->mm_lock_seq due to exclusive > - * mmap_lock being held. > - * We need RELEASE semantics here to ensure that preceding stores into > - * the VMA take effect before we unlock it with this store. > - * Pairs with ACQUIRE semantics in vma_start_read(). 
> - */ > - smp_store_release(&mm->mm_lock_seq, mm->mm_lock_seq + 1); > + seqcount_init(&mm->mm_lock_seq); > +} > + > +static inline void mm_lock_seqcount_begin(struct mm_struct *mm) > +{ > + do_raw_write_seqcount_begin(&mm->mm_lock_seq); > +} > + > +static inline void mm_lock_seqcount_end(struct mm_struct *mm) > +{ > + do_raw_write_seqcount_end(&mm->mm_lock_seq); > } > + > #else > -static inline void vma_end_write_all(struct mm_struct *mm) {} > +static inline void mm_lock_seqcount_init(struct mm_struct *mm) {} > +static inline void mm_lock_seqcount_begin(struct mm_struct *mm) {} > +static inline void mm_lock_seqcount_end(struct mm_struct *mm) {} > #endif > > static inline void mmap_init_lock(struct mm_struct *mm) > { > init_rwsem(&mm->mmap_lock); > + mm_lock_seqcount_init(mm); > } > > static inline void mmap_write_lock(struct mm_struct *mm) > { > __mmap_lock_trace_start_locking(mm, true); > down_write(&mm->mmap_lock); > + mm_lock_seqcount_begin(mm); > __mmap_lock_trace_acquire_returned(mm, true, true); > } > > @@ -111,6 +110,7 @@ static inline void mmap_write_lock_nested(struct mm_struct *mm, int subclass) > { > __mmap_lock_trace_start_locking(mm, true); > down_write_nested(&mm->mmap_lock, subclass); > + mm_lock_seqcount_begin(mm); > __mmap_lock_trace_acquire_returned(mm, true, true); > } > > @@ -120,10 +120,27 @@ static inline int mmap_write_lock_killable(struct mm_struct *mm) > > __mmap_lock_trace_start_locking(mm, true); > ret = down_write_killable(&mm->mmap_lock); > + if (!ret) > + mm_lock_seqcount_begin(mm); > __mmap_lock_trace_acquire_returned(mm, true, ret == 0); > return ret; > } > > +/* > + * Drop all currently-held per-VMA locks. > + * This is called from the mmap_lock implementation directly before releasing > + * a write-locked mmap_lock (or downgrading it to read-locked). > + * This should normally NOT be called manually from other places. 
> + * If you want to call this manually anyway, keep in mind that this will release > + * *all* VMA write locks, including ones from further up the stack. > + */ > +static inline void vma_end_write_all(struct mm_struct *mm) > +{ > + mmap_assert_write_locked(mm); > + ASSERT_EXCLUSIVE_WRITER(mm->mm_lock_seq); > + mm_lock_seqcount_end(mm); > +} > + > static inline void mmap_write_unlock(struct mm_struct *mm) > { > __mmap_lock_trace_released(mm, true); > diff --git a/kernel/fork.c b/kernel/fork.c > index e58d27c05788..8cd36645b9fc 100644 > --- a/kernel/fork.c > +++ b/kernel/fork.c > @@ -449,7 +449,7 @@ static bool vma_lock_alloc(struct vm_area_struct *vma) > return false; > > init_rwsem(&vma->vm_lock->lock); > - vma->vm_lock_seq = -1; > + vma->vm_lock_seq = UINT_MAX; > > return true; > } > @@ -1262,9 +1262,6 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p, > seqcount_init(&mm->write_protect_seq); > mmap_init_lock(mm); > INIT_LIST_HEAD(&mm->mmlist); > -#ifdef CONFIG_PER_VMA_LOCK > - mm->mm_lock_seq = 0; > -#endif > mm_pgtables_bytes_init(mm); > mm->map_count = 0; > mm->locked_vm = 0; > diff --git a/mm/init-mm.c b/mm/init-mm.c > index 24c809379274..6af3ad675930 100644 > --- a/mm/init-mm.c > +++ b/mm/init-mm.c > @@ -40,7 +40,7 @@ struct mm_struct init_mm = { > .arg_lock = __SPIN_LOCK_UNLOCKED(init_mm.arg_lock), > .mmlist = LIST_HEAD_INIT(init_mm.mmlist), > #ifdef CONFIG_PER_VMA_LOCK > - .mm_lock_seq = 0, > + .mm_lock_seq = SEQCNT_ZERO(init_mm.mm_lock_seq), > #endif > .user_ns = &init_user_ns, > .cpu_bitmap = CPU_BITS_NONE, > diff --git a/tools/testing/vma/vma.c b/tools/testing/vma/vma.c > index b33b47342d41..9074aaced9c5 100644 > --- a/tools/testing/vma/vma.c > +++ b/tools/testing/vma/vma.c > @@ -87,7 +87,7 @@ static struct vm_area_struct *alloc_and_link_vma(struct mm_struct *mm, > * begun. Linking to the tree will have caused this to be incremented, > * which means we will get a false positive otherwise. 
> */ > - vma->vm_lock_seq = -1; > + vma->vm_lock_seq = UINT_MAX; > > return vma; > } > @@ -212,7 +212,7 @@ static bool vma_write_started(struct vm_area_struct *vma) > int seq = vma->vm_lock_seq; > > /* We reset after each check. */ > - vma->vm_lock_seq = -1; > + vma->vm_lock_seq = UINT_MAX; > > /* The vma_start_write() stub simply increments this value. */ > return seq > -1; > diff --git a/tools/testing/vma/vma_internal.h b/tools/testing/vma/vma_internal.h > index c5b9da034511..4007ec580f85 100644 > --- a/tools/testing/vma/vma_internal.h > +++ b/tools/testing/vma/vma_internal.h > @@ -231,7 +231,7 @@ struct vm_area_struct { > * counter reuse can only lead to occasional unnecessary use of the > * slowpath. > */ > - int vm_lock_seq; > + unsigned int vm_lock_seq; > struct vma_lock *vm_lock; > #endif > > @@ -406,7 +406,7 @@ static inline bool vma_lock_alloc(struct vm_area_struct *vma) > return false; > > init_rwsem(&vma->vm_lock->lock); > - vma->vm_lock_seq = -1; > + vma->vm_lock_seq = UINT_MAX; > > return true; > } > -- > 2.47.0.338.g60cca15819-goog >