The patch titled
     Subject: mm: convert mm_lock_seq to a proper seqcount
has been added to the -mm mm-unstable branch.  Its filename is
     mm-convert-mm_lock_seq-to-a-proper-seqcount.patch

This patch will shortly appear at
     https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patches/mm-convert-mm_lock_seq-to-a-proper-seqcount.patch

This patch will later appear in the mm-unstable branch at
    git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm

Before you just go and hit "reply", please:
   a) Consider who else should be cc'ed
   b) Prefer to cc a suitable mailing list as well
   c) Ideally: find the original patch on the mailing list and do a
      reply-to-all to that, adding suitable additional cc's

*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***

The -mm tree is included into linux-next via the mm-everything
branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there every 2-3 working days

------------------------------------------------------
From: Suren Baghdasaryan <surenb@xxxxxxxxxx>
Subject: mm: convert mm_lock_seq to a proper seqcount
Date: Fri, 22 Nov 2024 09:44:15 -0800

Convert mm_lock_seq to be seqcount_t and change all mmap_write_lock
variants to increment it, in line with the usual seqcount usage pattern.
This lets us check whether the mmap_lock is write-locked by checking the
mm_lock_seq.sequence counter (odd=locked, even=unlocked).  This will be
used when implementing mmap_lock speculation functions.

As a result, vm_lock_seq is also changed to be unsigned to match the type
of mm_lock_seq.sequence.

Link: https://lkml.kernel.org/r/20241122174416.1367052-2-surenb@xxxxxxxxxx
Suggested-by: Peter Zijlstra <peterz@xxxxxxxxxxxxx>
Signed-off-by: Suren Baghdasaryan <surenb@xxxxxxxxxx>
Reviewed-by: Liam R. Howlett <Liam.Howlett@xxxxxxxxxx>
Cc: Christian Brauner <brauner@xxxxxxxxxx>
Cc: David Hildenbrand <david@xxxxxxxxxx>
Cc: David Howells <dhowells@xxxxxxxxxx>
Cc: Davidlohr Bueso <dave@xxxxxxxxxxxx>
Cc: Hillf Danton <hdanton@xxxxxxxx>
Cc: Hugh Dickins <hughd@xxxxxxxxxx>
Cc: Jann Horn <jannh@xxxxxxxxxx>
Cc: Johannes Weiner <hannes@xxxxxxxxxxx>
Cc: Jonathan Corbet <corbet@xxxxxxx>
Cc: Lorenzo Stoakes <lorenzo.stoakes@xxxxxxxxxx>
Cc: Mateusz Guzik <mjguzik@xxxxxxxxx>
Cc: Matthew Wilcox <willy@xxxxxxxxxxxxx>
Cc: Mel Gorman <mgorman@xxxxxxxxxxxxxxxxxxx>
Cc: Michal Hocko <mhocko@xxxxxxxx>
Cc: Minchan Kim <minchan@xxxxxxxxxx>
Cc: Oleg Nesterov <oleg@xxxxxxxxxx>
Cc: Pasha Tatashin <pasha.tatashin@xxxxxxxxxx>
Cc: Paul E. McKenney <paulmck@xxxxxxxxxx>
Cc: Peter Xu <peterx@xxxxxxxxxx>
Cc: Shakeel Butt <shakeel.butt@xxxxxxxxx>
Cc: Sourav Panda <souravpanda@xxxxxxxxxx>
Cc: Vlastimil Babka <vbabka@xxxxxxx>
Cc: Wei Yang <richard.weiyang@xxxxxxxxx>
Signed-off-by: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx>
---

 include/linux/mm.h               |   12 +++---
 include/linux/mm_types.h         |    7 ++-
 include/linux/mmap_lock.h        |   55 ++++++++++++++++++-----------
 kernel/fork.c                    |    5 --
 mm/init-mm.c                     |    2 -
 tools/testing/vma/vma.c          |    4 +-
 tools/testing/vma/vma_internal.h |    4 +-
 7 files changed, 53 insertions(+), 36 deletions(-)

--- a/include/linux/mmap_lock.h~mm-convert-mm_lock_seq-to-a-proper-seqcount
+++ a/include/linux/mmap_lock.h
@@ -71,39 +71,39 @@ static inline void mmap_assert_write_loc
 }
 
 #ifdef CONFIG_PER_VMA_LOCK
-/*
- * Drop all currently-held per-VMA locks.
- * This is called from the mmap_lock implementation directly before releasing
- * a write-locked mmap_lock (or downgrading it to read-locked).
- * This should normally NOT be called manually from other places.
- * If you want to call this manually anyway, keep in mind that this will release
- * *all* VMA write locks, including ones from further up the stack.
- */
-static inline void vma_end_write_all(struct mm_struct *mm)
+static inline void mm_lock_seqcount_init(struct mm_struct *mm)
 {
-	mmap_assert_write_locked(mm);
-	/*
-	 * Nobody can concurrently modify mm->mm_lock_seq due to exclusive
-	 * mmap_lock being held.
-	 * We need RELEASE semantics here to ensure that preceding stores into
-	 * the VMA take effect before we unlock it with this store.
-	 * Pairs with ACQUIRE semantics in vma_start_read().
-	 */
-	smp_store_release(&mm->mm_lock_seq, mm->mm_lock_seq + 1);
+	seqcount_init(&mm->mm_lock_seq);
+}
+
+static inline void mm_lock_seqcount_begin(struct mm_struct *mm)
+{
+	do_raw_write_seqcount_begin(&mm->mm_lock_seq);
+}
+
+static inline void mm_lock_seqcount_end(struct mm_struct *mm)
+{
+	ASSERT_EXCLUSIVE_WRITER(mm->mm_lock_seq);
+	do_raw_write_seqcount_end(&mm->mm_lock_seq);
 }
+
 #else
-static inline void vma_end_write_all(struct mm_struct *mm) {}
+static inline void mm_lock_seqcount_init(struct mm_struct *mm) {}
+static inline void mm_lock_seqcount_begin(struct mm_struct *mm) {}
+static inline void mm_lock_seqcount_end(struct mm_struct *mm) {}
 #endif
 
 static inline void mmap_init_lock(struct mm_struct *mm)
 {
 	init_rwsem(&mm->mmap_lock);
+	mm_lock_seqcount_init(mm);
 }
 
 static inline void mmap_write_lock(struct mm_struct *mm)
 {
 	__mmap_lock_trace_start_locking(mm, true);
 	down_write(&mm->mmap_lock);
+	mm_lock_seqcount_begin(mm);
 	__mmap_lock_trace_acquire_returned(mm, true, true);
 }
 
@@ -111,6 +111,7 @@ static inline void mmap_write_lock_neste
 {
 	__mmap_lock_trace_start_locking(mm, true);
 	down_write_nested(&mm->mmap_lock, subclass);
+	mm_lock_seqcount_begin(mm);
 	__mmap_lock_trace_acquire_returned(mm, true, true);
 }
 
@@ -120,10 +121,26 @@ static inline int mmap_write_lock_killab
 
 	__mmap_lock_trace_start_locking(mm, true);
 	ret = down_write_killable(&mm->mmap_lock);
+	if (!ret)
+		mm_lock_seqcount_begin(mm);
 	__mmap_lock_trace_acquire_returned(mm, true, ret == 0);
 	return ret;
 }
 
+/*
+ * Drop all currently-held per-VMA locks.
+ * This is called from the mmap_lock implementation directly before releasing
+ * a write-locked mmap_lock (or downgrading it to read-locked).
+ * This should normally NOT be called manually from other places.
+ * If you want to call this manually anyway, keep in mind that this will release
+ * *all* VMA write locks, including ones from further up the stack.
+ */
+static inline void vma_end_write_all(struct mm_struct *mm)
+{
+	mmap_assert_write_locked(mm);
+	mm_lock_seqcount_end(mm);
+}
+
 static inline void mmap_write_unlock(struct mm_struct *mm)
 {
 	__mmap_lock_trace_released(mm, true);
--- a/include/linux/mm.h~mm-convert-mm_lock_seq-to-a-proper-seqcount
+++ a/include/linux/mm.h
@@ -710,7 +710,7 @@ static inline bool vma_start_read(struct
 	 * we don't rely on for anything - the mm_lock_seq read against which we
 	 * need ordering is below.
 	 */
-	if (READ_ONCE(vma->vm_lock_seq) == READ_ONCE(vma->vm_mm->mm_lock_seq))
+	if (READ_ONCE(vma->vm_lock_seq) == READ_ONCE(vma->vm_mm->mm_lock_seq.sequence))
 		return false;
 
 	if (unlikely(down_read_trylock(&vma->vm_lock->lock) == 0))
@@ -727,7 +727,7 @@ static inline bool vma_start_read(struct
 	 * after it has been unlocked.
 	 * This pairs with RELEASE semantics in vma_end_write_all().
	 */
-	if (unlikely(vma->vm_lock_seq == smp_load_acquire(&vma->vm_mm->mm_lock_seq))) {
+	if (unlikely(vma->vm_lock_seq == raw_read_seqcount(&vma->vm_mm->mm_lock_seq))) {
 		up_read(&vma->vm_lock->lock);
 		return false;
 	}
@@ -742,7 +742,7 @@ static inline void vma_end_read(struct v
 }
 
 /* WARNING! Can only be used if mmap_lock is expected to be write-locked */
-static bool __is_vma_write_locked(struct vm_area_struct *vma, int *mm_lock_seq)
+static bool __is_vma_write_locked(struct vm_area_struct *vma, unsigned int *mm_lock_seq)
 {
 	mmap_assert_write_locked(vma->vm_mm);
 
@@ -750,7 +750,7 @@ static bool __is_vma_write_locked(struct
 	 * current task is holding mmap_write_lock, both vma->vm_lock_seq and
 	 * mm->mm_lock_seq can't be concurrently modified.
 	 */
-	*mm_lock_seq = vma->vm_mm->mm_lock_seq;
+	*mm_lock_seq = vma->vm_mm->mm_lock_seq.sequence;
 	return (vma->vm_lock_seq == *mm_lock_seq);
 }
 
@@ -761,7 +761,7 @@ static bool __is_vma_write_locked(struct
  */
 static inline void vma_start_write(struct vm_area_struct *vma)
 {
-	int mm_lock_seq;
+	unsigned int mm_lock_seq;
 
 	if (__is_vma_write_locked(vma, &mm_lock_seq))
 		return;
@@ -779,7 +779,7 @@
 
 static inline void vma_assert_write_locked(struct vm_area_struct *vma)
 {
-	int mm_lock_seq;
+	unsigned int mm_lock_seq;
 
 	VM_BUG_ON_VMA(!__is_vma_write_locked(vma, &mm_lock_seq), vma);
 }
--- a/include/linux/mm_types.h~mm-convert-mm_lock_seq-to-a-proper-seqcount
+++ a/include/linux/mm_types.h
@@ -699,7 +699,7 @@ struct vm_area_struct {
 	 * counter reuse can only lead to occasional unnecessary use of the
 	 * slowpath.
 	 */
-	int vm_lock_seq;
+	unsigned int vm_lock_seq;
 	/* Unstable RCU readers are allowed to read this. */
 	struct vma_lock *vm_lock;
 #endif
@@ -893,6 +893,9 @@ struct mm_struct {
 		 * Roughly speaking, incrementing the sequence number is
 		 * equivalent to releasing locks on VMAs; reading the sequence
 		 * number can be part of taking a read lock on a VMA.
+		 * Incremented every time mmap_lock is write-locked/unlocked.
+		 * Initialized to 0, therefore odd values indicate mmap_lock
+		 * is write-locked and even values that it's released.
 		 *
 		 * Can be modified under write mmap_lock using RELEASE
 		 * semantics.
@@ -901,7 +904,7 @@
 		 * Can be read with ACQUIRE semantics if not holding write
 		 * mmap_lock.
 		 */
-		int mm_lock_seq;
+		seqcount_t mm_lock_seq;
 #endif
 
--- a/kernel/fork.c~mm-convert-mm_lock_seq-to-a-proper-seqcount
+++ a/kernel/fork.c
@@ -448,7 +448,7 @@ static bool vma_lock_alloc(struct vm_are
 		return false;
 
 	init_rwsem(&vma->vm_lock->lock);
-	vma->vm_lock_seq = -1;
+	vma->vm_lock_seq = UINT_MAX;
 
 	return true;
 }
@@ -1267,9 +1267,6 @@ static struct mm_struct *mm_init(struct
 	seqcount_init(&mm->write_protect_seq);
 	mmap_init_lock(mm);
 	INIT_LIST_HEAD(&mm->mmlist);
-#ifdef CONFIG_PER_VMA_LOCK
-	mm->mm_lock_seq = 0;
-#endif
 	mm_pgtables_bytes_init(mm);
 	mm->map_count = 0;
 	mm->locked_vm = 0;
--- a/mm/init-mm.c~mm-convert-mm_lock_seq-to-a-proper-seqcount
+++ a/mm/init-mm.c
@@ -40,7 +40,7 @@ struct mm_struct init_mm = {
 	.arg_lock	= __SPIN_LOCK_UNLOCKED(init_mm.arg_lock),
 	.mmlist		= LIST_HEAD_INIT(init_mm.mmlist),
 #ifdef CONFIG_PER_VMA_LOCK
-	.mm_lock_seq	= 0,
+	.mm_lock_seq	= SEQCNT_ZERO(init_mm.mm_lock_seq),
 #endif
 	.user_ns	= &init_user_ns,
 	.cpu_bitmap	= CPU_BITS_NONE,
--- a/tools/testing/vma/vma.c~mm-convert-mm_lock_seq-to-a-proper-seqcount
+++ a/tools/testing/vma/vma.c
@@ -100,7 +100,7 @@ static struct vm_area_struct *alloc_and_
	 * begun. Linking to the tree will have caused this to be incremented,
	 * which means we will get a false positive otherwise.
	 */
-	vma->vm_lock_seq = -1;
+	vma->vm_lock_seq = UINT_MAX;
 
 	return vma;
 }
 
@@ -225,7 +225,7 @@ static bool vma_write_started(struct vm_
 	int seq = vma->vm_lock_seq;
 
 	/* We reset after each check. */
-	vma->vm_lock_seq = -1;
+	vma->vm_lock_seq = UINT_MAX;
 
 	/* The vma_start_write() stub simply increments this value. */
 	return seq > -1;
--- a/tools/testing/vma/vma_internal.h~mm-convert-mm_lock_seq-to-a-proper-seqcount
+++ a/tools/testing/vma/vma_internal.h
@@ -281,7 +281,7 @@ struct vm_area_struct {
 	 * counter reuse can only lead to occasional unnecessary use of the
 	 * slowpath.
 	 */
-	int vm_lock_seq;
+	unsigned int vm_lock_seq;
 	struct vma_lock *vm_lock;
 #endif
 
@@ -467,7 +467,7 @@ static inline bool vma_lock_alloc(struct
 		return false;
 
 	init_rwsem(&vma->vm_lock->lock);
-	vma->vm_lock_seq = -1;
+	vma->vm_lock_seq = UINT_MAX;
 
 	return true;
 }
_

Patches currently in -mm which might be from surenb@xxxxxxxxxx are

alloc_tag-fix-module-allocation-tags-populated-area-calculation.patch
alloc_tag-fix-set_codetag_empty-when-config_mem_alloc_profiling_debug.patch
seqlock-add-raw_seqcount_try_begin.patch
mm-convert-mm_lock_seq-to-a-proper-seqcount.patch
mm-introduce-mmap_lock_speculate_try_beginretry.patch
mm-introduce-vma_start_read_locked_nested-helpers.patch
mm-move-per-vma-lock-into-vm_area_struct.patch
mm-mark-vma-as-detached-until-its-added-into-vma-tree.patch
mm-make-vma-cache-slab_typesafe_by_rcu.patch
mm-slab-allow-freeptr_offset-to-be-used-with-ctor.patch
docs-mm-document-latest-changes-to-vm_lock.patch
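
For readers unfamiliar with the odd/even convention the changelog describes,
below is a small standalone sketch of the idea.  It is not part of the patch
and it is not kernel code: it models the counter with a plain unsigned int
instead of the kernel's seqcount_t, and every name in it is made up for the
example.

/*
 * Standalone illustration of the odd/even convention the patch applies to
 * mm->mm_lock_seq: the counter is bumped once when the lock is write-locked
 * and once again when it is released, so an odd value means "write-locked"
 * and an even value means "released".  All identifiers here are hypothetical;
 * the real kernel uses seqcount_t with do_raw_write_seqcount_begin()/_end().
 */
#include <assert.h>
#include <stdbool.h>
#include <stdio.h>

struct fake_mm {
	unsigned int lock_seq;	/* stands in for mm->mm_lock_seq.sequence */
};

static void fake_write_lock(struct fake_mm *mm)
{
	mm->lock_seq++;		/* even -> odd: now write-locked */
}

static void fake_write_unlock(struct fake_mm *mm)
{
	mm->lock_seq++;		/* odd -> even: released */
}

static bool fake_is_write_locked(const struct fake_mm *mm)
{
	return mm->lock_seq & 1;	/* odd means a writer is active */
}

int main(void)
{
	struct fake_mm mm = { .lock_seq = 0 };

	assert(!fake_is_write_locked(&mm));
	fake_write_lock(&mm);
	assert(fake_is_write_locked(&mm));
	fake_write_unlock(&mm);
	assert(!fake_is_write_locked(&mm));
	printf("final sequence: %u\n", mm.lock_seq);
	return 0;
}

In the patch itself, the same state transitions are performed by the new
mm_lock_seqcount_begin()/mm_lock_seqcount_end() helpers, and the odd/even
state of mm->mm_lock_seq.sequence is what later mmap_lock speculation code
can use to tell whether a writer is active.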