On Mon, Jan 16, 2023 at 3:08 PM Suren Baghdasaryan <surenb@xxxxxxxxxx> wrote:
>
> On Mon, Jan 16, 2023 at 6:07 AM Hillf Danton <hdanton@xxxxxxxx> wrote:
> >
> > On Mon, 9 Jan 2023 12:53:36 -0800 Suren Baghdasaryan <surenb@xxxxxxxxxx>
> > > --- a/include/linux/mm.h
> > > +++ b/include/linux/mm.h
> > > @@ -627,12 +627,16 @@ static inline void vma_write_lock(struct vm_area_struct *vma)
> > >  	 * mm->mm_lock_seq can't be concurrently modified.
> > >  	 */
> > >  	mm_lock_seq = READ_ONCE(vma->vm_mm->mm_lock_seq);
> > > -	if (vma->vm_lock_seq == mm_lock_seq)
> > > +	if (vma->vm_lock->lock_seq == mm_lock_seq)
> > >  		return;
> >
> > lock acquire for write to info lockdep.
>
> Thanks for the review Hillf!
>
> Good idea. Will add in the next version.
>
> > >
> > > -	down_write(&vma->vm_lock->lock);
> > > -	vma->vm_lock_seq = mm_lock_seq;
> > > -	up_write(&vma->vm_lock->lock);
> > > +	if (atomic_cmpxchg(&vma->vm_lock->count, 0, -1))
> > > +		wait_event(vma->vm_mm->vma_writer_wait,
> > > +			   atomic_cmpxchg(&vma->vm_lock->count, 0, -1) == 0);
> > > +	vma->vm_lock->lock_seq = mm_lock_seq;
> > > +	/* Write barrier to ensure lock_seq change is visible before count */
> > > +	smp_wmb();
> > > +	atomic_set(&vma->vm_lock->count, 0);
> > >  }
> > >
> > >  /*
> > > @@ -643,20 +647,28 @@ static inline void vma_write_lock(struct vm_area_struct *vma)
> > >  static inline bool vma_read_trylock(struct vm_area_struct *vma)
> > >  {
> > >  	/* Check before locking. A race might cause false locked result. */
> > > -	if (vma->vm_lock_seq == READ_ONCE(vma->vm_mm->mm_lock_seq))
> > > +	if (vma->vm_lock->lock_seq == READ_ONCE(vma->vm_mm->mm_lock_seq))
> > >  		return false;
> >
> > Add mb to pair with the above wmb like
>
> The wmb above is to ensure the ordering between updates of lock_seq
> and vm_lock->count (lock_seq is updated first and vm_lock->count only
> after that). The first access to vm_lock->count in this function is
> atomic_inc_unless_negative() and it's an atomic RMW operation with a
> return value. According to documentation such functions are fully
> ordered, therefore I think we already have an implicit full memory
> barrier between reads of lock_seq and vm_lock->count here. Am I wrong?
>
> >
> > 	if (READ_ONCE(vma->vm_lock->lock_seq) == READ_ONCE(vma->vm_mm->mm_lock_seq)) {
> > 		smp_acquire__after_ctrl_dep();
> > 		return false;
> > 	}
> >
> >
> > > -	if (unlikely(down_read_trylock(&vma->vm_lock->lock) == 0))
> > > +	if (unlikely(!atomic_inc_unless_negative(&vma->vm_lock->count)))
> > >  		return false;
> > >
> > > +	/* If atomic_t overflows, restore and fail to lock. */
> > > +	if (unlikely(atomic_read(&vma->vm_lock->count) < 0)) {
> > > +		if (atomic_dec_and_test(&vma->vm_lock->count))
> > > +			wake_up(&vma->vm_mm->vma_writer_wait);
> > > +		return false;
> > > +	}
> > > +
> > >  	/*
> > >  	 * Overflow might produce false locked result.
> > >  	 * False unlocked result is impossible because we modify and check
> > >  	 * vma->vm_lock_seq under vma->vm_lock protection and mm->mm_lock_seq
> > >  	 * modification invalidates all existing locks.
> > >  	 */
> > > -	if (unlikely(vma->vm_lock_seq == READ_ONCE(vma->vm_mm->mm_lock_seq))) {
> > > -		up_read(&vma->vm_lock->lock);
> > > +	if (unlikely(vma->vm_lock->lock_seq == READ_ONCE(vma->vm_mm->mm_lock_seq))) {
> > > +		if (atomic_dec_and_test(&vma->vm_lock->count))
> > > +			wake_up(&vma->vm_mm->vma_writer_wait);
> > >  		return false;
> > >  	}
> >
> > Simpler way to detect write lock owner and count overflow like
> >
> > 	int count = atomic_read(&vma->vm_lock->count);
> > 	for (;;) {
> > 		int new = count + 1;
> >
> > 		if (count < 0 || new < 0)
> > 			return false;
> >
> > 		new = atomic_cmpxchg(&vma->vm_lock->count, count, new);
> > 		if (new == count)
> > 			break;
> > 		count = new;
> > 		cpu_relax();
> > 	}
> >
> > 	(wake up waiting readers after taking the lock;
> > 	but the write lock owner before this read trylock should be
> > 	responsible for waking waiters up.)
> >
> > lock acquire for read.
>
> This schema might cause readers to wait, which is not an exact
> replacement for down_read_trylock(). The requirement to wake up
> waiting readers also complicates things and since we can always fall
> back to mmap_lock, that complication is unnecessary IMHO. I could use
> part of your suggestion like this:
>
>         int new = count + 1;
>
>         if (count < 0 || new < 0)
>                 return false;
>
>         new = atomic_cmpxchg(&vma->vm_lock->count, count, new);
>         if (new == count)
>                 return false;

Made a mistake above. It should have been:

        if (new != count)
                return false;

>
> Compared to doing atomic_inc_unless_negative() first, like I did
> originally, this schema opens a bit wider window for the writer to get
> in the middle and cause the reader to fail locking but I don't think
> it would result in any visible regression.
>
> >
> > > 	return true;
> > > @@ -664,7 +676,8 @@ static inline bool vma_read_trylock(struct vm_area_struct *vma)
> > >
> > >  static inline void vma_read_unlock(struct vm_area_struct *vma)
> > >  {
> >
> > lock release for read.
>
> Ack.
>
> >
> > > -	up_read(&vma->vm_lock->lock);
> > > +	if (atomic_dec_and_test(&vma->vm_lock->count))
> > > +		wake_up(&vma->vm_mm->vma_writer_wait);
> > >  }
> >
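
For reference, pulling the pieces above together, here is a rough sketch
(just for discussion, not necessarily what the next version will look
like) of vma_read_trylock() with the single-attempt cmpxchg and the
corrected comparison, keeping the patch's vm_lock->count convention
(-1 means write-locked, positive values count readers):

        static inline bool vma_read_trylock(struct vm_area_struct *vma)
        {
        	int count, new;

        	/* Check before locking. A race might cause false locked result. */
        	if (vma->vm_lock->lock_seq == READ_ONCE(vma->vm_mm->mm_lock_seq))
        		return false;

        	count = atomic_read(&vma->vm_lock->count);
        	new = count + 1;
        	/* Fail if write-locked or if the increment would overflow. */
        	if (count < 0 || new < 0)
        		return false;

        	/* Single attempt; any concurrent change means falling back to mmap_lock. */
        	if (atomic_cmpxchg(&vma->vm_lock->count, count, new) != count)
        		return false;

        	/* Recheck and drop the lock if mm_lock_seq changed under us. */
        	if (unlikely(vma->vm_lock->lock_seq == READ_ONCE(vma->vm_mm->mm_lock_seq))) {
        		if (atomic_dec_and_test(&vma->vm_lock->count))
        			wake_up(&vma->vm_mm->vma_writer_wait);
        		return false;
        	}
        	return true;
        }

A successful atomic_cmpxchg() is a fully ordered RMW, so it keeps playing
the role of the implicit barrier discussed above, and the trylock stays
non-blocking.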
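
And on the lockdep note earlier in the thread, a minimal sketch of what
explicit annotations for the write path could look like, assuming a
(hypothetical here) dep_map field is added to struct vma_lock;
lock_acquire()/lock_release() are the stock lockdep hooks that
down_write()/up_write() were providing implicitly before:

        	/* Annotate before potentially blocking, like down_write() does. */
        	lock_acquire(&vma->vm_lock->dep_map, 0, 0, 0, 1, NULL, _RET_IP_);
        	if (atomic_cmpxchg(&vma->vm_lock->count, 0, -1))
        		wait_event(vma->vm_mm->vma_writer_wait,
        			   atomic_cmpxchg(&vma->vm_lock->count, 0, -1) == 0);
        	vma->vm_lock->lock_seq = mm_lock_seq;
        	/* Write barrier to ensure lock_seq change is visible before count */
        	smp_wmb();
        	/* Annotate the release before actually dropping the write lock. */
        	lock_release(&vma->vm_lock->dep_map, _RET_IP_);
        	atomic_set(&vma->vm_lock->count, 0);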