On Fri, Jan 10, 2025 at 3:19 PM Sean Christopherson <seanjc@xxxxxxxxxx> wrote:
>
> On Tue, Nov 05, 2024, James Houghton wrote:
> > +static unsigned long kvm_rmap_lock(struct kvm_rmap_head *rmap_head)
> > +{
> > +        unsigned long old_val, new_val;
> > +
> > +        /*
> > +         * Elide the lock if the rmap is empty, as lockless walkers (read-only
> > +         * mode) don't need to (and can't) walk an empty rmap, nor can they add
> > +         * entries to the rmap. I.e. the only paths that process empty rmaps
> > +         * do so while holding mmu_lock for write, and are mutually exclusive.
> > +         */
> > +        old_val = atomic_long_read(&rmap_head->val);
> > +        if (!old_val)
> > +                return 0;
> > +
> > +        do {
> > +                /*
> > +                 * If the rmap is locked, wait for it to be unlocked before
> > +                 * trying acquire the lock, e.g. to bounce the cache line.
> > +                 */
> > +                while (old_val & KVM_RMAP_LOCKED) {
> > +                        old_val = atomic_long_read(&rmap_head->val);
> > +                        cpu_relax();
> > +                }
>
> As Lai Jiangshan pointed out[1][2], this should PAUSE first, then re-read the SPTE,
> and KVM needs to disable preemption while holding the lock, because this is nothing
> more than a rudimentary spinlock.

Ah! Sorry for missing this.

>
> [1] https://lore.kernel.org/all/ZrooozABEWSnwzxh@xxxxxxxxxx
> [2] https://lore.kernel.org/all/Zrt5eNArfQA7x1qj@xxxxxxxxxx
>
> I think this?
>
> diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
> index 1a0950b77126..9dac1bbb77d4 100644
> --- a/arch/x86/kvm/mmu/mmu.c
> +++ b/arch/x86/kvm/mmu/mmu.c
> @@ -873,6 +873,8 @@ static unsigned long __kvm_rmap_lock(struct kvm_rmap_head *rmap_head)
>  {
>          unsigned long old_val, new_val;
>
> +        lockdep_assert_preemption_disabled();
> +
>          /*
>           * Elide the lock if the rmap is empty, as lockless walkers (read-only
>           * mode) don't need to (and can't) walk an empty rmap, nor can they add
> @@ -889,8 +891,8 @@ static unsigned long __kvm_rmap_lock(struct kvm_rmap_head *rmap_head)
>                   * trying acquire the lock, e.g. to bounce the cache line.
>                   */
>                  while (old_val & KVM_RMAP_LOCKED) {
> -                        old_val = atomic_long_read(&rmap_head->val);
>                          cpu_relax();
> +                        old_val = atomic_long_read(&rmap_head->val);
>                  }
>
>                  /*
> @@ -931,6 +933,8 @@ static unsigned long kvm_rmap_lock(struct kvm *kvm,
>  static void kvm_rmap_unlock(struct kvm_rmap_head *rmap_head,
>                              unsigned long new_val)
>  {
> +        lockdep_assert_held_write(&kvm->mmu_lock);
> +
>          KVM_MMU_WARN_ON(new_val & KVM_RMAP_LOCKED);
>          /*
>           * Ensure that all accesses to the rmap have completed
> @@ -948,12 +952,21 @@ static unsigned long kvm_rmap_get(struct kvm_rmap_head *rmap_head)
>
>  /*
>   * If mmu_lock isn't held, rmaps can only locked in read-only mode. The actual
> - * locking is the same, but the caller is disallowed from modifying the rmap,
> - * and so the unlock flow is a nop if the rmap is/was empty.
> + * locking is the same, but preemption needs to be manually disabled (because
> + * a spinlock isn't already held) and the caller is disallowed from modifying
> + * the rmap, and so the unlock flow is a nop if the rmap is/was empty. Note,
> + * preemption must be disabled *before* acquiring the bitlock. If the rmap is
> + * empty, i.e. isn't truly locked, immediately re-enable preemption.
>   */
>  static unsigned long kvm_rmap_lock_readonly(struct kvm_rmap_head *rmap_head)
>  {
> -        return __kvm_rmap_lock(rmap_head);
> +        unsigned long rmap_val;
> +        preempt_disable();
> +
> +        rmap_val = __kvm_rmap_lock(rmap_head);
> +        if (!rmap_val)
> +                preempt_enable();
> +        return rmap_val;
>  }
>
>  static void kvm_rmap_unlock_readonly(struct kvm_rmap_head *rmap_head,
> @@ -964,6 +977,7 @@ static void kvm_rmap_unlock_readonly(struct kvm_rmap_head *rmap_head,
>
>          KVM_MMU_WARN_ON(old_val != kvm_rmap_get(rmap_head));
>          atomic_long_set(&rmap_head->val, old_val);
> +        preempt_enable();
>  }
>
>  /*

I don't see anything wrong with these changes. Thanks! Applied.
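
For anyone reading along, below is a rough, stand-alone sketch of the bit-spinlock
pattern being discussed, using C11 atomics in place of the kernel's
atomic_long_read()/atomic_long_try_cmpxchg()/cpu_relax() helpers. All names here
(demo_rmap_head, DEMO_LOCKED, demo_lock(), demo_unlock()) are invented for
illustration and this is not the KVM code; the kernel-only
preempt_disable()/preempt_enable() pairing has no userspace equivalent and is
only noted in comments.

/*
 * Userspace sketch of a bit-spinlock stored in the low bit of an atomic
 * unsigned long, with the "PAUSE first, then re-read" retry ordering.
 */
#include <stdatomic.h>
#include <stdio.h>

#define DEMO_LOCKED     1UL

struct demo_rmap_head {
        atomic_ulong val;
};

static unsigned long demo_lock(struct demo_rmap_head *head)
{
        unsigned long old_val, new_val;

        /* Elide the lock entirely if the value is empty, mirroring the rmap case. */
        old_val = atomic_load(&head->val);
        if (!old_val)
                return 0;

        do {
                /*
                 * While someone else holds the lock bit, spin.  In the kernel,
                 * cpu_relax() (PAUSE) goes at the top of this loop, *before*
                 * the re-read, so the value used for the retry is fresh.
                 * Preemption would also be disabled around the whole critical
                 * section in the kernel.
                 */
                while (old_val & DEMO_LOCKED) {
                        /* cpu_relax() / PAUSE would go here. */
                        old_val = atomic_load(&head->val);
                }

                new_val = old_val | DEMO_LOCKED;

                /* On failure, old_val is refreshed with the current value. */
        } while (!atomic_compare_exchange_weak(&head->val, &old_val, new_val));

        return old_val;
}

static void demo_unlock(struct demo_rmap_head *head, unsigned long new_val)
{
        /* Release ordering: prior accesses complete before the lock bit clears. */
        atomic_store_explicit(&head->val, new_val & ~DEMO_LOCKED,
                              memory_order_release);
}

int main(void)
{
        struct demo_rmap_head head;
        unsigned long val;

        atomic_init(&head.val, 0xf00dUL << 1);  /* arbitrary non-empty payload */

        val = demo_lock(&head);
        printf("locked, payload = %#lx\n", val >> 1);
        demo_unlock(&head, val);

        return 0;
}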