On Fri, Jan 10, 2025 at 05:04:05PM -0800, Sean Christopherson wrote:
> Cap the number of ring entries that are reset in a single ioctl to INT_MAX
> to ensure userspace isn't confused by a wrap into negative space, and so
> that, in a truly pathological scenario, KVM doesn't miss a TLB flush due
> to the count wrapping to zero. While the size of the ring is fixed at
> 0x10000 entries and KVM (currently) supports at most 4096, userspace is
> allowed to harvest entries from the ring while the reset is in-progress,
> i.e. it's possible for the ring to always have harvested entries.
>
> Opportunistically return an actual error code from the helper so that a
> future fix to handle pending signals can gracefully return -EINTR.
>
> Cc: Peter Xu <peterx@xxxxxxxxxx>
> Cc: Yan Zhao <yan.y.zhao@xxxxxxxxx>
> Cc: Maxim Levitsky <mlevitsk@xxxxxxxxxx>
> Fixes: fb04a1eddb1a ("KVM: X86: Implement ring-based dirty memory tracking")
> Signed-off-by: Sean Christopherson <seanjc@xxxxxxxxxx>
> ---
>  include/linux/kvm_dirty_ring.h |  8 +++++---
>  virt/kvm/dirty_ring.c          | 10 +++++-----
>  virt/kvm/kvm_main.c            |  9 ++++++---
>  3 files changed, 16 insertions(+), 11 deletions(-)
>
> diff --git a/include/linux/kvm_dirty_ring.h b/include/linux/kvm_dirty_ring.h
> index 4862c98d80d3..82829243029d 100644
> --- a/include/linux/kvm_dirty_ring.h
> +++ b/include/linux/kvm_dirty_ring.h
> @@ -49,9 +49,10 @@ static inline int kvm_dirty_ring_alloc(struct kvm_dirty_ring *ring,
>  }
>
>  static inline int kvm_dirty_ring_reset(struct kvm *kvm,
> -				       struct kvm_dirty_ring *ring)
> +				       struct kvm_dirty_ring *ring,
> +				       int *nr_entries_reset)
>  {
> -	return 0;
> +	return -ENOENT;
>  }
>
>  static inline void kvm_dirty_ring_push(struct kvm_vcpu *vcpu,
> @@ -81,7 +82,8 @@ int kvm_dirty_ring_alloc(struct kvm_dirty_ring *ring, int index, u32 size);
>   * called with kvm->slots_lock held, returns the number of
>   * processed pages.
>   */
> -int kvm_dirty_ring_reset(struct kvm *kvm, struct kvm_dirty_ring *ring);
> +int kvm_dirty_ring_reset(struct kvm *kvm, struct kvm_dirty_ring *ring,
> +			 int *nr_entries_reset);
>
>  /*
>   * returns =0: successfully pushed
> diff --git a/virt/kvm/dirty_ring.c b/virt/kvm/dirty_ring.c
> index 7bc74969a819..2faf894dec5a 100644
> --- a/virt/kvm/dirty_ring.c
> +++ b/virt/kvm/dirty_ring.c
> @@ -104,19 +104,19 @@ static inline bool kvm_dirty_gfn_harvested(struct kvm_dirty_gfn *gfn)
>  	return smp_load_acquire(&gfn->flags) & KVM_DIRTY_GFN_F_RESET;
>  }
>
> -int kvm_dirty_ring_reset(struct kvm *kvm, struct kvm_dirty_ring *ring)
> +int kvm_dirty_ring_reset(struct kvm *kvm, struct kvm_dirty_ring *ring,
> +			 int *nr_entries_reset)
>  {
>  	u32 cur_slot, next_slot;
>  	u64 cur_offset, next_offset;
>  	unsigned long mask;
> -	int count = 0;
>  	struct kvm_dirty_gfn *entry;
>  	bool first_round = true;
>
>  	/* This is only needed to make compilers happy */
>  	cur_slot = cur_offset = mask = 0;
>
> -	while (true) {
> +	while (likely((*nr_entries_reset) < INT_MAX)) {
>  		entry = &ring->dirty_gfns[ring->reset_index & (ring->size - 1)];
>
>  		if (!kvm_dirty_gfn_harvested(entry))
> @@ -129,7 +129,7 @@ int kvm_dirty_ring_reset(struct kvm *kvm, struct kvm_dirty_ring *ring)
>  		kvm_dirty_gfn_set_invalid(entry);
>
>  		ring->reset_index++;
> -		count++;
> +		(*nr_entries_reset)++;
>  		/*
>  		 * Try to coalesce the reset operations when the guest is
>  		 * scanning pages in the same slot.
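
(To illustrate the changelog's "wrap into negative space" concern that
motivates the new INT_MAX bound above: the count is a signed int that is
ultimately returned to userspace via the ioctl. A minimal standalone
sketch, not KVM code; the wrap is well-defined only under wrapping
semantics, e.g. compiled with -fwrapv, matching the kernel's
-fno-strict-overflow:)

#include <limits.h>
#include <stdio.h>

int main(void)
{
	int count = INT_MAX;

	/* One more reset entry past INT_MAX... */
	count++;

	/* ...and the "number of entries reset" goes negative. */
	printf("count = %d\n", count);	/* prints -2147483648 */

	/*
	 * A further 2^31 increments would wrap the count back to 0,
	 * making it look like nothing was reset and, pathologically,
	 * skipping the TLB flush; hence bounding the loop at INT_MAX.
	 */
	return 0;
}
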
> @@ -166,7 +166,7 @@ int kvm_dirty_ring_reset(struct kvm *kvm, struct kvm_dirty_ring *ring)
>
>  	trace_kvm_dirty_ring_reset(ring);
>
> -	return count;
> +	return 0;
>  }
>
>  void kvm_dirty_ring_push(struct kvm_vcpu *vcpu, u32 slot, u64 offset)
> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> index 9d54473d18e3..2d63b4d46ccb 100644
> --- a/virt/kvm/kvm_main.c
> +++ b/virt/kvm/kvm_main.c
> @@ -4877,15 +4877,18 @@ static int kvm_vm_ioctl_reset_dirty_pages(struct kvm *kvm)
>  {
>  	unsigned long i;
>  	struct kvm_vcpu *vcpu;
> -	int cleared = 0;
> +	int cleared = 0, r;
>
>  	if (!kvm->dirty_ring_size)
>  		return -EINVAL;
>
>  	mutex_lock(&kvm->slots_lock);
>
> -	kvm_for_each_vcpu(i, vcpu, kvm)
> -		cleared += kvm_dirty_ring_reset(vcpu->kvm, &vcpu->dirty_ring);

Previously, "cleared" accumulated the cleared counts across all vCPUs.

> +	kvm_for_each_vcpu(i, vcpu, kvm) {
> +		r = kvm_dirty_ring_reset(vcpu->kvm, &vcpu->dirty_ring, &cleared);

Here it's set to the cleared count of just the last vCPU, possibly
causing loss of a kvm_flush_remote_tlbs().

> +		if (r)
> +			break;
> +	}
>
>  	mutex_unlock(&kvm->slots_lock);
>
> --
> 2.47.1.613.gc27f4b7a9f-goog
>
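
For reference, the flush the comment above refers to lives in the tail of
kvm_vm_ioctl_reset_dirty_pages(), just past the quoted context and, per
the diffstat, untouched by this patch. Roughly (a sketch from the
pre-patch code, not part of the quoted diff):

	mutex_unlock(&kvm->slots_lock);

	/*
	 * Skipped entirely if "cleared" reads as 0 here, even if some
	 * vCPU's ring did have entries reset.
	 */
	if (cleared)
		kvm_flush_remote_tlbs(kvm);

	return cleared;
}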