Re: [RFC PATCH 2/8] KVM: Add KVM_MAP_MEMORY vcpu ioctl to pre-populate guest memory

Isaku Yamahata <isaku.yamahata@xxxxxxxxx> · Mon, 11 Mar 2024 15:19:31 -0700

On Mon, Mar 11, 2024 at 10:23:28AM -0700,
Sean Christopherson <seanjc@xxxxxxxxxx> wrote:

> On Fri, Mar 01, 2024, isaku.yamahata@xxxxxxxxx wrote:
> > diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> > index d1fd9cb5d037..d77c9b79d76b 100644
> > --- a/virt/kvm/kvm_main.c
> > +++ b/virt/kvm/kvm_main.c
> > @@ -4419,6 +4419,69 @@ static int kvm_vcpu_ioctl_get_stats_fd(struct kvm_vcpu *vcpu)
> >  	return fd;
> >  }
> >  
> > +__weak int kvm_arch_vcpu_pre_map_memory(struct kvm_vcpu *vcpu)
> > +{
> > +	return -EOPNOTSUPP;
> > +}
> > +
> > +__weak int kvm_arch_vcpu_map_memory(struct kvm_vcpu *vcpu,
> > +				    struct kvm_memory_mapping *mapping)
> > +{
> > +	return -EOPNOTSUPP;
> > +}
> > +
> > +static int kvm_vcpu_map_memory(struct kvm_vcpu *vcpu,
> > +			       struct kvm_memory_mapping *mapping)
> > +{
> > +	bool added = false;
> > +	int idx, r = 0;
> 
> Pointless initialization of 'r'.
> 
> > +
> > +	if (mapping->flags & ~(KVM_MEMORY_MAPPING_FLAG_WRITE |
> > +			       KVM_MEMORY_MAPPING_FLAG_EXEC |
> > +			       KVM_MEMORY_MAPPING_FLAG_USER |
> > +			       KVM_MEMORY_MAPPING_FLAG_PRIVATE))
> > +		return -EINVAL;
> > +	if ((mapping->flags & KVM_MEMORY_MAPPING_FLAG_PRIVATE) &&
> > +	    !kvm_arch_has_private_mem(vcpu->kvm))
> > +		return -EINVAL;
> > +
> > +	/* Sanity check */
> 
> Pointless comment.
> 
> > +	if (!IS_ALIGNED(mapping->source, PAGE_SIZE) ||
> > +	    !mapping->nr_pages ||
> 
> > +	    mapping->base_gfn + mapping->nr_pages <= mapping->base_gfn)
> > +		return -EINVAL;
> > +
> > +	vcpu_load(vcpu);
> > +	idx = srcu_read_lock(&vcpu->kvm->srcu);
> > +	r = kvm_arch_vcpu_pre_map_memory(vcpu);
> 
> This hooks is unnecessary, x86's kvm_mmu_reload() is optimized for the happy path
> where the MMU is already loaded.  Just make the call from kvm_arch_vcpu_map_memory().
> 
> > +	if (r)
> > +		return r;
> 
> Which is a good thing, because this leaks the SRCU lock.
> 
> > +
> > +	while (mapping->nr_pages) {
> > +		if (signal_pending(current)) {
> > +			r = -ERESTARTSYS;
> 
> Why -ERESTARTSYS instead of -EINTR?  The latter is KVM's typical response to a
> pending signal.
> 
> > +			break;
> > +		}
> > +
> > +		if (need_resched())
> 
> No need to manually check need_resched(), the below is a _conditional_ resched.
> The reason KVM explicitly checks need_resched() in MMU flows is because KVM needs
> to drop mmu_lock before rescheduling, i.e. calling cond_resched() directly would
> try to schedule() while holding a spinlock.
> 
> > +			cond_resched();
> > +
> > +		r = kvm_arch_vcpu_map_memory(vcpu, mapping);
> > +		if (r)
> > +			break;
> > +
> > +		added = true;
> > +	}
> > +
> > +	srcu_read_unlock(&vcpu->kvm->srcu, idx);
> > +	vcpu_put(vcpu);
> > +
> > +	if (added && mapping->nr_pages > 0)
> > +		r = -EAGAIN;
> 
> No, this clobbers 'r', which might hold a fatal error code.  I don't see any
> reason for common code to ever force -EAGAIN, it can't possibly know if trying
> again is reasonable.

Thanks for review.  With those included, the hunk is as follows.

diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index d1fd9cb5d037..342269ef9f13 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -4419,6 +4419,47 @@ static int kvm_vcpu_ioctl_get_stats_fd(struct kvm_vcpu *vcpu)
        return fd;
 }
 
+__weak int kvm_arch_vcpu_map_memory(struct kvm_vcpu *vcpu,
+                                   struct kvm_memory_mapping *mapping)
+{
+       return -EOPNOTSUPP;
+}
+
+static int kvm_vcpu_map_memory(struct kvm_vcpu *vcpu,
+                              struct kvm_memory_mapping *mapping)
+{
+       int idx, r;
+
+       if (mapping->flags)
+               return -EINVAL;
+
+       if (!IS_ALIGNED(mapping->source, PAGE_SIZE) ||
+           mapping->base_gfn + mapping->nr_pages <= mapping->base_gfn)
+               return -EINVAL;
+
+       vcpu_load(vcpu);
+       idx = srcu_read_lock(&vcpu->kvm->srcu);
+
+       r = 0;
+       while (mapping->nr_pages) {
+               if (signal_pending(current)) {
+                       r = -EINTR;
+                       break;
+               }
+
+               r = kvm_arch_vcpu_map_memory(vcpu, mapping);
+               if (r)
+                       break;
+
+               cond_resched();
+       }
+
+       srcu_read_unlock(&vcpu->kvm->srcu, idx);
+       vcpu_put(vcpu);
+
+       return r;
+}
+
 static long kvm_vcpu_ioctl(struct file *filp,
                           unsigned int ioctl, unsigned long arg)
 {
-- 
Isaku Yamahata <isaku.yamahata@xxxxxxxxx>