On Mon, Dec 11, 2017 at 10:14:48PM +0100, David Hildenbrand wrote: > > > #endif /* __LINUX_KVM_H */ > > diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c > > index 015fb06c7522..0303bd8f4d6a 100644 > > --- a/arch/x86/kvm/hyperv.c > > +++ b/arch/x86/kvm/hyperv.c > > @@ -29,6 +29,7 @@ > > #include <linux/kvm_host.h> > > #include <linux/highmem.h> > > #include <linux/sched/cputime.h> > > +#include <linux/eventfd.h> > > > > #include <asm/apicdef.h> > > #include <trace/events/kvm.h> > > @@ -1226,6 +1227,54 @@ static int kvm_hv_hypercall_complete_userspace(struct kvm_vcpu *vcpu) > > return 1; > > } > > > > +static u16 hvcall_sigevent_param(struct kvm_vcpu *vcpu, gpa_t gpa, u64 *param) > > +{ > > + struct page *page; > > + void *pg; > > + u64 *msg; > > + > > + if ((gpa & (__alignof__(*msg) - 1)) || > > + offset_in_page(gpa) + sizeof(*msg) > PAGE_SIZE) > > + return HV_STATUS_INVALID_ALIGNMENT; > > + > > Don't we also need srcu_read_lock(&vcpu->kvm->srcu) for > kvm_vcpu_gfn_to_page? Seems so indeed. I also think I'll switch to using kvm_vcpu_read_guest(). > > + page = kvm_vcpu_gfn_to_page(vcpu, gpa >> PAGE_SHIFT); > > + if (is_error_page(page)) > > + return HV_STATUS_INSUFFICIENT_MEMORY; > > + > > + pg = kmap_atomic(page); > > + msg = pg + offset_in_page(gpa); > > + *param = *msg; > > + kunmap_atomic(pg); > > + return HV_STATUS_SUCCESS; > > +} > > + > > +static u16 kvm_hvcall_signal_event(struct kvm_vcpu *vcpu, bool fast, u64 param) > > +{ > > + u16 ret; > > + u32 conn_id; > > + int idx; > > + struct eventfd_ctx *eventfd; > > + > > + if (unlikely(!fast)) { > > + u64 gpa = param; > > + ret = hvcall_sigevent_param(vcpu, gpa, &param); > > + if (ret != HV_STATUS_SUCCESS) > > + return ret; > > + } > > + > > + conn_id = (param & 0xffffffff) + ((param >> 32) & 0xffff); > > conn_id = param + ((param >> 32) & 0xffff); > > (as param will be automatically truncated to 32 bits - sizeof(conn_id)) Agreed. 
However, it looks like what I really need is conn_id = (param & KVM_HYPERV_CONN_ID_MASK) + ((param >> 32) & 0xffff); otherwise the overflowed conn_id may pass the following check > > + if (conn_id & ~KVM_HYPERV_CONN_ID_MASK) > > + return HV_STATUS_INVALID_CONNECTION_ID; > > + > > + idx = srcu_read_lock(&vcpu->kvm->srcu); > > + eventfd = idr_find(&vcpu->kvm->arch.hyperv.conn_to_evt, conn_id); > > + if (eventfd) > > + eventfd_signal(eventfd, 1); > > + srcu_read_unlock(&vcpu->kvm->srcu, idx); > > + > > + return eventfd ? HV_STATUS_SUCCESS : HV_STATUS_INVALID_CONNECTION_ID; > > +} > > + > > int kvm_hv_hypercall(struct kvm_vcpu *vcpu) > > { > > u64 param, ingpa, outgpa, ret; > > @@ -1276,8 +1325,12 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu) > > case HVCALL_NOTIFY_LONG_SPIN_WAIT: > > kvm_vcpu_on_spin(vcpu, true); > > break; > > - case HVCALL_POST_MESSAGE: > > case HVCALL_SIGNAL_EVENT: > > + res = kvm_hvcall_signal_event(vcpu, fast, ingpa); > > + if (res != HV_STATUS_INVALID_CONNECTION_ID) > > + break; > > + /* maybe userspace knows this conn_id: fall through */ > > + case HVCALL_POST_MESSAGE: > > /* don't bother userspace if it has no way to handle it */ > > if (!vcpu_to_synic(vcpu)->active) { > > res = HV_STATUS_INVALID_HYPERCALL_CODE; > > @@ -1305,8 +1358,67 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu) > > void kvm_hv_init_vm(struct kvm *kvm) > > { > > mutex_init(&kvm->arch.hyperv.hv_lock); > > + idr_init(&kvm->arch.hyperv.conn_to_evt); > > } > > > > void kvm_hv_destroy_vm(struct kvm *kvm) > > { > > + struct eventfd_ctx *eventfd; > > + int i; > > + > > At this point we are guaranteed that nobody else can call > kvm_hv_eventfd_assign/unassign? (I assume so, just want to have it > verified that we don't need the lock :) ) Yes, it's in kvm_destroy_vm, which won't get called until the kvm vm fd is released, so no concurrency with kvm_vm_ioctl is possible. 
> > + idr_for_each_entry(&kvm->arch.hyperv.conn_to_evt, eventfd, i) > > + eventfd_ctx_put(eventfd); > > + idr_destroy(&kvm->arch.hyperv.conn_to_evt); > > +} > > + > > +static int kvm_hv_eventfd_assign(struct kvm *kvm, u32 conn_id, int fd) > > +{ > > + struct kvm_hv *hv = &kvm->arch.hyperv; > > + struct eventfd_ctx *eventfd; > > + int ret; > > + > > + eventfd = eventfd_ctx_fdget(fd); > > + if (IS_ERR(eventfd)) > > + return PTR_ERR(eventfd); > > + > > + mutex_lock(&hv->hv_lock); > > + ret = idr_alloc(&hv->conn_to_evt, eventfd, conn_id, conn_id + 1, > > + GFP_KERNEL); > > + mutex_unlock(&hv->hv_lock); > > + > > + if (ret >= 0) > > + return 0; > > + > > + if (ret == -ENOSPC) > > + ret = -EEXIST; > > + eventfd_ctx_put(eventfd); > > + return ret; > > +} > > + > > +static int kvm_hv_eventfd_deassign(struct kvm *kvm, u32 conn_id) > > +{ > > + struct kvm_hv *hv = &kvm->arch.hyperv; > > Guess we could move that inline without exceeding any line lengths, but > just my opinion. (same applies in the function above where we already > need two lines either way for idr_alloc) Right, but it still reads easier with fewer dots and arrows IMHO. Thanks, Roman.