On 07.12.2017 15:18, Roman Kagan wrote: > In Hyper-V, the fast guest->host notification mechanism is the > SIGNAL_EVENT hypercall, with a single parameter of the connection ID to > signal. > > Currently this hypercall incurs a user exit and requires the userspace > to decode the parameters and trigger the notification of the potentially > different I/O context. > > To avoid the costly user exit, process this hypercall and signal the > corresponding eventfd in KVM, similar to ioeventfd. The association > between the connection id and the eventfd is established via the newly > introduced KVM_HYPERV_EVENTFD ioctl, and maintained in an > (srcu-protected) IDR. > > Signed-off-by: Roman Kagan <rkagan@xxxxxxxxxxxxx> > --- > v1 -> v2: > - make data types consistent > - get by without the recently dropped struct hv_input_signal_event > - fix subject prefix > > Documentation/virtual/kvm/api.txt | 23 ++++++++ > arch/x86/include/asm/kvm_host.h | 2 + > arch/x86/kvm/hyperv.h | 1 + > include/uapi/linux/kvm.h | 13 +++++ > arch/x86/kvm/hyperv.c | 115 +++++++++++++++++++++++++++++++++++++- > arch/x86/kvm/x86.c | 10 ++++ > 6 files changed, 163 insertions(+), 1 deletion(-) > > diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt > index f670e4b9e7f3..e4f319add8b7 100644 > --- a/Documentation/virtual/kvm/api.txt > +++ b/Documentation/virtual/kvm/api.txt > @@ -3394,6 +3394,29 @@ invalid, if invalid pages are written to (e.g. after the end of memory) > or if no page table is present for the addresses (e.g. when using > hugepages). > > +4.109 KVM_HYPERV_EVENTFD > + > +Capability: KVM_CAP_HYPERV_EVENTFD > +Architectures: x86 > +Type: vm ioctl > +Parameters: struct kvm_hyperv_eventfd (in) > +Returns: 0 on success, !0 on error > + > +This ioctl (un)registers an eventfd to receive notifications from the guest on > +the specified Hyper-V connection id through the SIGNAL_EVENT hypercall, without > +causing a user exit. > + > +struct kvm_hyperv_eventfd { > + __u32 conn_id; > + __s32 fd; > + __u32 flags; > + __u32 padding[3]; > +}; > + > +The acceptable values for the flags field: > + > +#define KVM_HYPERV_EVENTFD_DEASSIGN (1 << 0) Do you also want to document the conn_id valid values? And for both cases, what happens if other values are provided. (-EINVALID) > + > 5. The kvm_run structure > ------------------------ > > diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h > index 977de5fb968b..d2d318540c87 100644 > --- a/arch/x86/include/asm/kvm_host.h > +++ b/arch/x86/include/asm/kvm_host.h > @@ -739,6 +739,8 @@ struct kvm_hv { > u64 hv_crash_ctl; > > HV_REFERENCE_TSC_PAGE tsc_ref; > + > + struct idr conn_to_evt; > }; > > enum kvm_irqchip_mode { > diff --git a/arch/x86/kvm/hyperv.h b/arch/x86/kvm/hyperv.h > index cc2468244ca2..837465d69c6d 100644 > --- a/arch/x86/kvm/hyperv.h > +++ b/arch/x86/kvm/hyperv.h > @@ -90,5 +90,6 @@ void kvm_hv_setup_tsc_page(struct kvm *kvm, > > void kvm_hv_init_vm(struct kvm *kvm); > void kvm_hv_destroy_vm(struct kvm *kvm); > +int kvm_vm_ioctl_hv_eventfd(struct kvm *kvm, struct kvm_hyperv_eventfd *args); > > #endif > diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h > index 282d7613fce8..465f45c13cdc 100644 > --- a/include/uapi/linux/kvm.h > +++ b/include/uapi/linux/kvm.h > @@ -932,6 +932,7 @@ struct kvm_ppc_resize_hpt { > #define KVM_CAP_HYPERV_SYNIC2 148 > #define KVM_CAP_HYPERV_VP_INDEX 149 > #define KVM_CAP_S390_AIS_MIGRATION 150 > +#define KVM_CAP_HYPERV_EVENTFD 151 > > #ifdef KVM_CAP_IRQ_ROUTING > > @@ -1359,6 +1360,8 @@ struct kvm_s390_ucas_mapping { > #define KVM_S390_GET_CMMA_BITS _IOWR(KVMIO, 0xb8, struct kvm_s390_cmma_log) > #define KVM_S390_SET_CMMA_BITS _IOW(KVMIO, 0xb9, struct kvm_s390_cmma_log) > > +#define KVM_HYPERV_EVENTFD _IOW(KVMIO, 0xba, struct kvm_hyperv_eventfd) > + > #define KVM_DEV_ASSIGN_ENABLE_IOMMU (1 << 0) > #define KVM_DEV_ASSIGN_PCI_2_3 (1 << 1) > #define KVM_DEV_ASSIGN_MASK_INTX (1 << 2) > @@ -1419,4 +1422,14 @@ struct kvm_assigned_msix_entry { > #define KVM_ARM_DEV_EL1_PTIMER (1 << 1) > #define KVM_ARM_DEV_PMU (1 << 2) > > +struct kvm_hyperv_eventfd { > + __u32 conn_id; > + __s32 fd; > + __u32 flags; > + __u32 padding[3]; > +}; > + > +#define KVM_HYPERV_CONN_ID_BITS 24 You could directly use/introduce KVM_HYPERV_CONN_ID_MASK instead, so no need to do calculations with this value below. > +#define KVM_HYPERV_EVENTFD_DEASSIGN (1 << 0) > + > #endif /* __LINUX_KVM_H */ > diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c > index 015fb06c7522..3b5d4203696b 100644 > --- a/arch/x86/kvm/hyperv.c > +++ b/arch/x86/kvm/hyperv.c > @@ -29,6 +29,7 @@ > #include <linux/kvm_host.h> > #include <linux/highmem.h> > #include <linux/sched/cputime.h> > +#include <linux/eventfd.h> > > #include <asm/apicdef.h> > #include <trace/events/kvm.h> > @@ -1226,6 +1227,54 @@ static int kvm_hv_hypercall_complete_userspace(struct kvm_vcpu *vcpu) > return 1; > } > > +static u16 hvcall_sigevent_param(struct kvm_vcpu *vcpu, gpa_t gpa, u64 *param) > +{ > + struct page *page; > + void *pg; > + u64 *msg; > + > + if ((gpa & (__alignof__(*msg) - 1)) || > + offset_in_page(gpa) + sizeof(*msg) > PAGE_SIZE) > + return HV_STATUS_INVALID_ALIGNMENT; > + > + page = kvm_vcpu_gfn_to_page(vcpu, gpa >> PAGE_SHIFT); > + if (is_error_page(page)) > + return HV_STATUS_INSUFFICIENT_MEMORY; > + > + pg = kmap_atomic(page); > + msg = pg + offset_in_page(gpa); > + *param = *msg; > + kunmap_atomic(pg); > + return HV_STATUS_SUCCESS; > +} > + > +static u16 kvm_hvcall_signal_event(struct kvm_vcpu *vcpu, bool fast, u64 param) > +{ > + u16 ret; > + u32 conn_id; > + int idx; > + struct eventfd_ctx *eventfd; > + > + if (unlikely(!fast)) { > + u64 gpa = param; > + ret = hvcall_sigevent_param(vcpu, gpa, ¶m); > + if (ret != HV_STATUS_SUCCESS) > + return ret; > + } > + > + conn_id = (param & 0xffffffff) + ((param >> 32) & 0xffff); > + if (conn_id & ~((1 << KVM_HYPERV_CONN_ID_BITS) - 1)) > + return HV_STATUS_INVALID_CONNECTION_ID; > + > + idx = srcu_read_lock(&vcpu->kvm->srcu); > + eventfd = idr_find(&vcpu->kvm->arch.hyperv.conn_to_evt, conn_id); > + if (eventfd) > + eventfd_signal(eventfd, 1); > + srcu_read_unlock(&vcpu->kvm->srcu, idx); > + > + return eventfd ? HV_STATUS_SUCCESS : HV_STATUS_INVALID_CONNECTION_ID; > +} > + > int kvm_hv_hypercall(struct kvm_vcpu *vcpu) > { > u64 param, ingpa, outgpa, ret; > @@ -1276,8 +1325,12 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu) > case HVCALL_NOTIFY_LONG_SPIN_WAIT: > kvm_vcpu_on_spin(vcpu, true); > break; > - case HVCALL_POST_MESSAGE: > case HVCALL_SIGNAL_EVENT: > + res = kvm_hvcall_signal_event(vcpu, fast, ingpa); > + if (res != HV_STATUS_INVALID_CONNECTION_ID) > + break; > + /* maybe userspace knows this conn_id: fall through */ > + case HVCALL_POST_MESSAGE: > /* don't bother userspace if it has no way to handle it */ > if (!vcpu_to_synic(vcpu)->active) { > res = HV_STATUS_INVALID_HYPERCALL_CODE; > @@ -1305,8 +1358,68 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu) > void kvm_hv_init_vm(struct kvm *kvm) > { > mutex_init(&kvm->arch.hyperv.hv_lock); > + idr_init(&kvm->arch.hyperv.conn_to_evt); > } > > void kvm_hv_destroy_vm(struct kvm *kvm) > { > + int i; I'd declare i last. > + struct eventfd_ctx *eventfd; > + > + idr_for_each_entry(&kvm->arch.hyperv.conn_to_evt, eventfd, i) > + eventfd_ctx_put(eventfd); > + idr_destroy(&kvm->arch.hyperv.conn_to_evt); > +} > + > +static int kvm_hv_eventfd_assign(struct kvm *kvm, u32 conn_id, int fd) > +{ > + int ret; dito for ret. > + struct eventfd_ctx *eventfd; > + struct kvm_hv *hv = &kvm->arch.hyperv; > + > + eventfd = eventfd_ctx_fdget(fd); > + if (IS_ERR(eventfd)) > + return PTR_ERR(eventfd); > + > + mutex_lock(&hv->hv_lock); > + ret = idr_alloc(&hv->conn_to_evt, eventfd, conn_id, conn_id + 1, > + GFP_KERNEL); > + mutex_unlock(&hv->hv_lock); > + > + if (ret >= 0) > + return 0; > + > + if (ret == -ENOSPC) > + ret = -EEXIST; > + eventfd_ctx_put(eventfd); > + return ret; > +} > + > +static int kvm_hv_eventfd_deassign(struct kvm *kvm, u32 conn_id) > +{ > + int ret; dito > + struct eventfd_ctx *eventfd; > + struct kvm_hv *hv = &kvm->arch.hyperv; > + > + mutex_lock(&hv->hv_lock); > + eventfd = idr_remove(&hv->conn_to_evt, conn_id); > + mutex_unlock(&hv->hv_lock); > + > + if (!eventfd) > + return -ENOENT; > + > + synchronize_srcu(&kvm->srcu); > + eventfd_ctx_put(eventfd); > + return ret; > +} > + > +int kvm_vm_ioctl_hv_eventfd(struct kvm *kvm, struct kvm_hyperv_eventfd *args) > +{ > + if ((args->flags & ~KVM_HYPERV_EVENTFD_DEASSIGN) || > + (args->conn_id & ~((1 << KVM_HYPERV_CONN_ID_BITS) - 1))) > + return -EINVAL; > + > + return args->flags == KVM_HYPERV_EVENTFD_DEASSIGN ? > + kvm_hv_eventfd_deassign(kvm, args->conn_id) : > + kvm_hv_eventfd_assign(kvm, args->conn_id, args->fd); I'd prefer if(..) return X return Y same number of lines but improved readability. -- Thanks, David / dhildenb