Jon Cargille <jcargill@xxxxxxxxxx> writes: > From: David Matlack <dmatlack@xxxxxxxxxx> > > KVM_CAP_HALT_POLL is a per-VM capability that lets userspace > control the halt-polling time, allowing halt-polling to be tuned or > disabled on particular VMs. > > With dynamic halt-polling, a VM's VCPUs can poll from anywhere from > [0, halt_poll_ns] on each halt. KVM_CAP_HALT_POLL sets the > upper limit on the poll time. Out of pure curiosity, why is this a per-VM and not a per-VCPU property? > > Signed-off-by: David Matlack <dmatlack@xxxxxxxxxx> > Signed-off-by: Jon Cargille <jcargill@xxxxxxxxxx> > Reviewed-by: Jim Mattson <jmattson@xxxxxxxxxx> > --- > Documentation/virt/kvm/api.rst | 17 +++++++++++++++++ > include/linux/kvm_host.h | 1 + > include/uapi/linux/kvm.h | 1 + > virt/kvm/kvm_main.c | 19 +++++++++++++++---- > 4 files changed, 34 insertions(+), 4 deletions(-) > > diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst > index efbbe570aa9b7b..d871dacb984e98 100644 > --- a/Documentation/virt/kvm/api.rst > +++ b/Documentation/virt/kvm/api.rst > @@ -5802,6 +5802,23 @@ If present, this capability can be enabled for a VM, meaning that KVM > will allow the transition to secure guest mode. Otherwise KVM will > veto the transition. > > +7.20 KVM_CAP_HALT_POLL > +---------------------- > + > +:Architectures: all > +:Target: VM > +:Parameters: args[0] is the maximum poll time in nanoseconds > +:Returns: 0 on success; -1 on error > + > +This capability overrides the kvm module parameter halt_poll_ns for the > +target VM. > + > +VCPU polling allows a VCPU to poll for wakeup events instead of immediately > +scheduling during guest halts. The maximum time a VCPU can spend polling is > +controlled by the kvm module parameter halt_poll_ns. This capability allows > +the maximum halt time to specified on a per-VM basis, effectively overriding > +the module parameter for the target VM. > + > 8. Other capabilities. > ====================== > > diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h > index 6d58beb65454f7..922b24ce5e7297 100644 > --- a/include/linux/kvm_host.h > +++ b/include/linux/kvm_host.h > @@ -503,6 +503,7 @@ struct kvm { > struct srcu_struct srcu; > struct srcu_struct irq_srcu; > pid_t userspace_pid; > + unsigned int max_halt_poll_ns; > }; > > #define kvm_err(fmt, ...) \ > diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h > index 428c7dde6b4b37..ac9eba0289d1b6 100644 > --- a/include/uapi/linux/kvm.h > +++ b/include/uapi/linux/kvm.h > @@ -1017,6 +1017,7 @@ struct kvm_ppc_resize_hpt { > #define KVM_CAP_S390_VCPU_RESETS 179 > #define KVM_CAP_S390_PROTECTED 180 > #define KVM_CAP_PPC_SECURE_GUEST 181 > +#define KVM_CAP_HALT_POLL 182 > > #ifdef KVM_CAP_IRQ_ROUTING > > diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c > index 74bdb7bf32952e..ec038a9e60a275 100644 > --- a/virt/kvm/kvm_main.c > +++ b/virt/kvm/kvm_main.c > @@ -710,6 +710,8 @@ static struct kvm *kvm_create_vm(unsigned long type) > goto out_err_no_arch_destroy_vm; > } > > + kvm->max_halt_poll_ns = halt_poll_ns; > + > r = kvm_arch_init_vm(kvm, type); > if (r) > goto out_err_no_arch_destroy_vm; > @@ -2716,15 +2718,16 @@ void kvm_vcpu_block(struct kvm_vcpu *vcpu) > if (!kvm_arch_no_poll(vcpu)) { > if (!vcpu_valid_wakeup(vcpu)) { > shrink_halt_poll_ns(vcpu); > - } else if (halt_poll_ns) { > + } else if (vcpu->kvm->max_halt_poll_ns) { > if (block_ns <= vcpu->halt_poll_ns) > ; > /* we had a long block, shrink polling */ > - else if (vcpu->halt_poll_ns && block_ns > halt_poll_ns) > + else if (vcpu->halt_poll_ns && > + block_ns > vcpu->kvm->max_halt_poll_ns) > shrink_halt_poll_ns(vcpu); > /* we had a short halt and our poll time is too small */ > - else if (vcpu->halt_poll_ns < halt_poll_ns && > - block_ns < halt_poll_ns) > + else if (vcpu->halt_poll_ns < vcpu->kvm->max_halt_poll_ns && > + block_ns < vcpu->kvm->max_halt_poll_ns) > grow_halt_poll_ns(vcpu); > } else { > vcpu->halt_poll_ns = 0; > @@ -3516,6 +3519,7 @@ static long kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg) > case KVM_CAP_IOEVENTFD_ANY_LENGTH: > case KVM_CAP_CHECK_EXTENSION_VM: > case KVM_CAP_ENABLE_CAP_VM: > + case KVM_CAP_HALT_POLL: > return 1; > #ifdef CONFIG_KVM_MMIO > case KVM_CAP_COALESCED_MMIO: > @@ -3566,6 +3570,13 @@ static int kvm_vm_ioctl_enable_cap_generic(struct kvm *kvm, > return 0; > } > #endif > + case KVM_CAP_HALT_POLL: { > + if (cap->flags || cap->args[0] != (unsigned int)cap->args[0]) > + return -EINVAL; > + > + kvm->max_halt_poll_ns = cap->args[0]; Is it safe to allow any value from userspace here or would it maybe make sense to only allow [0, global halt_poll_ns]? > + return 0; > + } > default: > return kvm_vm_ioctl_enable_cap(kvm, cap); > } -- Vitaly