We had disscuss the idea here: https://www.spinics.net/lists/kvm/msg140593.html I think it's also suitable for other architectures. If the vcpu(me) exit due to request a usermode spinlock, then the spinlock-holder may be preempted in usermode or kernmode. But if the vcpu(me) is in kernmode, then the holder must be preempted in kernmode, so we should choose a vcpu in kernmode as the most eligible candidate. PS: I only implement X86 arch currently for I'm not familiar with other architecture. Signed-off-by: Longpeng(Mike) <longpeng2@xxxxxxxxxx> --- arch/mips/kvm/mips.c | 5 +++++ arch/powerpc/kvm/powerpc.c | 5 +++++ arch/s390/kvm/kvm-s390.c | 5 +++++ arch/x86/kvm/x86.c | 5 +++++ include/linux/kvm_host.h | 4 ++++ virt/kvm/arm/arm.c | 5 +++++ virt/kvm/kvm_main.c | 9 ++++++++- 7 files changed, 37 insertions(+), 1 deletion(-) diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c index d4b2ad1..2e2701d 100644 --- a/arch/mips/kvm/mips.c +++ b/arch/mips/kvm/mips.c @@ -98,6 +98,11 @@ int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) return !!(vcpu->arch.pending_exceptions); } +bool kvm_arch_vcpu_spin_kernmode(struct kvm_vcpu *vcpu) +{ + return false; +} + int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu) { return 1; diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 1a75c0b..2489f64 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -58,6 +58,11 @@ int kvm_arch_vcpu_runnable(struct kvm_vcpu *v) return !!(v->arch.pending_exceptions) || kvm_request_pending(v); } +bool kvm_arch_vcpu_spin_kernmode(struct kvm_vcpu *vcpu) +{ + return false; +} + int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu) { return 1; diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 3f2884e..9d7c42e 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -2443,6 +2443,11 @@ int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) return kvm_s390_vcpu_has_irq(vcpu, 0); } +bool kvm_arch_vcpu_spin_kernmode(struct kvm_vcpu *vcpu) +{ + return false; +} + void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu) { atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 82a63c5..b5a2e53 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -8435,6 +8435,11 @@ int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) return kvm_vcpu_running(vcpu) || kvm_vcpu_has_events(vcpu); } +bool kvm_arch_vcpu_spin_kernmode(struct kvm_vcpu *vcpu) +{ + return kvm_x86_ops->get_cpl(vcpu) == 0; +} + int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu) { return kvm_vcpu_exiting_guest_mode(vcpu) == IN_GUEST_MODE; diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 648b34c..f8f0d74 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -272,6 +272,9 @@ struct kvm_vcpu { } spin_loop; #endif bool preempted; + /* If vcpu is in kernel-mode when preempted */ + bool in_kernmode; + struct kvm_vcpu_arch arch; struct dentry *debugfs_dentry; }; @@ -797,6 +800,7 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, void kvm_arch_hardware_unsetup(void); void kvm_arch_check_processor_compat(void *rtn); int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu); +bool kvm_arch_vcpu_spin_kernmode(struct kvm_vcpu *vcpu); int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu); #ifndef __KVM_HAVE_ARCH_VM_ALLOC diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c index a39a1e1..ca6a394 100644 --- a/virt/kvm/arm/arm.c +++ b/virt/kvm/arm/arm.c @@ -416,6 +416,11 @@ int kvm_arch_vcpu_runnable(struct kvm_vcpu *v) && !v->arch.power_off && !v->arch.pause); } +bool kvm_arch_vcpu_spin_kernmode(struct kvm_vcpu *vcpu) +{ + return false; +} + /* Just ensure a guest exit from a particular CPU */ static void exit_vm_noop(void *info) { diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 82987d4..8d83caa 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -290,6 +290,7 @@ int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id) kvm_vcpu_set_in_spin_loop(vcpu, false); kvm_vcpu_set_dy_eligible(vcpu, false); vcpu->preempted = false; + vcpu->in_kernmode = false; r = kvm_arch_vcpu_init(vcpu); if (r < 0) @@ -2330,6 +2331,7 @@ void kvm_vcpu_on_spin(struct kvm_vcpu *me) int pass; int i; + me->in_kernmode = kvm_arch_vcpu_spin_kernmode(me); kvm_vcpu_set_in_spin_loop(me, true); /* * We boost the priority of a VCPU that is runnable but not @@ -2351,6 +2353,8 @@ void kvm_vcpu_on_spin(struct kvm_vcpu *me) continue; if (swait_active(&vcpu->wq) && !kvm_arch_vcpu_runnable(vcpu)) continue; + if (me->in_kernmode && !vcpu->in_kernmode) + continue; if (!kvm_vcpu_eligible_for_directed_yield(vcpu)) continue; @@ -4009,8 +4013,11 @@ static void kvm_sched_out(struct preempt_notifier *pn, { struct kvm_vcpu *vcpu = preempt_notifier_to_vcpu(pn); - if (current->state == TASK_RUNNING) + if (current->state == TASK_RUNNING) { vcpu->preempted = true; + vcpu->in_kernmode = kvm_arch_vcpu_spin_kernmode(vcpu); + } + kvm_arch_vcpu_put(vcpu); } -- 1.8.3.1