Thank you for your comments. I now understand the scenario in which my
previous change was unsafe. I also like the idea of KVM_VCPU_IN_HOST for
tracking whether a vCPU is scheduled out, since it lets the guest see
what state the vCPU is in.

I have tested the attached changes, but the performance improvement was
somewhat limited. One likely reason is that the instruction-boundary
check prevents KVM from setting KVM_VCPU_IN_HOST and KVM_VCPU_PREEMPTED
in many cases. I understand this is a conservative approach to avoid
consuming stale TLB entries; I referred to this patch:
https://lore.kernel.org/lkml/20220614021116.1101331-1-sashal@xxxxxxxxxx/

Since PV waiting causes the most significant overhead, would it be
possible to let guests perform a PV flush when the target vCPUs are in
PV wait and scheduled out?
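To illustrate, here is a rough, untested sketch of the guest-side test I
have in mind. KVM_VCPU_PV_WAIT and pv_flush_allowed() are hypothetical
names I made up for this mail; the bit would have to be set by the host
while the vCPU is blocked after a HLT exit from PV wait and cleared
before the vCPU re-enters guest mode:

/* First three bits as defined in arch/x86/include/uapi/asm/kvm_para.h. */
#define KVM_VCPU_PREEMPTED	(1 << 0)
#define KVM_VCPU_FLUSH_TLB	(1 << 1)
#define KVM_VCPU_IN_HOST	(1 << 2)
#define KVM_VCPU_PV_WAIT	(1 << 3)	/* hypothetical, not in the ABI */

/*
 * Whether a remote vCPU can take a deferred (PV) flush instead of a
 * flush IPI.  A vCPU that blocked in kvm_wait() executed HLT, so it is
 * parked on an instruction boundary and cannot be consuming stale TLB
 * entries; like the preempted case, it would pick up KVM_VCPU_FLUSH_TLB
 * before running guest code again.
 */
static inline bool pv_flush_allowed(u8 state)
{
	return state & (KVM_VCPU_PREEMPTED | KVM_VCPU_IN_HOST |
			KVM_VCPU_PV_WAIT);
}

kvm_flush_tlb_multi() would then use pv_flush_allowed(state) in place of
the explicit bit tests in the kernel/kvm.c hunk of the attached patch.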
> diff --git a/arch/x86/include/uapi/asm/kvm_para.h b/arch/x86/include/uapi/asm/kvm_para.h
> index a1efa7907a0b..e3a6e6ecf70b 100644
> --- a/arch/x86/include/uapi/asm/kvm_para.h
> +++ b/arch/x86/include/uapi/asm/kvm_para.h
> @@ -70,6 +70,7 @@ struct kvm_steal_time {
>
>  #define KVM_VCPU_PREEMPTED	(1 << 0)
>  #define KVM_VCPU_FLUSH_TLB	(1 << 1)
> +#define KVM_VCPU_IN_HOST	(1 << 2)
>
>  #define KVM_CLOCK_PAIRING_WALLCLOCK	0
>  struct kvm_clock_pairing {
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index acdd72e89bb0..5e3dc209e86c 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -5018,8 +5018,11 @@ static void kvm_steal_time_set_preempted(struct kvm_vcpu *vcpu)
>  	struct gfn_to_hva_cache *ghc = &vcpu->arch.st.cache;
>  	struct kvm_steal_time __user *st;
>  	struct kvm_memslots *slots;
> -	static const u8 preempted = KVM_VCPU_PREEMPTED;
>  	gpa_t gpa = vcpu->arch.st.msr_val & KVM_STEAL_VALID_BITS;
> +	u8 preempted = KVM_VCPU_IN_HOST;
> +
> +	if (vcpu->preempted)
> +		preempted |= KVM_VCPU_PREEMPTED;
>
>  	/*
>  	 * The vCPU can be marked preempted if and only if the VM-Exit was on
> @@ -5037,7 +5040,7 @@ static void kvm_steal_time_set_preempted(struct kvm_vcpu *vcpu)
>  	if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED))
>  		return;
>
> -	if (vcpu->arch.st.preempted)
> +	if (vcpu->arch.st.preempted == preempted)

This code may clear KVM_VCPU_FLUSH_TLB: if the cached value no longer
matches, the copy_to_user_nofault() below rewrites st->preempted and can
drop a flush request that another vCPU has set in the meantime.
https://lore.kernel.org/lkml/20200803121849.869521189@xxxxxxxxxxxxxxxxxxx/

>  		return;
>
>  	/* This happens on process exit */
> @@ -5055,7 +5058,7 @@ static void kvm_steal_time_set_preempted(struct kvm_vcpu *vcpu)
>  	BUILD_BUG_ON(sizeof(st->preempted) != sizeof(preempted));
>
>  	if (!copy_to_user_nofault(&st->preempted, &preempted, sizeof(preempted)))
> -		vcpu->arch.st.preempted = KVM_VCPU_PREEMPTED;
> +		vcpu->arch.st.preempted = preempted;
>
>  	mark_page_dirty_in_slot(vcpu->kvm, ghc->memslot, gpa_to_gfn(ghc->gpa));
>  }
> @@ -5064,7 +5067,7 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
>  {
>  	int idx;
>
> -	if (vcpu->preempted) {
> +	if (vcpu->preempted || !kvm_xen_msr_enabled(vcpu->kvm)) {
>  		/*
>  		 * Assume protected guests are in-kernel.  Inefficient yielding
>  		 * due to false positives is preferable to never yielding due

---
 arch/x86/include/uapi/asm/kvm_para.h |  1 +
 arch/x86/kernel/kvm.c                |  2 +-
 arch/x86/kvm/x86.c                   | 11 +++++++----
 3 files changed, 9 insertions(+), 5 deletions(-)

diff --git a/arch/x86/include/uapi/asm/kvm_para.h b/arch/x86/include/uapi/asm/kvm_para.h
index a1efa7907a0b..6ef06c3f234b 100644
--- a/arch/x86/include/uapi/asm/kvm_para.h
+++ b/arch/x86/include/uapi/asm/kvm_para.h
@@ -70,6 +70,7 @@ struct kvm_steal_time {
 
 #define KVM_VCPU_PREEMPTED	(1 << 0)
 #define KVM_VCPU_FLUSH_TLB	(1 << 1)
+#define KVM_VCPU_IN_HOST	(1 << 2)
 
 #define KVM_CLOCK_PAIRING_WALLCLOCK	0
 struct kvm_clock_pairing {
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index 21e9e4845354..17ea1111a158 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -668,7 +668,7 @@ static void kvm_flush_tlb_multi(const struct cpumask *cpumask,
 		 */
 		src = &per_cpu(steal_time, cpu);
 		state = READ_ONCE(src->preempted);
-		if ((state & KVM_VCPU_PREEMPTED)) {
+		if ((state & KVM_VCPU_PREEMPTED) || (state & KVM_VCPU_IN_HOST)) {
 			if (try_cmpxchg(&src->preempted, &state,
 					state | KVM_VCPU_FLUSH_TLB))
 				__cpumask_clear_cpu(cpu, flushmask);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index c79a8cc57ba4..27b558ae1ad2 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -5019,8 +5019,11 @@ static void kvm_steal_time_set_preempted(struct kvm_vcpu *vcpu)
 	struct gfn_to_hva_cache *ghc = &vcpu->arch.st.cache;
 	struct kvm_steal_time __user *st;
 	struct kvm_memslots *slots;
-	static const u8 preempted = KVM_VCPU_PREEMPTED;
 	gpa_t gpa = vcpu->arch.st.msr_val & KVM_STEAL_VALID_BITS;
+	u8 preempted = KVM_VCPU_IN_HOST;
+
+	if (vcpu->preempted)
+		preempted |= KVM_VCPU_PREEMPTED;
 
 	/*
 	 * The vCPU can be marked preempted if and only if the VM-Exit was on
@@ -5038,7 +5041,7 @@ static void kvm_steal_time_set_preempted(struct kvm_vcpu *vcpu)
 	if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED))
 		return;
 
-	if (vcpu->arch.st.preempted)
+	if (vcpu->arch.st.preempted == preempted)
 		return;
 
 	/* This happens on process exit */
@@ -5056,7 +5059,7 @@ static void kvm_steal_time_set_preempted(struct kvm_vcpu *vcpu)
 	BUILD_BUG_ON(sizeof(st->preempted) != sizeof(preempted));
 
 	if (!copy_to_user_nofault(&st->preempted, &preempted, sizeof(preempted)))
-		vcpu->arch.st.preempted = KVM_VCPU_PREEMPTED;
+		vcpu->arch.st.preempted = preempted;
 
 	mark_page_dirty_in_slot(vcpu->kvm, ghc->memslot, gpa_to_gfn(ghc->gpa));
 }
@@ -5065,7 +5068,7 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
 {
 	int idx;
 
-	if (vcpu->preempted) {
+	if (vcpu->preempted || !kvm_xen_msr_enabled(vcpu->kvm)) {
 		/*
 		 * Assume protected guests are in-kernel.  Inefficient yielding
 		 * due to false positives is preferable to never yielding due
-- 
2.25.1