From: Nikunj A. Dadhania <nikunj@xxxxxxxxxxxxxxxxxx> flush_tlb_others_ipi depends on a lot of statics in tlb.c. Replicate flush_tlb_others_ipi as kvm_flush_tlb_others to further adapt it to paravirtualization. Use the vcpu state information inside kvm_flush_tlb_others to avoid sending IPIs to pre-empted vcpus. * Do not send IPIs to offline vcpus and set flush_on_enter flag * For online vcpus: Wait for them to clear the flag The approach was discussed here: https://lkml.org/lkml/2012/2/20/157 v3: * use only one state variable for vcpu-running/flush_on_enter * use cmpxchg to update the state * adapt to Alex Shi's TLB flush optimization v2: * use ACCESS_ONCE so the value is not register cached * Separate HV and Guest code Suggested-by: Peter Zijlstra <a.p.zijlstra@xxxxxxxxx> Signed-off-by: Nikunj A. Dadhania <nikunj@xxxxxxxxxxxxxxxxxx> -- Pseudo Algo: ------------ Hypervisor ========== guest_exit() if (!(xchg(state, NOT_IN_GUEST) == SHOULD_FLUSH)) tlb_flush(vcpu); guest_enter() if (!(xchg(state, IN_GUEST) == SHOULD_FLUSH)) tlb_flush(vcpu); Guest ===== flushcpumask = cpumask; for_each_cpu(i, flushmask) { state = vs->state; if(!test_bit(IN_GUEST_MODE, state)) { if (cmpxchg(&vs->state, state, state | (1 << SHOULD_FLUSH)) == SUCCESS) cpumask_clear_cpu(flushmask,i) } } smp_call_function_many(f->flushmask, flush_tlb_func) --- arch/x86/include/asm/tlbflush.h | 11 +++++++++++ arch/x86/kernel/kvm.c | 4 +++- arch/x86/mm/tlb.c | 36 ++++++++++++++++++++++++++++++++++++ 3 files changed, 50 insertions(+), 1 deletions(-) diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h index 74a4433..0a343a1 100644 --- a/arch/x86/include/asm/tlbflush.h +++ b/arch/x86/include/asm/tlbflush.h @@ -119,6 +119,13 @@ static inline void native_flush_tlb_others(const struct cpumask *cpumask, { } +static inline void kvm_flush_tlb_others(const struct cpumask *cpumask, + struct mm_struct *mm, + unsigned long start, + unsigned long end) +{ +} + static inline void 
reset_lazy_tlbstate(void) { } @@ -153,6 +160,10 @@ void native_flush_tlb_others(const struct cpumask *cpumask, struct mm_struct *mm, unsigned long start, unsigned long end); +void kvm_flush_tlb_others(const struct cpumask *cpumask, + struct mm_struct *mm, unsigned long start, + unsigned long end); + #define TLBSTATE_OK 1 #define TLBSTATE_LAZY 2 diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index 37e6599..b538a31 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -501,8 +501,10 @@ void __init kvm_guest_init(void) apic_set_eoi_write(kvm_guest_apic_eoi_write); #ifdef CONFIG_PARAVIRT_TLB_FLUSH - if (kvm_para_has_feature(KVM_FEATURE_VCPU_STATE)) + if (kvm_para_has_feature(KVM_FEATURE_VCPU_STATE)) { has_vcpu_state = 1; + pv_mmu_ops.flush_tlb_others = kvm_flush_tlb_others; + } #endif #ifdef CONFIG_SMP diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index 613cd83..645df99 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c @@ -6,6 +6,7 @@ #include <linux/interrupt.h> #include <linux/module.h> #include <linux/cpu.h> +#include <linux/kvm_para.h> #include <asm/tlbflush.h> #include <asm/mmu_context.h> @@ -119,6 +120,41 @@ static void flush_tlb_func(void *info) } +#ifdef CONFIG_KVM_GUEST + +DECLARE_PER_CPU(struct kvm_vcpu_state, vcpu_state) __aligned(64); + +void kvm_flush_tlb_others(const struct cpumask *cpumask, + struct mm_struct *mm, unsigned long start, + unsigned long end) +{ + struct flush_tlb_info info; + struct kvm_vcpu_state *v_state; + u64 state; + int cpu; + cpumask_t flushmask; + + cpumask_copy(&flushmask, cpumask); + info.flush_mm = mm; + info.flush_start = start; + info.flush_end = end; + /* + * We have to call flush only on online vCPUs. 
And + * queue flush_on_enter for pre-empted vCPUs + */ + for_each_cpu(cpu, to_cpumask(&flushmask)) { + v_state = &per_cpu(vcpu_state, cpu); + state = v_state->state; + if (!test_bit(KVM_VCPU_STATE_IN_GUEST_MODE, &state)) { + if (cmpxchg(&v_state->state, state, state | 1 << KVM_VCPU_STATE_SHOULD_FLUSH)) + cpumask_clear_cpu(cpu, to_cpumask(&flushmask)); + } + } + + smp_call_function_many(&flushmask, flush_tlb_func, &info, 1); +} +#endif /* CONFIG_KVM_GUEST */ + void native_flush_tlb_others(const struct cpumask *cpumask, struct mm_struct *mm, unsigned long start, unsigned long end) -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html