Re: [PATCH v2 3/4] KVM: Dirty memory tracking for performant checkpointing solutions

Hi Cao,
2017-01-05 4:44 GMT+08:00 Cao, Lei <Lei.Cao@xxxxxxxxxxx>:
> When the dirty list becomes full, force all vcpus to exit to userspace.
>

I saw your presentation slides from the KVM Forum, where you
mentioned that "CPU throttling may not be effective for some
workloads where memory write speed is not dependent on CPU execution
speed". Could you point out which kinds of workloads have a memory
write speed that is not dependent on CPU execution speed? Is the
memory in such workloads mainly dirtied by DMA or by something else?
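
One hypothetical example that comes to my mind (my own sketch, not
from the slides): a guest that keeps a block device busy with
O_DIRECT reads dirties memory at device speed while its vCPU mostly
sleeps in the syscall, so throttling the vCPU would barely reduce the
dirty rate. Something like:

#define _GNU_SOURCE        /* for O_DIRECT */
#include <fcntl.h>
#include <stdlib.h>
#include <unistd.h>

int main(void)
{
        void *buf;
        /* O_DIRECT: the device DMAs straight into our buffer,
         * bypassing the page cache; "/dev/vdb" is a placeholder. */
        int fd = open("/dev/vdb", O_RDONLY | O_DIRECT);

        if (fd < 0 || posix_memalign(&buf, 4096, 1 << 20))
                return 1;
        for (;;)
                /* 1 MiB of guest memory dirtied per iteration at
                 * disk speed, with almost no CPU work. */
                if (pread(fd, buf, 1 << 20, 0) < 0)
                        return 1;
}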

Regards,
Wanpeng Li

> Signed-off-by: Lei Cao <lei.cao@xxxxxxxxxxx>
> ---
>  arch/x86/include/asm/kvm_host.h |  7 +++++++
>  arch/x86/kvm/mmu.c              |  7 +++++++
>  arch/x86/kvm/vmx.c              |  7 +++++++
>  arch/x86/kvm/x86.c              | 10 ++++++++++
>  include/linux/kvm_host.h        |  1 +
>  include/uapi/linux/kvm.h        |  1 +
>  virt/kvm/kvm_main.c             | 36 ++++++++++++++++++++++++++++++++++++
>  7 files changed, 69 insertions(+)
>
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index 6dfb14a..20a9fc8 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -75,6 +75,7 @@
>  #define KVM_REQ_HV_RESET          28
>  #define KVM_REQ_HV_EXIT           29
>  #define KVM_REQ_HV_STIMER         30
> +#define KVM_REQ_EXIT_DIRTY_LOG_FULL 31
>
>  #define CR0_RESERVED_BITS                                               \
>         (~(unsigned long)(X86_CR0_PE | X86_CR0_MP | X86_CR0_EM | X86_CR0_TS \
> @@ -997,6 +998,8 @@ struct kvm_x86_ops {
>          *  - enable_log_dirty_pt_masked:
>          *      called when reenabling log dirty for the GFNs in the mask after
>          *      corresponding bits are cleared in slot->dirty_bitmap.
> +        *  - cpu_dirty_log_size:
> +        *      called to inquire about the size of the hardware dirty log
>          */
>         void (*slot_enable_log_dirty)(struct kvm *kvm,
>                                       struct kvm_memory_slot *slot);
> @@ -1006,6 +1009,8 @@ struct kvm_x86_ops {
>         void (*enable_log_dirty_pt_masked)(struct kvm *kvm,
>                                            struct kvm_memory_slot *slot,
>                                            gfn_t offset, unsigned long mask);
> +       int (*cpu_dirty_log_size)(void);
> +
>         /* pmu operations of sub-arch */
>         const struct kvm_pmu_ops *pmu_ops;
>
> @@ -1388,6 +1393,8 @@ bool kvm_intr_is_single_vcpu(struct kvm *kvm, struct kvm_lapic_irq *irq,
>  void kvm_set_msi_irq(struct kvm *kvm, struct kvm_kernel_irq_routing_entry *e,
>                      struct kvm_lapic_irq *irq);
>
> +int kvm_mt_cpu_dirty_log_size(void);
> +
>  static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu)
>  {
>         if (kvm_x86_ops->vcpu_blocking)
> diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
> index 7012de4..e0668a0 100644
> --- a/arch/x86/kvm/mmu.c
> +++ b/arch/x86/kvm/mmu.c
> @@ -4980,6 +4980,13 @@ void kvm_mmu_invalidate_mmio_sptes(struct kvm *kvm, struct kvm_memslots *slots)
>         }
>  }
>
> +int kvm_mt_cpu_dirty_log_size(void)
> +{
> +       if (kvm_x86_ops->cpu_dirty_log_size)
> +               return kvm_x86_ops->cpu_dirty_log_size();
> +       return 0;
> +}
> +
>  static unsigned long
>  mmu_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
>  {
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index ba20b00..76f88b0 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -6729,6 +6729,7 @@ static __init int hardware_setup(void)
>                 kvm_x86_ops->slot_disable_log_dirty = NULL;
>                 kvm_x86_ops->flush_log_dirty = NULL;
>                 kvm_x86_ops->enable_log_dirty_pt_masked = NULL;
> +               kvm_x86_ops->cpu_dirty_log_size = NULL;
>         }
>
>         if (cpu_has_vmx_preemption_timer() && enable_preemption_timer) {
> @@ -11503,6 +11504,11 @@ static void vmx_setup_mce(struct kvm_vcpu *vcpu)
>                         ~FEATURE_CONTROL_LMCE;
>  }
>
> +static int vmx_cpu_dirty_log_size(void)
> +{
> +       return PML_ENTITY_NUM;
> +}
> +
>  static struct kvm_x86_ops vmx_x86_ops __ro_after_init = {
>         .cpu_has_kvm_support = cpu_has_kvm_support,
>         .disabled_by_bios = vmx_disabled_by_bios,
> @@ -11617,6 +11623,7 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = {
>         .slot_disable_log_dirty = vmx_slot_disable_log_dirty,
>         .flush_log_dirty = vmx_flush_log_dirty,
>         .enable_log_dirty_pt_masked = vmx_enable_log_dirty_pt_masked,
> +       .cpu_dirty_log_size = vmx_cpu_dirty_log_size,
>
>         .pre_block = vmx_pre_block,
>         .post_block = vmx_post_block,
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 5707129..e2f4cee 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -6714,6 +6714,16 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
>                  */
>                 if (kvm_check_request(KVM_REQ_HV_STIMER, vcpu))
>                         kvm_hv_process_stimers(vcpu);
> +               if (kvm_check_request(KVM_REQ_EXIT_DIRTY_LOG_FULL, vcpu)) {
> +                       vcpu->run->exit_reason = KVM_EXIT_DIRTY_LOG_FULL;
> +                       r = -EINTR;
> +                       if (vcpu->need_exit) {
> +                               vcpu->need_exit = false;
> +                               kvm_make_all_cpus_request(vcpu->kvm,
> +                                       KVM_REQ_EXIT_DIRTY_LOG_FULL);
> +                       }
> +                       goto out;
> +               }
>         }
>
>         /*
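
Since r is set to -EINTR here, KVM_RUN will return to userspace with
errno == EINTR and run->exit_reason == KVM_EXIT_DIRTY_LOG_FULL. A
minimal sketch of how a checkpointing loop might consume this exit
(harvest_dirty_lists() is only a placeholder for whatever mechanism
the rest of this series provides to drain the per-vcpu gfn lists, not
a real API):

#include <errno.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

/* placeholder: drain and reset the per-vcpu dirty gfn lists */
extern void harvest_dirty_lists(void);

static int run_vcpu(int vcpu_fd, struct kvm_run *run)
{
        for (;;) {
                if (ioctl(vcpu_fd, KVM_RUN, 0) < 0 && errno != EINTR)
                        return -1;

                switch (run->exit_reason) {
                case KVM_EXIT_DIRTY_LOG_FULL:
                        /* all vcpus were kicked out; drain the logs
                         * so the guest can make progress again */
                        harvest_dirty_lists();
                        continue;
                default:
                        return 0; /* let the caller decode other exits */
                }
        }
}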
> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> index 7a85b30..b7fedeb 100644
> --- a/include/linux/kvm_host.h
> +++ b/include/linux/kvm_host.h
> @@ -283,6 +283,7 @@ struct kvm_vcpu {
>         struct dentry *debugfs_dentry;
>  #ifdef KVM_DIRTY_LOG_PAGE_OFFSET
>         struct gfn_list_t *dirty_logs;
> +       bool need_exit;
>  #endif
>  };
>
> diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
> index 05332de..bacb8db 100644
> --- a/include/uapi/linux/kvm.h
> +++ b/include/uapi/linux/kvm.h
> @@ -205,6 +205,7 @@ struct kvm_hyperv_exit {
>  #define KVM_EXIT_S390_STSI        25
>  #define KVM_EXIT_IOAPIC_EOI       26
>  #define KVM_EXIT_HYPERV           27
> +#define KVM_EXIT_DIRTY_LOG_FULL   28
>
>  /* For KVM_EXIT_INTERNAL_ERROR */
>  /* Emulate instruction failed. */
> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> index bff980c..00d7989 100644
> --- a/virt/kvm/kvm_main.c
> +++ b/virt/kvm/kvm_main.c
> @@ -270,6 +270,7 @@ int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id)
>                 }
>                 vcpu->dirty_logs = page_address(page);
>         }
> +       vcpu->need_exit = false;
>  #endif
>
>         kvm_vcpu_set_in_spin_loop(vcpu, false);
> @@ -3030,6 +3031,29 @@ static long kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg)
>  }
>
>  #ifdef KVM_DIRTY_LOG_PAGE_OFFSET
> +static void kvm_mt_dirty_log_full(struct kvm *kvm, struct kvm_vcpu *vcpu)
> +{
> +       /*
> +        * Request vcpu exits, but if interrupts are disabled, we have
> +        * to defer the requests because smp_call_xxx may deadlock when
> +        * called that way.
> +        */
> +       if (vcpu && irqs_disabled()) {
> +               kvm_make_request(KVM_REQ_EXIT_DIRTY_LOG_FULL, vcpu);
> +               vcpu->need_exit = true;
> +       } else {
> +               WARN_ON(irqs_disabled());
> +               kvm_make_all_cpus_request(kvm,
> +                                         KVM_REQ_EXIT_DIRTY_LOG_FULL);
> +       }
> +}
> +
> +/*
> + * estimated number of pages being dirtied during vcpu exit, not counting
> + * hardware dirty log (PML) flush
> + */
> +#define KVM_MT_DIRTY_PAGE_NUM_EXTRA 128
> +
>  void kvm_mt_mark_page_dirty(struct kvm *kvm, struct kvm_memory_slot *slot,
>         struct kvm_vcpu *vcpu, gfn_t gfn)
>  {
> @@ -3037,6 +3061,7 @@ void kvm_mt_mark_page_dirty(struct kvm *kvm, struct kvm_memory_slot *slot,
>         int slot_id;
>         u32 as_id = 0;
>         u64 offset;
> +       u32 extra = KVM_MT_DIRTY_PAGE_NUM_EXTRA;
>
>         if (!slot || !slot->dirty_bitmap || !kvm->dirty_log_size)
>                 return;
> @@ -3068,6 +3093,17 @@ void kvm_mt_mark_page_dirty(struct kvm *kvm, struct kvm_memory_slot *slot,
>         gfnlist->dirty_gfns[gfnlist->dirty_index].offset = offset;
>         smp_wmb();
>         gfnlist->dirty_index++;
> +
> +       /*
> +        * more pages will be dirtied during vcpu exit, e.g. pml log
> +        * being flushed. So allow some buffer space.
> +        */
> +       if (vcpu)
> +               extra += kvm_mt_cpu_dirty_log_size();
> +
> +       if (gfnlist->dirty_index == (kvm->max_dirty_logs - extra))
> +               kvm_mt_dirty_log_full(kvm, vcpu);
> +
>         if (!vcpu)
>                 spin_unlock(&kvm->dirty_log_lock);
>  }
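
If I follow the arithmetic: on VMX, PML_ENTITY_NUM is 512, so for a
mark done in vcpu context extra = 128 + 512 = 640 and the exit is
requested 640 entries before max_dirty_logs. That leaves headroom for
a full PML flush (up to 512 gfns) plus the estimated 128 pages
dirtied on the exit path itself; a mark without vcpu context only
keeps the 128-entry cushion.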
> --
> 2.5.0