On Fri, Feb 21, 2020 at 09:53:51AM +0000, Zhoujian (jay) wrote:
> > -----Original Message-----
> > From: Peter Xu [mailto:peterx@xxxxxxxxxx]
> > Sent: Friday, February 21, 2020 3:28 AM
> > To: Zhoujian (jay) <jianjay.zhou@xxxxxxxxxx>
> > Cc: kvm@xxxxxxxxxxxxxxx; pbonzini@xxxxxxxxxx; wangxin (U)
> > <wangxinxin.wang@xxxxxxxxxx>; Huangweidong (C)
> > <weidong.huang@xxxxxxxxxx>; sean.j.christopherson@xxxxxxxxx; Liujinsong
> > (Paul) <liu.jinsong@xxxxxxxxxx>
> > Subject: Re: [PATCH v2] KVM: x86: enable dirty log gradually in small chunks
> >
> > On Thu, Feb 20, 2020 at 12:28:28PM +0800, Jay Zhou wrote:
> > > @@ -5865,8 +5865,12 @@ void kvm_mmu_slot_remove_write_access(struct kvm *kvm,
> > >  	bool flush;
> > >
> > >  	spin_lock(&kvm->mmu_lock);
> > > -	flush = slot_handle_all_level(kvm, memslot, slot_rmap_write_protect,
> > > -				      false);
> > > +	if (kvm->manual_dirty_log_protect & KVM_DIRTY_LOG_INITIALLY_SET)
> > > +		flush = slot_handle_large_level(kvm, memslot,
> > > +				slot_rmap_write_protect, false);
> > > +	else
> > > +		flush = slot_handle_all_level(kvm, memslot,
> > > +				slot_rmap_write_protect, false);
> >
> > Another extra comment:
> >
> > I think we should still keep the old behavior for KVM_MEM_READONLY (in
> > kvm_mmu_slot_apply_flags()) for this...
>
> I also realized this issue after posting this patch, and I agree.
>
> > Say, instead of doing this, maybe we want
> > kvm_mmu_slot_remove_write_access() to take a new parameter to decide
> > to which level we do the wr-protect.
>
> How about using the "flags" field to distinguish:
>
> 	if ((kvm->manual_dirty_log_protect & KVM_DIRTY_LOG_INITIALLY_SET)
> 	    && (memslot->flags & KVM_MEM_LOG_DIRTY_PAGES))
> 		flush = slot_handle_large_level(kvm, memslot,
> 				slot_rmap_write_protect, false);
> 	else
> 		flush = slot_handle_all_level(kvm, memslot,
> 				slot_rmap_write_protect, false);

This seems to be OK too.

But just to show what I meant (which I still think could be a bit
clearer), here it is in full, assuming kvm_manual_dirty_log_init_set()
is the helper you'll introduce.
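Since that helper doesn't exist yet, first a minimal, untested sketch of
what I have in mind for it. The name and placement are only a
suggestion, and it merely wraps the same manual_dirty_log_protect check
you used in your snippet above:

/*
 * Hypothetical helper (not in the tree yet; name is just a suggestion):
 * true iff userspace enabled KVM_DIRTY_LOG_INITIALLY_SET, i.e. the dirty
 * bitmap starts out all-set, so small pages don't need the initial
 * write-protect.
 */
static inline bool kvm_manual_dirty_log_init_set(struct kvm *kvm)
{
	return !!(kvm->manual_dirty_log_protect & KVM_DIRTY_LOG_INITIALLY_SET);
}

With that assumed, the diff: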
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 40a0c0fd95ca..a90630cde92d 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1312,7 +1312,8 @@ void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask,
 void kvm_mmu_reset_context(struct kvm_vcpu *vcpu);
 void kvm_mmu_slot_remove_write_access(struct kvm *kvm,
-				      struct kvm_memory_slot *memslot);
+				      struct kvm_memory_slot *memslot,
+				      int start_level);
 void kvm_mmu_zap_collapsible_sptes(struct kvm *kvm,
 				   const struct kvm_memory_slot *memslot);
 void kvm_mmu_slot_leaf_clear_dirty(struct kvm *kvm,
 				   struct kvm_memory_slot *memslot);
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 87e9ba27ada1..f538b7977fa2 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -5860,13 +5860,14 @@ static bool slot_rmap_write_protect(struct kvm *kvm,
 }
 
 void kvm_mmu_slot_remove_write_access(struct kvm *kvm,
-				      struct kvm_memory_slot *memslot)
+				      struct kvm_memory_slot *memslot,
+				      int start_level)
 {
 	bool flush;
 
 	spin_lock(&kvm->mmu_lock);
-	flush = slot_handle_all_level(kvm, memslot, slot_rmap_write_protect,
-				      false);
+	flush = slot_handle_level(kvm, memslot, slot_rmap_write_protect,
+				  start_level, PT_MAX_HUGEPAGE_LEVEL, false);
 	spin_unlock(&kvm->mmu_lock);
 
 	/*
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index fb5d64ebc35d..2ed3204dfd9f 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -9956,7 +9956,7 @@ static void kvm_mmu_slot_apply_flags(struct kvm *kvm,
 {
 	/* Still write protect RO slot */
 	if (new->flags & KVM_MEM_READONLY) {
-		kvm_mmu_slot_remove_write_access(kvm, new);
+		kvm_mmu_slot_remove_write_access(kvm, new, PT_PAGE_TABLE_LEVEL);
 		return;
 	}
 
@@ -9993,8 +9993,20 @@ static void kvm_mmu_slot_apply_flags(struct kvm *kvm,
 	if (new->flags & KVM_MEM_LOG_DIRTY_PAGES) {
 		if (kvm_x86_ops->slot_enable_log_dirty)
 			kvm_x86_ops->slot_enable_log_dirty(kvm, new);
-		else
-			kvm_mmu_slot_remove_write_access(kvm, new);
+		else {
+			int level = kvm_manual_dirty_log_init_set(kvm) ?
+				PT_DIRECTORY_LEVEL : PT_PAGE_TABLE_LEVEL;
+
+			/*
+			 * If we're with initial-all-set, we don't need
+			 * to write protect any small page because
+			 * they're reported as dirty already.  However
+			 * we still need to write-protect huge pages
+			 * so that the page split can happen lazily on
+			 * the first write to the huge page.
+			 */
+			kvm_mmu_slot_remove_write_access(kvm, new, level);
+		}
 	} else {
 		if (kvm_x86_ops->slot_disable_log_dirty)
 			kvm_x86_ops->slot_disable_log_dirty(kvm, new);

Thanks,

-- 
Peter Xu