+Ben

On Tue, Apr 06, 2021, Keqian Zhu wrote:
> Hi Paolo,
>
> I plan to rework this patch and do full test. What do you think about this idea
> (enable dirty logging for huge pages lazily)?

Ben, don't you also have something similar (or maybe the exact opposite?) in the
hopper?  This sounds very familiar, but I can't quite connect the dots that are
floating around my head...

> PS: As dirty log of TDP MMU has been supported, I should add more code.
>
> On 2020/8/28 16:11, Keqian Zhu wrote:
> > Currently, when enabling dirty logging with init-all-set, we only
> > write protect huge pages and leave normal pages untouched, so that
> > dirty logging for normal pages can be enabled lazily.
> >
> > It seems that enabling dirty logging lazily for huge pages is feasible
> > too, which not only reduces the time needed to start dirty logging, but
> > also greatly reduces the side effects on the guest when there is a high
> > dirty rate.
> >
> > (This code is untested, for RFC purposes :-) ).
> >
> > Signed-off-by: Keqian Zhu <zhukeqian1@xxxxxxxxxx>
> > ---
> >  arch/x86/include/asm/kvm_host.h |  3 +-
> >  arch/x86/kvm/mmu/mmu.c          | 65 ++++++++++++++++++++++++++-------
> >  arch/x86/kvm/vmx/vmx.c          |  3 +-
> >  arch/x86/kvm/x86.c              | 22 +++++------
> >  4 files changed, 62 insertions(+), 31 deletions(-)
> >
> > diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> > index 5303dbc5c9bc..201a068cf43d 100644
> > --- a/arch/x86/include/asm/kvm_host.h
> > +++ b/arch/x86/include/asm/kvm_host.h
> > @@ -1296,8 +1296,7 @@ void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask,
> >
> >  void kvm_mmu_reset_context(struct kvm_vcpu *vcpu);
> >  void kvm_mmu_slot_remove_write_access(struct kvm *kvm,
> > -                                      struct kvm_memory_slot *memslot,
> > -                                      int start_level);
> > +                                      struct kvm_memory_slot *memslot);
> >  void kvm_mmu_zap_collapsible_sptes(struct kvm *kvm,
> >                                     const struct kvm_memory_slot *memslot);
> >  void kvm_mmu_slot_leaf_clear_dirty(struct kvm *kvm,
> > diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
> > index 43fdb0c12a5d..4b7d577de6cd 100644
> > --- a/arch/x86/kvm/mmu/mmu.c
> > +++ b/arch/x86/kvm/mmu/mmu.c
> > @@ -1625,14 +1625,45 @@ static bool __rmap_set_dirty(struct kvm *kvm, struct kvm_rmap_head *rmap_head)
> >  }
> >
> >  /**
> > - * kvm_mmu_write_protect_pt_masked - write protect selected PT level pages
> > + * kvm_mmu_write_protect_largepage_masked - write protect selected largepages
> >   * @kvm: kvm instance
> >   * @slot: slot to protect
> >   * @gfn_offset: start of the BITS_PER_LONG pages we care about
> >   * @mask: indicates which pages we should protect
> > - * Used when we do not need to care about huge page mappings: e.g. during dirty
> > - * logging we do not have any such mappings.
> > + * @ret: true if all pages are write protected
> > + */
> > +static bool kvm_mmu_write_protect_largepage_masked(struct kvm *kvm,
> > +                                       struct kvm_memory_slot *slot,
> > +                                       gfn_t gfn_offset, unsigned long mask)
> > +{
> > +        struct kvm_rmap_head *rmap_head;
> > +        bool protected, all_protected;
> > +        gfn_t start_gfn = slot->base_gfn + gfn_offset;
> > +        int i;
> > +
> > +        all_protected = true;
> > +        while (mask) {
> > +                protected = false;
> > +                for (i = PG_LEVEL_2M; i <= KVM_MAX_HUGEPAGE_LEVEL; ++i) {
> > +                        rmap_head = __gfn_to_rmap(start_gfn + __ffs(mask), i, slot);
> > +                        protected |= __rmap_write_protect(kvm, rmap_head, false);
> > +                }
> > +
> > +                all_protected &= protected;
> > +                /* clear the first set bit */
> > +                mask &= mask - 1;
> > +        }
> > +
> > +        return all_protected;
> > +}
> > +
> > +/**
> > + * kvm_mmu_write_protect_pt_masked - write protect selected PT level pages
> > + * @kvm: kvm instance
> > + * @slot: slot to protect
> > + * @gfn_offset: start of the BITS_PER_LONG pages we care about
> > + * @mask: indicates which pages we should protect
> >   */
> >  static void kvm_mmu_write_protect_pt_masked(struct kvm *kvm,
> >                                              struct kvm_memory_slot *slot,
> > @@ -1679,18 +1710,25 @@ EXPORT_SYMBOL_GPL(kvm_mmu_clear_dirty_pt_masked);
> >
> >  /**
> >   * kvm_arch_mmu_enable_log_dirty_pt_masked - enable dirty logging for selected
> > - * PT level pages.
> > - *
> > - * It calls kvm_mmu_write_protect_pt_masked to write protect selected pages to
> > - * enable dirty logging for them.
> > - *
> > - * Used when we do not need to care about huge page mappings: e.g. during dirty
> > - * logging we do not have any such mappings.
> > + * dirty pages.
> >   */
> >  void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm,
> >                                               struct kvm_memory_slot *slot,
> >                                               gfn_t gfn_offset, unsigned long mask)
> >  {
> > +        /*
> > +         * If we're with initial-all-set, huge pages are NOT
> > +         * write protected when we start dirty logging, so we must
> > +         * write protect them here.
> > +         */
> > +        if (kvm_dirty_log_manual_protect_and_init_set(kvm)) {
> > +                if (kvm_mmu_write_protect_largepage_masked(kvm, slot,
> > +                                        gfn_offset, mask))
> > +                        return;
> > +        }
> > +
> > +        /* Then we can handle the 4K level pages */
> > +
> >          if (kvm_x86_ops.enable_log_dirty_pt_masked)
> >                  kvm_x86_ops.enable_log_dirty_pt_masked(kvm, slot, gfn_offset,
> >                                  mask);
> > @@ -5906,14 +5944,13 @@ static bool slot_rmap_write_protect(struct kvm *kvm,
> >  }
> >
> >  void kvm_mmu_slot_remove_write_access(struct kvm *kvm,
> > -                                      struct kvm_memory_slot *memslot,
> > -                                      int start_level)
> > +                                      struct kvm_memory_slot *memslot)
> >  {
> >          bool flush;
> >
> >          spin_lock(&kvm->mmu_lock);
> > -        flush = slot_handle_level(kvm, memslot, slot_rmap_write_protect,
> > -                                  start_level, KVM_MAX_HUGEPAGE_LEVEL, false);
> > +        flush = slot_handle_all_level(kvm, memslot, slot_rmap_write_protect,
> > +                                      false);
> >          spin_unlock(&kvm->mmu_lock);
> >
> >          /*
> > diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
> > index 819c185adf09..ba871c52ef8b 100644
> > --- a/arch/x86/kvm/vmx/vmx.c
> > +++ b/arch/x86/kvm/vmx/vmx.c
> > @@ -7538,8 +7538,7 @@ static void vmx_sched_in(struct kvm_vcpu *vcpu, int cpu)
> >  static void vmx_slot_enable_log_dirty(struct kvm *kvm,
> >                                        struct kvm_memory_slot *slot)
> >  {
> > -        if (!kvm_dirty_log_manual_protect_and_init_set(kvm))
> > -                kvm_mmu_slot_leaf_clear_dirty(kvm, slot);
> > +        kvm_mmu_slot_leaf_clear_dirty(kvm, slot);
> >          kvm_mmu_slot_largepage_remove_write_access(kvm, slot);
> >  }
> >
> > diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> > index d39d6cf1d473..c31c32f1424b 100644
> > --- a/arch/x86/kvm/x86.c
> > +++ b/arch/x86/kvm/x86.c
> > @@ -10225,22 +10225,18 @@ static void kvm_mmu_slot_apply_flags(struct kvm *kvm,
> >           * is enabled the D-bit or the W-bit will be cleared.
> >           */
> >          if (new->flags & KVM_MEM_LOG_DIRTY_PAGES) {
> > +                /*
> > +                 * If we're with initial-all-set, we don't need
> > +                 * to write protect any page because they're
> > +                 * reported as dirty already.
> > +                 */
> > +                if (kvm_dirty_log_manual_protect_and_init_set(kvm))
> > +                        return;
> > +
> >                  if (kvm_x86_ops.slot_enable_log_dirty) {
> >                          kvm_x86_ops.slot_enable_log_dirty(kvm, new);
> >                  } else {
> > -                        int level =
> > -                                kvm_dirty_log_manual_protect_and_init_set(kvm) ?
> > -                                PG_LEVEL_2M : PG_LEVEL_4K;
> > -
> > -                        /*
> > -                         * If we're with initial-all-set, we don't need
> > -                         * to write protect any small page because
> > -                         * they're reported as dirty already. However
> > -                         * we still need to write-protect huge pages
> > -                         * so that the page split can happen lazily on
> > -                         * the first write to the huge page.
> > -                         */
> > -                        kvm_mmu_slot_remove_write_access(kvm, new, level);
> > +                        kvm_mmu_slot_remove_write_access(kvm, new);
> >                  }
> >          } else {
> >                  if (kvm_x86_ops.slot_disable_log_dirty)
> >
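
A quick aside for anyone skimming the RFC: the loop in
kvm_mmu_write_protect_largepage_masked() is just the usual "visit each set
bit" walk over the 64-bit dirty mask.  Below is a stand-alone userspace
sketch of the same loop shape, not the patch itself: write_protect_huge_mappings()
is a made-up stand-in for the per-level __rmap_write_protect() calls, and
__builtin_ctzl() replaces the kernel's __ffs().

#include <stdbool.h>
#include <stdio.h>

/*
 * Stand-in for write protecting the 2M/1G rmaps covering @gfn; the real
 * helper calls __rmap_write_protect() for each hugepage level.  Here we
 * just pretend every gfn was still mapped huge.
 */
static bool write_protect_huge_mappings(unsigned long gfn)
{
	printf("write-protect huge mappings covering gfn 0x%lx\n", gfn);
	return true;
}

int main(void)
{
	unsigned long base_gfn = 0x1000;	/* slot->base_gfn + gfn_offset */
	unsigned long mask = 0x5;		/* gfns +0 and +2 are being cleared */
	bool all_protected = true;

	/* Same shape as the loop in kvm_mmu_write_protect_largepage_masked(). */
	while (mask) {
		/* __builtin_ctzl() stands in for the kernel's __ffs(). */
		unsigned long gfn = base_gfn + __builtin_ctzl(mask);

		all_protected &= write_protect_huge_mappings(gfn);

		/* Clear the lowest set bit and move on to the next dirty gfn. */
		mask &= mask - 1;
	}

	/*
	 * If everything in the mask was covered by now-write-protected huge
	 * mappings, the caller can skip the 4K-level handling entirely.
	 */
	printf("all protected: %d\n", all_protected);
	return 0;
}

Compiled with "gcc demo.c && ./a.out" this prints gfns 0x1000 and 0x1002 and
"all protected: 1", i.e. the case where kvm_arch_mmu_enable_log_dirty_pt_masked()
would return early without touching the 4K path.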