On Wed, Jun 10, 2009 at 07:23:25PM +0300, Izik Eidus wrote:
> Change the dirty page tracking to work with the dirty bit instead of
> write-protection page faults. Right now dirty page tracking works with
> the help of page faults: when we want to track a page for being dirty,
> we write protect it and mark it dirty when we get a write page fault.
> This patch moves that over to looking at the dirty bit of the spte.
>
> Signed-off-by: Izik Eidus <ieidus@xxxxxxxxxx>
> ---
>  arch/ia64/kvm/kvm-ia64.c        |    4 +++
>  arch/powerpc/kvm/powerpc.c      |    4 +++
>  arch/s390/kvm/kvm-s390.c        |    4 +++
>  arch/x86/include/asm/kvm_host.h |    3 ++
>  arch/x86/kvm/mmu.c              |   42 ++++++++++++++++++++++++++++++++++++--
>  arch/x86/kvm/svm.c              |    7 ++++++
>  arch/x86/kvm/vmx.c              |    7 ++++++
>  arch/x86/kvm/x86.c              |   26 ++++++++++++++++++++---
>  include/linux/kvm_host.h        |    1 +
>  virt/kvm/kvm_main.c             |    6 ++++-
>  10 files changed, 96 insertions(+), 8 deletions(-)
>
> diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c
> index 3199221..5914128 100644
> --- a/arch/ia64/kvm/kvm-ia64.c
> +++ b/arch/ia64/kvm/kvm-ia64.c
> @@ -1809,6 +1809,10 @@ void kvm_arch_exit(void)
>          kvm_vmm_info = NULL;
>  }
>
> +void kvm_arch_get_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot)
> +{
> +}
> +
>  static int kvm_ia64_sync_dirty_log(struct kvm *kvm,
>                                  struct kvm_dirty_log *log)
>  {
> diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
> index 2cf915e..6beb368 100644
> --- a/arch/powerpc/kvm/powerpc.c
> +++ b/arch/powerpc/kvm/powerpc.c
> @@ -418,6 +418,10 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log)
>          return -ENOTSUPP;
>  }
>  #ifndef KVM_ARCH_HAVE_DIRTY_LOG
> +void kvm_arch_get_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot)
> +{
> +}
> +

#endif in virt/kvm/main.c

> index c7b0cc2..8a24149 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -527,6 +527,7 @@ struct kvm_x86_ops {
>          int (*set_tss_addr)(struct kvm *kvm, unsigned int addr);
>          int (*get_tdp_level)(void);
>          u64 (*get_mt_mask)(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio);
> +        int (*dirty_bit_support)(void);
>  };
>
>  extern struct kvm_x86_ops *kvm_x86_ops;
> @@ -796,4 +797,6 @@ int kvm_unmap_hva(struct kvm *kvm, unsigned long hva);
>  int kvm_age_hva(struct kvm *kvm, unsigned long hva);
>  int cpuid_maxphyaddr(struct kvm_vcpu *vcpu);
>
> +int is_dirty_and_clean_rmapp(struct kvm *kvm, unsigned long *rmapp);
> +
>  #endif /* _ASM_X86_KVM_HOST_H */
> diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
> index 809cce0..500e0e2 100644
> --- a/arch/x86/kvm/mmu.c
> +++ b/arch/x86/kvm/mmu.c
> @@ -140,6 +140,8 @@ module_param(oos_shadow, bool, 0644);
>  #define ACC_USER_MASK    PT_USER_MASK
>  #define ACC_ALL          (ACC_EXEC_MASK | ACC_WRITE_MASK | ACC_USER_MASK)
>
> +#define SPTE_DONT_DIRTY (1ULL << PT_FIRST_AVAIL_BITS_SHIFT)
> +
>  #define SHADOW_PT_INDEX(addr, level) PT64_INDEX(addr, level)
>
>  struct kvm_rmap_desc {
> @@ -629,6 +631,29 @@ static u64 *rmap_next(struct kvm *kvm, unsigned long *rmapp, u64 *spte)
>          return NULL;
>  }
>
> +int is_dirty_and_clean_rmapp(struct kvm *kvm, unsigned long *rmapp)
> +{
> +        u64 *spte;
> +        int dirty = 0;
> +
> +        if (!shadow_dirty_mask)
> +                return 0;
> +
> +        spte = rmap_next(kvm, rmapp, NULL);
> +        while (spte) {
> +                if (*spte & PT_DIRTY_MASK) {
> +                        set_shadow_pte(spte, (*spte &= ~PT_DIRTY_MASK) |
> +                                       SPTE_DONT_DIRTY);
> +                        dirty = 1;
> +                        break;
> +                }
> +                spte = rmap_next(kvm, rmapp, spte);
> +        }
> +
> +        return dirty;
> +}
> +
> +
>  static int rmap_write_protect(struct kvm *kvm, u64 gfn)
>  {
>          unsigned long *rmapp;
> @@ -1381,11 +1406,17 @@ static int mmu_zap_unsync_children(struct kvm *kvm,
>  static int kvm_mmu_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp)
>  {
>          int ret;
> +        int i;
> +
>          ++kvm->stat.mmu_shadow_zapped;
>          ret = mmu_zap_unsync_children(kvm, sp);
>          kvm_mmu_page_unlink_children(kvm, sp);
>          kvm_mmu_unlink_parents(kvm, sp);
>          kvm_flush_remote_tlbs(kvm);
> +        for (i = 0; i < PT64_ENT_PER_PAGE; ++i) {
> +                if (sp->spt[i] & PT_DIRTY_MASK)
> +                        mark_page_dirty(kvm, sp->gfns[i]);
> +        }

Also need to transfer the dirty bit in other places, probably.
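Something along these lines (untested, illustrative only, and the helper
name is made up) could be called from rmap_remove()/mmu_set_spte() before
an spte is dropped or overwritten, so the hardware dirty bit is not lost
outside of kvm_mmu_zap_page():

/*
 * Illustrative helper only: fold the hardware dirty bit into the dirty
 * log before a shadow pte goes away, mirroring the check added in
 * kvm_mmu_zap_page() above.
 */
static void transfer_spte_dirty_bit(struct kvm *kvm, u64 spte, gfn_t gfn)
{
        if (shadow_dirty_mask && (spte & PT_DIRTY_MASK))
                mark_page_dirty(kvm, gfn);
}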
>          if (!sp->role.invalid && !sp->role.direct)
>                  unaccount_shadowed(kvm, sp->gfn);
>          if (sp->unsync)
> @@ -1676,7 +1707,10 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
>           * whether the guest actually used the pte (in order to detect
>           * demand paging).
>           */
> -        spte = shadow_base_present_pte | shadow_dirty_mask;
> +        spte = shadow_base_present_pte;
> +        if (!(spte & SPTE_DONT_DIRTY))
> +                spte |= shadow_dirty_mask;
> +
>          if (!speculative)
>                  spte |= shadow_accessed_mask;
>          if (!dirty)
> @@ -1725,8 +1759,10 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
>                  }
>          }
>
> -        if (pte_access & ACC_WRITE_MASK)
> -                mark_page_dirty(vcpu->kvm, gfn);
> +        if (!shadow_dirty_mask) {
> +                if (pte_access & ACC_WRITE_MASK)
> +                        mark_page_dirty(vcpu->kvm, gfn);
> +        }

You can avoid the mark_page_dirty if SPTE_DONT_DIRTY? (which is a good
idea, gfn_to_memslot_unaliased and friends show up high in profiling).
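Roughly, and only as one way to read that suggestion (untested), the tail
of set_spte() could key the skip on the per-spte flag rather than on the
global shadow_dirty_mask:

        /*
         * Only pay for mark_page_dirty()/gfn_to_memslot when the dirty
         * bit is not being tracked in hardware for this spte.
         */
        if (!(spte & SPTE_DONT_DIRTY) && (pte_access & ACC_WRITE_MASK))
                mark_page_dirty(vcpu->kvm, gfn);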
>  set_pte:
>          set_shadow_pte(shadow_pte, spte);
> diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
> index 37397f6..5b6351e 100644
> --- a/arch/x86/kvm/svm.c
> +++ b/arch/x86/kvm/svm.c
> @@ -2724,6 +2724,11 @@ static u64 svm_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
>          return 0;
>  }
>
> +static int svm_dirty_bit_support(void)
> +{
> +        return 1;
> +}
> +
>  static struct kvm_x86_ops svm_x86_ops = {
>          .cpu_has_kvm_support = has_svm,
>          .disabled_by_bios = is_disabled,
> @@ -2785,6 +2790,8 @@ static struct kvm_x86_ops svm_x86_ops = {
>          .set_tss_addr = svm_set_tss_addr,
>          .get_tdp_level = get_npt_level,
>          .get_mt_mask = svm_get_mt_mask,
> +
> +        .dirty_bit_support = svm_dirty_bit_support,
>  };
>
>  static int __init svm_init(void)
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index 673bcb3..8903314 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -3774,6 +3774,11 @@ static u64 vmx_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
>          return ret;
>  }
>
> +static int vmx_dirty_bit_support(void)
> +{
> +        return false;
> +}
> +
>  static struct kvm_x86_ops vmx_x86_ops = {
>          .cpu_has_kvm_support = cpu_has_kvm_support,
>          .disabled_by_bios = vmx_disabled_by_bios,
> @@ -3833,6 +3838,8 @@ static struct kvm_x86_ops vmx_x86_ops = {
>          .set_tss_addr = vmx_set_tss_addr,
>          .get_tdp_level = get_ept_level,
>          .get_mt_mask = vmx_get_mt_mask,
> +
> +        .dirty_bit_support = vmx_dirty_bit_support,
>  };
>
>  static int __init vmx_init(void)
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 272e2e8..e06387c 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -1963,6 +1963,20 @@ static int kvm_vm_ioctl_reinject(struct kvm *kvm,
>          return 0;
>  }
>
> +void kvm_arch_get_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot)
> +{
> +        int i;
> +
> +        spin_lock(&kvm->mmu_lock);
> +        for (i = 0; i < memslot->npages; ++i) {
> +                if (!test_bit(i, memslot->dirty_bitmap)) {
> +                        if (is_dirty_and_clean_rmapp(kvm, &memslot->rmap[i]))
> +                                set_bit(i, memslot->dirty_bitmap);
> +                }
> +        }
> +        spin_unlock(&kvm->mmu_lock);
> +}
> +
>  /*
>   * Get (and clear) the dirty memory log for a memory slot.
>   */
> @@ -1982,10 +1996,14 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
>
>          /* If nothing is dirty, don't bother messing with page tables. */
>          if (is_dirty) {
> -                spin_lock(&kvm->mmu_lock);
> -                kvm_mmu_slot_remove_write_access(kvm, log->slot);
> -                spin_unlock(&kvm->mmu_lock);
> -                kvm_flush_remote_tlbs(kvm);
> +                if (!kvm_x86_ops->dirty_bit_support()) {
> +                        spin_lock(&kvm->mmu_lock);
> +                        /* remove_write_access() flush the tlb */
> +                        kvm_mmu_slot_remove_write_access(kvm, log->slot);
> +                        spin_unlock(&kvm->mmu_lock);
> +                } else {
> +                        kvm_flush_remote_tlbs(kvm);
> +                }
>                  memslot = &kvm->memslots[log->slot];
>                  n = ALIGN(memslot->npages, BITS_PER_LONG) / 8;
>                  memset(memslot->dirty_bitmap, 0, n);
> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> index cdf1279..d1657a3 100644
> --- a/include/linux/kvm_host.h
> +++ b/include/linux/kvm_host.h
> @@ -250,6 +250,7 @@ int kvm_dev_ioctl_check_extension(long ext);
>
>  int kvm_get_dirty_log(struct kvm *kvm,
>                          struct kvm_dirty_log *log, int *is_dirty);
> +void kvm_arch_get_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot);
>  int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
>                                  struct kvm_dirty_log *log);
>
> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> index 3046e9c..a876231 100644
> --- a/virt/kvm/kvm_main.c
> +++ b/virt/kvm/kvm_main.c
> @@ -1135,8 +1135,11 @@ int __kvm_set_memory_region(struct kvm *kvm,
>          }
>
>          /* Free page dirty bitmap if unneeded */
> -        if (!(new.flags & KVM_MEM_LOG_DIRTY_PAGES))
> +        if (!(new.flags & KVM_MEM_LOG_DIRTY_PAGES)) {
>                  new.dirty_bitmap = NULL;
> +                if (old.flags & KVM_MEM_LOG_DIRTY_PAGES)
> +                        kvm_arch_flush_shadow(kvm);
> +        }

What's this for?

The idea of making the dirty bit accessible (it can also be used to map
host ptes read-only when the guest fault is read-only, for KSM (*)) is
good. But better to first introduce the infrastructure to handle the
dirty bit (making sure the bit is transferred properly), and then have
dirty logging make use of it.

(*) EPT violations do not transfer fault information to the page fault
handler, but it is available (there's a vm-exit field); a rough sketch of
reading it follows at the end of this mail.

>          r = -ENOMEM;
>
> @@ -1279,6 +1282,7 @@ int kvm_get_dirty_log(struct kvm *kvm,
>          if (!memslot->dirty_bitmap)
>                  goto out;
>
> +        kvm_arch_get_dirty_log(kvm, memslot);
>          n = ALIGN(memslot->npages, BITS_PER_LONG) / 8;
>
>          for (i = 0; !any && i < n/sizeof(long); ++i)
> --
> 1.5.6.5
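Regarding the vm-exit field mentioned in the footnote above, here is a
rough, untested sketch (the helper name is made up) of pulling the access
type out of the EPT violation exit qualification in vmx.c; bit 1 of the
qualification is set for data writes, so a read-only guest fault could be
mapped read-only on the host side (useful for KSM):

/*
 * Sketch only: in handle_ept_violation() the exit qualification tells
 * us whether the faulting access was a write.
 */
static int ept_violation_is_write(void)
{
        unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION);

        return !!(exit_qualification & (1 << 1));       /* bit 1: data write */
}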