2015-01-28 10:54+0800, Kai Huang:
> This patch adds new mmu layer functions to clear/set D-bit for memory slot, and
> to write protect superpages for memory slot.
>
> In case of PML, CPU logs the dirty GPA automatically to PML buffer when CPU
> updates D-bit from 0 to 1, therefore we don't have to write protect 4K pages,
> instead, we only need to clear D-bit in order to log that GPA.
>
> For superpages, we still write protect it and let page fault code to handle
> dirty page logging, as we still need to split superpage to 4K pages in PML.
>
> As PML is always enabled during guest's lifetime, to eliminate unnecessary PML
> GPA logging, we set D-bit manually for the slot with dirty logging disabled.
>
> Signed-off-by: Kai Huang <kai.huang@xxxxxxxxxxxxxxx>

This message contains just several stylistic tips; I wasn't able to learn
enough about large pages to review today.

> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h

> diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
> index b18e65c..c438224 100644
> --- a/arch/x86/kvm/mmu.c
> +++ b/arch/x86/kvm/mmu.c
> @@ -4368,6 +4448,121 @@ void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot)
> +void kvm_mmu_slot_largepage_remove_write_access(struct kvm *kvm,
> +                                        struct kvm_memory_slot *memslot)
> +{
> +        gfn_t last_gfn;
> +        int i;
> +        bool flush = false;
> +
> +        last_gfn = memslot->base_gfn + memslot->npages - 1;
> +
> +        spin_lock(&kvm->mmu_lock);
> +
> +        for (i = PT_PAGE_TABLE_LEVEL + 1; /* skip rmap for 4K page */

("+ 1" is the only difference from kvm_mmu_slot_remove_write_access();
a new argument for the initial level would be better.
Btw, PT_PAGE_TABLE_LEVEL + 1 == PT_DIRECTORY_LEVEL.)

> +             i < PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES; ++i) {
> +                unsigned long *rmapp;
> +                unsigned long last_index, index;
> +
> +                rmapp = memslot->arch.rmap[i - PT_PAGE_TABLE_LEVEL];
> +                last_index = gfn_to_index(last_gfn, memslot->base_gfn, i);
> +
> +                for (index = 0; index <= last_index; ++index, ++rmapp) {
> +                        if (*rmapp)
> +                                flush |= __rmap_write_protect(kvm, rmapp,
> +                                                false);
> +
> +                        if (need_resched() || spin_needbreak(&kvm->mmu_lock))
> +                                cond_resched_lock(&kvm->mmu_lock);
> +                }
> +        }
> +        spin_unlock(&kvm->mmu_lock);
> +
> +        /* see kvm_mmu_slot_remove_write_access */
> +        lockdep_assert_held(&kvm->slots_lock);
> +
> +        if (flush)
> +                kvm_flush_remote_tlbs(kvm);
> +}

> +void kvm_mmu_slot_set_dirty(struct kvm *kvm,
> +                            struct kvm_memory_slot *memslot)
> +{
> +        gfn_t last_gfn;
> +        int i;
> +        bool flush = false;
> +
> +        last_gfn = memslot->base_gfn + memslot->npages - 1;
> +
> +        spin_lock(&kvm->mmu_lock);
> +
> +        for (i = PT_PAGE_TABLE_LEVEL;
> +             i < PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES; ++i) {
> +                unsigned long *rmapp;
> +                unsigned long last_index, index;
> +
> +                rmapp = memslot->arch.rmap[i - PT_PAGE_TABLE_LEVEL];
> +                last_index = gfn_to_index(last_gfn, memslot->base_gfn, i);
> +
> +                for (index = 0; index <= last_index; ++index, ++rmapp) {
> +                        if (*rmapp)
> +                                flush |= __rmap_set_dirty(kvm, rmapp);

(And yet another similar walker ... We can either pass a worker function
accepting 'kvm' and 'rmapp', use a 'switch' with a new operations enum, or
have code duplication.  Paolo?)
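To make the worker-function option concrete, here is a rough, untested
sketch; 'slot_rmap_op_t', 'kvm_mmu_slot_walk_rmaps' and
'slot_rmap_write_protect' are names invented for illustration, not
existing KVM interfaces:

typedef bool (*slot_rmap_op_t)(struct kvm *kvm, unsigned long *rmapp);

/*
 * Walk all rmaps of 'memslot' between 'start_level' and 'end_level'
 * (inclusive), applying 'op' to every nonempty rmap.  Returns true if any
 * spte changed; the TLB flush is left to the caller, so each caller keeps
 * its own comment on why flushing outside mmu_lock is safe.
 */
static bool kvm_mmu_slot_walk_rmaps(struct kvm *kvm,
                                    struct kvm_memory_slot *memslot,
                                    slot_rmap_op_t op,
                                    int start_level, int end_level)
{
        gfn_t last_gfn = memslot->base_gfn + memslot->npages - 1;
        bool flush = false;
        int i;

        spin_lock(&kvm->mmu_lock);

        for (i = start_level; i <= end_level; ++i) {
                unsigned long *rmapp = memslot->arch.rmap[i - PT_PAGE_TABLE_LEVEL];
                unsigned long last_index = gfn_to_index(last_gfn,
                                                memslot->base_gfn, i);
                unsigned long index;

                for (index = 0; index <= last_index; ++index, ++rmapp) {
                        if (*rmapp)
                                flush |= op(kvm, rmapp);

                        /* mmu_lock can be held for a long time; yield */
                        if (need_resched() || spin_needbreak(&kvm->mmu_lock))
                                cond_resched_lock(&kvm->mmu_lock);
                }
        }

        spin_unlock(&kvm->mmu_lock);

        return flush;
}

__rmap_set_dirty and __rmap_clear_dirty already have the right signature;
__rmap_write_protect would need a trivial wrapper because of its extra
'pt_protect' argument:

static bool slot_rmap_write_protect(struct kvm *kvm, unsigned long *rmapp)
{
        return __rmap_write_protect(kvm, rmapp, false);
}

Each of the three slot-level functions would then shrink to one call, e.g.

        flush = kvm_mmu_slot_walk_rmaps(kvm, memslot, __rmap_set_dirty,
                        PT_PAGE_TABLE_LEVEL,
                        PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES - 1);

and, as noted below, the start/end level arguments would also cover the
largepage-only and leaf-only variants.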
> +
> +                        if (need_resched() || spin_needbreak(&kvm->mmu_lock))
> +                                cond_resched_lock(&kvm->mmu_lock);
> +                }
> +        }
> +
> +        spin_unlock(&kvm->mmu_lock);
> +
> +        lockdep_assert_held(&kvm->slots_lock);
> +
> +        /* see kvm_mmu_slot_leaf_clear_dirty */
> +        if (flush)
> +                kvm_flush_remote_tlbs(kvm);
> +}
> +EXPORT_SYMBOL_GPL(kvm_mmu_slot_set_dirty);

> +void kvm_mmu_slot_leaf_clear_dirty(struct kvm *kvm,
> +                                   struct kvm_memory_slot *memslot)
> +{
> +        gfn_t last_gfn;
> +        unsigned long *rmapp;
> +        unsigned long last_index, index;
> +        bool flush = false;
> +
> +        last_gfn = memslot->base_gfn + memslot->npages - 1;
> +
> +        spin_lock(&kvm->mmu_lock);
> +

(If we abstracted with a switch or a function, we could also add a max
level argument to cover this function.)

> +        rmapp = memslot->arch.rmap[PT_PAGE_TABLE_LEVEL - 1];
> +        last_index = gfn_to_index(last_gfn, memslot->base_gfn,
> +                        PT_PAGE_TABLE_LEVEL);
> +
> +        for (index = 0; index <= last_index; ++index, ++rmapp) {
> +                if (*rmapp)
> +                        flush |= __rmap_clear_dirty(kvm, rmapp);
> +
> +                if (need_resched() || spin_needbreak(&kvm->mmu_lock))
> +                        cond_resched_lock(&kvm->mmu_lock);
> +        }
> +
> +        spin_unlock(&kvm->mmu_lock);
> +
> +        lockdep_assert_held(&kvm->slots_lock);
> +
> +        /*
> +         * It's also safe to flush TLBs out of mmu lock here as currently this
> +         * function is only used for dirty logging, in which case flushing TLB
> +         * out of mmu lock also guarantees no dirty pages will be lost in
> +         * dirty_bitmap.
> +         */
> +        if (flush)
> +                kvm_flush_remote_tlbs(kvm);
> +}
> +EXPORT_SYMBOL_GPL(kvm_mmu_slot_leaf_clear_dirty);

> @@ -1215,6 +1215,60 @@ static bool __rmap_write_protect(struct kvm *kvm, unsigned long *rmapp,

(After looking at the following two pairs of functions, you'll hopefully
understand why I don't like C very much.)

> +static bool spte_clear_dirty(struct kvm *kvm, u64 *sptep)
> +{
> +        u64 spte = *sptep;
> +
> +        rmap_printk("rmap_clear_dirty: spte %p %llx\n", sptep, *sptep);
> +
> +        spte &= ~shadow_dirty_mask;
> +
> +        return mmu_spte_update(sptep, spte);
> +}

> +static bool spte_set_dirty(struct kvm *kvm, u64 *sptep)
> +{
> +        u64 spte = *sptep;
> +
> +        rmap_printk("rmap_set_dirty: spte %p %llx\n", sptep, *sptep);
> +
> +        spte |= shadow_dirty_mask;
> +
> +        return mmu_spte_update(sptep, spte);
> +}

> +static bool __rmap_clear_dirty(struct kvm *kvm, unsigned long *rmapp)
> +{
> +        u64 *sptep;
> +        struct rmap_iterator iter;
> +        bool flush = false;
> +
> +        for (sptep = rmap_get_first(*rmapp, &iter); sptep;) {
> +                BUG_ON(!(*sptep & PT_PRESENT_MASK));
> +
> +                flush |= spte_clear_dirty(kvm, sptep);
> +                sptep = rmap_get_next(&iter);
> +        }
> +
> +        return flush;
> +}

> +static bool __rmap_set_dirty(struct kvm *kvm, unsigned long *rmapp)
> +{
> +        u64 *sptep;
> +        struct rmap_iterator iter;
> +        bool flush = false;
> +
> +        for (sptep = rmap_get_first(*rmapp, &iter); sptep;) {
> +                BUG_ON(!(*sptep & PT_PRESENT_MASK));
> +
> +                flush |= spte_set_dirty(kvm, sptep);
> +                sptep = rmap_get_next(&iter);
> +        }
> +
> +        return flush;
> +}
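To make the complaint about the two pairs concrete: one helper with a
'set' flag plus two thin wrappers would remove the duplication.  An
untested sketch, where 'spte_change_dirty' and '__rmap_change_dirty' are
names invented here:

static bool spte_change_dirty(struct kvm *kvm, u64 *sptep, bool set)
{
        u64 spte = *sptep;

        rmap_printk("rmap_%s_dirty: spte %p %llx\n",
                    set ? "set" : "clear", sptep, *sptep);

        if (set)
                spte |= shadow_dirty_mask;
        else
                spte &= ~shadow_dirty_mask;

        /* mmu_spte_update() tells us whether a TLB flush is needed */
        return mmu_spte_update(sptep, spte);
}

static bool __rmap_change_dirty(struct kvm *kvm, unsigned long *rmapp,
                                bool set)
{
        u64 *sptep;
        struct rmap_iterator iter;
        bool flush = false;

        for (sptep = rmap_get_first(*rmapp, &iter); sptep;
             sptep = rmap_get_next(&iter)) {
                BUG_ON(!(*sptep & PT_PRESENT_MASK));

                flush |= spte_change_dirty(kvm, sptep, set);
        }

        return flush;
}

static bool __rmap_clear_dirty(struct kvm *kvm, unsigned long *rmapp)
{
        return __rmap_change_dirty(kvm, rmapp, false);
}

static bool __rmap_set_dirty(struct kvm *kvm, unsigned long *rmapp)
{
        return __rmap_change_dirty(kvm, rmapp, true);
}

Callers (and the walker sketch earlier in this mail) would keep using
__rmap_set_dirty/__rmap_clear_dirty, so nothing else has to change.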