Lai Jiangshan once tried to rewrite kvm_mmu_slot_remove_write_access()
using rmap:
  "kvm: rework remove-write-access for a slot"
  http://www.spinics.net/lists/kvm/msg35871.html

One problem pointed out there was that this approach might hurt cache
locality and slow things down.

But if we restrict the story to dirty logging, we notice that only a
small portion of the pages actually need to be write protected.

For example, I have confirmed that even when we are playing with tools
like x11perf, the dirty ratio of the frame buffer bitmap is almost
always less than 10%.  In the case of live migration, we will see even
more sparseness in the usual workload because the RAM size is really
big.

So this patch uses his approach with a small modification: the switched
out dirty bitmap is used as a hint to restrict the rmap traversal.

In the future, we can also use this to selectively write protect pages
and reduce unwanted page faults.

Signed-off-by: Takuya Yoshikawa <yoshikawa.takuya@xxxxxxxxxxxxx>
Cc: Lai Jiangshan <laijs@xxxxxxxxxxxxxx>
---
 arch/x86/include/asm/kvm_host.h |    2 ++
 arch/x86/kvm/mmu.c              |   39 +++++++++++++++++++++++++++++++++++++++
 arch/x86/kvm/x86.c              |   27 ++++++++++++++++++++++++---
 3 files changed, 65 insertions(+), 3 deletions(-)
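As background for the bitmap-as-hint idea described above, here is a
minimal user-space sketch of the bitmap-guided walk (illustrative only,
not part of the patch).  The kernel side uses for_each_set_bit() and
write protects through the slot's rmap; the bitmap size, the
__builtin_ctzl() loop and the write_protect_gfn() stand-in below are
made up for the example.

#include <stdio.h>

#define NPAGES		1024
#define BITS_PER_LONG	(8 * sizeof(unsigned long))

static unsigned long dirty_bitmap[NPAGES / BITS_PER_LONG];

/* Stand-in for the write protection done through the rmap in the patch. */
static void write_protect_gfn(unsigned long gfn_offset)
{
	printf("write protecting gfn offset %lu\n", gfn_offset);
}

int main(void)
{
	unsigned long i;

	/* Pretend a few pages were dirtied: sparse, as in the changelog. */
	dirty_bitmap[0] |= 1UL << 3;
	dirty_bitmap[1] |= 1UL << 17;
	dirty_bitmap[7] |= 1UL << 0;

	/* Only the set bits are visited; clean pages cost nothing. */
	for (i = 0; i < NPAGES / BITS_PER_LONG; i++) {
		unsigned long word = dirty_bitmap[i];

		while (word) {
			unsigned long bit = __builtin_ctzl(word); /* lowest set bit */

			write_protect_gfn(i * BITS_PER_LONG + bit);
			word &= word - 1;	/* clear that bit */
		}
	}
	return 0;
}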
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index b04c0fa..bc170e4 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -617,6 +617,8 @@ void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask,
 int kvm_mmu_reset_context(struct kvm_vcpu *vcpu);
 
 void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot);
+void kvm_mmu_slot_remove_write_access_mask(struct kvm *kvm,
+			struct kvm_memory_slot *memslot, unsigned long *dirty_bitmap);
 void kvm_mmu_zap_all(struct kvm *kvm);
 unsigned int kvm_mmu_calculate_mmu_pages(struct kvm *kvm);
 void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int kvm_nr_mmu_pages);
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index bdb9fa9..3506a64 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -3454,6 +3454,45 @@ void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot)
 	kvm_flush_remote_tlbs(kvm);
 }
 
+static void rmapp_remove_write_access(struct kvm *kvm, unsigned long *rmapp)
+{
+	u64 *spte = rmap_next(kvm, rmapp, NULL);
+
+	while (spte) {
+		/* avoid RMW */
+		if (is_writable_pte(*spte))
+			*spte &= ~PT_WRITABLE_MASK;
+		spte = rmap_next(kvm, rmapp, spte);
+	}
+}
+
+/*
+ * Write protect the pages set dirty in a given bitmap.
+ */
+void kvm_mmu_slot_remove_write_access_mask(struct kvm *kvm,
+					   struct kvm_memory_slot *memslot,
+					   unsigned long *dirty_bitmap)
+{
+	int i;
+	unsigned long gfn_offset;
+
+	for_each_set_bit(gfn_offset, dirty_bitmap, memslot->npages) {
+		rmapp_remove_write_access(kvm, &memslot->rmap[gfn_offset]);
+
+		for (i = 0; i < KVM_NR_PAGE_SIZES - 1; i++) {
+			unsigned long gfn = memslot->base_gfn + gfn_offset;
+			unsigned long huge = KVM_PAGES_PER_HPAGE(i + 2);
+			int idx = gfn / huge - memslot->base_gfn / huge;
+
+			if (!(gfn_offset || (gfn % huge)))
+				break;
+			rmapp_remove_write_access(kvm,
+					&memslot->lpage_info[i][idx].rmap_pde);
+		}
+	}
+	kvm_flush_remote_tlbs(kvm);
+}
+
 void kvm_mmu_zap_all(struct kvm *kvm)
 {
 	struct kvm_mmu_page *sp, *node;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 038d719..3556b4d 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3194,12 +3194,27 @@ static int kvm_vm_ioctl_reinject(struct kvm *kvm,
 }
 
 /*
+ * Check the dirty bit ratio of a given memslot.
+ *   0: clean
+ *   1: sparse
+ *   2: dense
+ */
+static int dirty_bitmap_density(struct kvm_memory_slot *memslot)
+{
+	if (!memslot->num_dirty_bits)
+		return 0;
+	if (memslot->num_dirty_bits < memslot->npages / 128)
+		return 1;
+	return 2;
+}
+
+/*
  * Get (and clear) the dirty memory log for a memory slot.
  */
 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
 				      struct kvm_dirty_log *log)
 {
-	int r;
+	int r, density;
 	struct kvm_memory_slot *memslot;
 	unsigned long n;
@@ -3217,7 +3232,8 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
 	n = kvm_dirty_bitmap_bytes(memslot);
 
 	/* If nothing is dirty, don't bother messing with page tables. */
-	if (memslot->num_dirty_bits) {
+	density = dirty_bitmap_density(memslot);
+	if (density) {
 		struct kvm_memslots *slots, *old_slots;
 		unsigned long *dirty_bitmap;
 
@@ -3242,7 +3258,12 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
 		kfree(old_slots);
 
 		spin_lock(&kvm->mmu_lock);
-		kvm_mmu_slot_remove_write_access(kvm, log->slot);
+		if (density == 2)
+			kvm_mmu_slot_remove_write_access(kvm, log->slot);
+		else
+			kvm_mmu_slot_remove_write_access_mask(kvm,
+					&slots->memslots[log->slot],
+					dirty_bitmap);
 		spin_unlock(&kvm->mmu_lock);
 
 		r = -EFAULT;
-- 
1.7.1
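A note on the density heuristic: the 1/128 threshold used by
dirty_bitmap_density() can be tried out with the user-space sketch below
(illustrative only, not part of the patch; the slot size and the
dirty-page counts are made-up examples).

#include <stdio.h>

/* Mirrors dirty_bitmap_density() from the patch: 0 clean, 1 sparse, 2 dense. */
static int dirty_bitmap_density(unsigned long num_dirty_bits, unsigned long npages)
{
	if (!num_dirty_bits)
		return 0;		/* nothing to write protect */
	if (num_dirty_bits < npages / 128)
		return 1;		/* bitmap-guided rmap walk */
	return 2;			/* write protect the whole slot */
}

int main(void)
{
	/* A 1GB slot of 4KB pages: 262144 pages, so the threshold is 2048. */
	unsigned long npages = 262144;
	unsigned long samples[] = { 0, 1000, 5000 };
	int i;

	for (i = 0; i < 3; i++)
		printf("%lu dirty pages -> density %d\n",
		       samples[i], dirty_bitmap_density(samples[i], npages));
	return 0;
}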