On Mon, Aug 06, 2012 at 08:06:03PM +1000, Paul Mackerras wrote: > >From 44067a8ee15021583636bea4cc1d47e5370b8397 Mon Sep 17 00:00:00 2001 > From: Paul Mackerras <paulus@xxxxxxxxx> > Date: Mon, 30 Jul 2012 16:40:54 +1000 > Subject: > > At present the HV style of KVM doesn't handle deletion or modification > of memory slots correctly. Deletion occurs when userspace does the > KVM_SET_USER_MEMORY_REGION with arguments specifying a new length of > zero for a slot that contains memory. Modification occurs when the > arguments specify a new guest_phys_addr or flags. > > To allow the HV code to know which operation (creation, deletion or > modification) is being requested, it needs to see the old and new > contents of the memslot. kvm_arch_prepare_memory_region has this > information, so we modify it to pass it down to > kvmppc_core_prepare_memory_region. We then modify the HV version > of that to check which operation is being performed. If it is a > deletion, we call a new function kvmppc_unmap_memslot to remove any > HPT (hashed page table) entries referencing the memory being removed. > Similarly, if we are modifying the guest physical address we also > remove any HPT entries. If the KVM_MEM_LOG_DIRTY_PAGES flag is now > set and wasn't before, we call kvmppc_hv_get_dirty_log to clear any > dirty bits so we can detect all modifications from now on. 
> > Signed-off-by: Paul Mackerras <paulus@xxxxxxxxx> > --- > arch/powerpc/include/asm/kvm_ppc.h | 4 +++ > arch/powerpc/kvm/book3s_64_mmu_hv.c | 27 ++++++++++++++-- > arch/powerpc/kvm/book3s_hv.c | 61 +++++++++++++++++++++++------------ > arch/powerpc/kvm/book3s_hv_rm_mmu.c | 2 +- > arch/powerpc/kvm/book3s_pr.c | 2 ++ > arch/powerpc/kvm/booke.c | 2 ++ > arch/powerpc/kvm/powerpc.c | 2 +- > 7 files changed, 76 insertions(+), 24 deletions(-) > > diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h > index 0124937..044c921 100644 > --- a/arch/powerpc/include/asm/kvm_ppc.h > +++ b/arch/powerpc/include/asm/kvm_ppc.h > @@ -140,11 +140,15 @@ extern void kvm_release_hpt(struct kvmppc_linear_info *li); > extern int kvmppc_core_init_vm(struct kvm *kvm); > extern void kvmppc_core_destroy_vm(struct kvm *kvm); > extern int kvmppc_core_prepare_memory_region(struct kvm *kvm, > + struct kvm_memory_slot *memslot, > + struct kvm_memory_slot *old, > struct kvm_userspace_memory_region *mem); > extern void kvmppc_core_commit_memory_region(struct kvm *kvm, > struct kvm_userspace_memory_region *mem); > extern int kvm_vm_ioctl_get_smmu_info(struct kvm *kvm, > struct kvm_ppc_smmu_info *info); > +extern void kvmppc_unmap_memslot(struct kvm *kvm, > + struct kvm_memory_slot *memslot); > > extern int kvmppc_bookehv_init(void); > extern void kvmppc_bookehv_exit(void); > diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c > index 3c635c0..87735a7 100644 > --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c > +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c > @@ -850,7 +850,8 @@ static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp, > psize = hpte_page_size(hptep[0], ptel); > if ((hptep[0] & HPTE_V_VALID) && > hpte_rpn(ptel, psize) == gfn) { > - hptep[0] |= HPTE_V_ABSENT; > + if (kvm->arch.using_mmu_notifiers) > + hptep[0] |= HPTE_V_ABSENT; > kvmppc_invalidate_hpte(kvm, hptep, i); > /* Harvest R and C */ > rcbits = hptep[1] & (HPTE_R_R | 
HPTE_R_C); > @@ -877,6 +878,28 @@ int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end) > return 0; > } > > +void kvmppc_unmap_memslot(struct kvm *kvm, struct kvm_memory_slot *memslot) > +{ > + unsigned long *rmapp; > + unsigned long gfn; > + unsigned long n; > + > + rmapp = memslot->rmap; > + gfn = memslot->base_gfn; > + for (n = memslot->npages; n; --n) { > + /* > + * Testing the present bit without locking is OK because > + * the memslot has been marked invalid already, and hence > + * no new HPTEs referencing this page can be created, > + * thus the present bit can't go from 0 to 1. > + */ > + if (*rmapp & KVMPPC_RMAP_PRESENT) > + kvm_unmap_rmapp(kvm, rmapp, gfn); > + ++rmapp; > + ++gfn; > + } > +} > + > static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp, > unsigned long gfn) > { > @@ -1039,7 +1062,7 @@ long kvmppc_hv_get_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot) > rmapp = memslot->rmap; > map = memslot->dirty_bitmap; > for (i = 0; i < memslot->npages; ++i) { > - if (kvm_test_clear_dirty(kvm, rmapp)) > + if (kvm_test_clear_dirty(kvm, rmapp) && map) > __set_bit_le(i, map); > ++rmapp; > } > diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c > index 83e929e..aad20ca0 100644 > --- a/arch/powerpc/kvm/book3s_hv.c > +++ b/arch/powerpc/kvm/book3s_hv.c > @@ -1299,26 +1299,6 @@ static unsigned long slb_pgsize_encoding(unsigned long psize) > return senc; > } > > -int kvmppc_core_prepare_memory_region(struct kvm *kvm, > - struct kvm_userspace_memory_region *mem) > -{ > - unsigned long npages; > - unsigned long *phys; > - > - /* Allocate a slot_phys array */ > - phys = kvm->arch.slot_phys[mem->slot]; > - if (!kvm->arch.using_mmu_notifiers && !phys) { > - npages = mem->memory_size >> PAGE_SHIFT; > - phys = vzalloc(npages * sizeof(unsigned long)); > - if (!phys) > - return -ENOMEM; > - kvm->arch.slot_phys[mem->slot] = phys; > - kvm->arch.slot_npages[mem->slot] = npages; > - } > - > - return 0; > -} 
> - > static void unpin_slot(struct kvm *kvm, int slot_id) > { > unsigned long *physp; > @@ -1343,6 +1323,47 @@ static void unpin_slot(struct kvm *kvm, int slot_id) > } > } > > +int kvmppc_core_prepare_memory_region(struct kvm *kvm, > + struct kvm_memory_slot *memslot, > + struct kvm_memory_slot *old, > + struct kvm_userspace_memory_region *mem) > +{ > + unsigned long npages; > + unsigned long *phys; > + > + /* Are we creating, deleting, or modifying the slot? */ > + if (!memslot->npages) { > + /* deleting */ > + kvmppc_unmap_memslot(kvm, old); > + if (!kvm->arch.using_mmu_notifiers) > + unpin_slot(kvm, mem->slot); > + return 0; > + } The !memslot->npages case is handled in __kvm_set_memory_region (please read that part, before kvm_arch_prepare_memory_region() call). kvm_arch_flush_shadow should be implemented. > + if (old->npages) { > + /* modifying guest_phys or flags */ > + if (old->base_gfn != memslot->base_gfn) > + kvmppc_unmap_memslot(kvm, old); This case is also handled generically by the last kvm_arch_flush_shadow call in __kvm_set_memory_region. > + if (memslot->dirty_bitmap && > + old->dirty_bitmap != memslot->dirty_bitmap) > + kvmppc_hv_get_dirty_log(kvm, old); > + return 0; > + } Better clear dirty log unconditionally on kvm_arch_commit_memory_region, similarly to x86 (just so it's consistent). > --- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c > +++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c > @@ -81,7 +81,7 @@ static void remove_revmap_chain(struct kvm *kvm, long pte_index, > ptel = rev->guest_rpte |= rcbits; > gfn = hpte_rpn(ptel, hpte_page_size(hpte_v, ptel)); > memslot = __gfn_to_memslot(kvm_memslots(kvm), gfn); > - if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID)) > + if (!memslot) > return; Why remove this check? (I don't know why it was there in the first place, just checking). 
-- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html