This adds code to flush the partition-scoped page tables for a radix guest when dirty tracking is turned on or off for a memslot. Only the guest real addresses covered by the memslot are flushed. The reason for this is to get rid of any 2M PTEs in the partition-scoped page tables that correspond to host transparent huge pages, so that page dirtiness is tracked at a system page (4k or 64k) granularity rather than a 2M granularity. The page tables are also flushed when turning dirty tracking off so that the memslot's address space can be repopulated with THPs if possible. To do this, we add a new function kvmppc_radix_flush_memslot(). Since this does what's needed for kvmppc_core_flush_memslot_hv() on a radix guest, we now make kvmppc_core_flush_memslot_hv() call the new kvmppc_radix_flush_memslot() rather than calling kvm_unmap_radix() for each page in the memslot. This has the effect of fixing a bug in that kvmppc_core_flush_memslot_hv() was previously calling kvm_unmap_radix() without holding the kvm->mmu_lock spinlock, which is required to be held. Signed-off-by: Paul Mackerras <paulus@xxxxxxxxxx> --- arch/powerpc/include/asm/kvm_book3s.h | 2 ++ arch/powerpc/kvm/book3s_64_mmu_hv.c | 9 +++++---- arch/powerpc/kvm/book3s_64_mmu_radix.c | 20 ++++++++++++++++++++ arch/powerpc/kvm/book3s_hv.c | 17 +++++++++++++++++ 4 files changed, 44 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h index 728d2b7..f8a5ac8 100644 --- a/arch/powerpc/include/asm/kvm_book3s.h +++ b/arch/powerpc/include/asm/kvm_book3s.h @@ -222,6 +222,8 @@ extern int kvm_test_age_radix(struct kvm *kvm, struct kvm_memory_slot *memslot, unsigned long gfn); extern long kvmppc_hv_get_dirty_log_radix(struct kvm *kvm, struct kvm_memory_slot *memslot, unsigned long *map); +extern void kvmppc_radix_flush_memslot(struct kvm *kvm, + const struct kvm_memory_slot *memslot); extern int kvmhv_get_rmmu_info(struct kvm *kvm, struct kvm_ppc_rmmu_info *info); /* XXX remove this export when load_last_inst() is generic */ diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index a18afda..6f2d2fb 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c @@ -899,11 +899,12 @@ void kvmppc_core_flush_memslot_hv(struct kvm *kvm, gfn = memslot->base_gfn; rmapp = memslot->arch.rmap; + if (kvm_is_radix(kvm)) { + kvmppc_radix_flush_memslot(kvm, memslot); + return; + } + for (n = memslot->npages; n; --n, ++gfn) { - if (kvm_is_radix(kvm)) { - kvm_unmap_radix(kvm, memslot, gfn); - continue; - } /* * Testing the present bit without locking is OK because * the memslot has been marked invalid already, and hence diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c index 52711eb..d675ad9 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_radix.c +++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c @@ -958,6 +958,26 @@ long kvmppc_hv_get_dirty_log_radix(struct kvm *kvm, return 0; } +void kvmppc_radix_flush_memslot(struct kvm *kvm, + const struct kvm_memory_slot *memslot) +{ + unsigned long n; + pte_t *ptep; + unsigned long gpa; + unsigned int shift; + + gpa = memslot->base_gfn << PAGE_SHIFT; + spin_lock(&kvm->mmu_lock); + for (n = memslot->npages; n; --n) { + ptep = __find_linux_pte(kvm->arch.pgtable, gpa, NULL, &shift); + if (ptep && pte_present(*ptep)) + kvmppc_unmap_pte(kvm, ptep, gpa, shift, memslot, + kvm->arch.lpid); + gpa += PAGE_SIZE; + } + spin_unlock(&kvm->mmu_lock); +} + static void add_rmmu_ap_encoding(struct kvm_ppc_rmmu_info *info, int psize, int *indexp) { diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index f4fbb7b5..074ff5b 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -4384,6 +4384,23 @@ static void kvmppc_core_commit_memory_region_hv(struct kvm *kvm, */ if (npages) atomic64_inc(&kvm->arch.mmio_update); + + /* + * For change == KVM_MR_MOVE or KVM_MR_DELETE, higher levels + * have already called kvm_arch_flush_shadow_memslot() to + * flush shadow mappings. For KVM_MR_CREATE we have no + * previous mappings. So the only case to handle is + * KVM_MR_FLAGS_ONLY when the KVM_MEM_LOG_DIRTY_PAGES bit + * has been changed. + * For radix guests, we flush on setting KVM_MEM_LOG_DIRTY_PAGES + * to get rid of any THP PTEs in the partition-scoped page tables + * so we can track dirtiness at the page level; we flush when + * clearing KVM_MEM_LOG_DIRTY_PAGES so that we can go back to + * using THP PTEs. + */ + if (change == KVM_MR_FLAGS_ONLY && kvm_is_radix(kvm) && + ((new->flags ^ old->flags) & KVM_MEM_LOG_DIRTY_PAGES)) + kvmppc_radix_flush_memslot(kvm, old); } /* -- 2.7.4