This patch implements MMU notifiers for KVM RISC-V so that Guest physical address space is in-sync with Host physical address space. This will allow swapping, page migration, etc to work transparently with KVM RISC-V. Signed-off-by: Anup Patel <anup.patel@xxxxxxx> Acked-by: Paolo Bonzini <pbonzini@xxxxxxxxxx> Reviewed-by: Paolo Bonzini <pbonzini@xxxxxxxxxx> Reviewed-by: Alexander Graf <graf@xxxxxxxxxx> --- arch/riscv/include/asm/kvm_host.h | 7 ++ arch/riscv/kvm/Kconfig | 1 + arch/riscv/kvm/mmu.c | 129 +++++++++++++++++++++++++++++- arch/riscv/kvm/vm.c | 1 + 4 files changed, 137 insertions(+), 1 deletion(-) diff --git a/arch/riscv/include/asm/kvm_host.h b/arch/riscv/include/asm/kvm_host.h index dd1acb011fff..cc0b09918431 100644 --- a/arch/riscv/include/asm/kvm_host.h +++ b/arch/riscv/include/asm/kvm_host.h @@ -190,6 +190,13 @@ static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {} static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {} static inline void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu) {} +#define KVM_ARCH_WANT_MMU_NOTIFIER +int kvm_unmap_hva_range(struct kvm *kvm, + unsigned long start, unsigned long end); +int kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte); +int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end); +int kvm_test_age_hva(struct kvm *kvm, unsigned long hva); + void __kvm_riscv_hfence_gvma_vmid_gpa(unsigned long vmid, unsigned long gpa); void __kvm_riscv_hfence_gvma_vmid(unsigned long vmid); diff --git a/arch/riscv/kvm/Kconfig b/arch/riscv/kvm/Kconfig index 88edd477b3a8..2356dc52ebb3 100644 --- a/arch/riscv/kvm/Kconfig +++ b/arch/riscv/kvm/Kconfig @@ -20,6 +20,7 @@ if VIRTUALIZATION config KVM tristate "Kernel-based Virtual Machine (KVM) support (EXPERIMENTAL)" depends on RISCV_SBI && MMU + select MMU_NOTIFIER select PREEMPT_NOTIFIERS select ANON_INODES select KVM_MMIO diff --git a/arch/riscv/kvm/mmu.c b/arch/riscv/kvm/mmu.c index f491c6aa4502..d88a50cde750 100644 --- a/arch/riscv/kvm/mmu.c +++ b/arch/riscv/kvm/mmu.c @@ -369,6 +369,38 @@ int stage2_ioremap(struct kvm *kvm, gpa_t gpa, phys_addr_t hpa, } +static int handle_hva_to_gpa(struct kvm *kvm, + unsigned long start, + unsigned long end, + int (*handler)(struct kvm *kvm, + gpa_t gpa, u64 size, + void *data), + void *data) +{ + struct kvm_memslots *slots; + struct kvm_memory_slot *memslot; + int ret = 0; + + slots = kvm_memslots(kvm); + + /* we only care about the pages that the guest sees */ + kvm_for_each_memslot(memslot, slots) { + unsigned long hva_start, hva_end; + gfn_t gpa; + + hva_start = max(start, memslot->userspace_addr); + hva_end = min(end, memslot->userspace_addr + + (memslot->npages << PAGE_SHIFT)); + if (hva_start >= hva_end) + continue; + + gpa = hva_to_gfn_memslot(hva_start, memslot) << PAGE_SHIFT; + ret |= handler(kvm, gpa, (u64)(hva_end - hva_start), data); + } + + return ret; +} + void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free, struct kvm_memory_slot *dont) { @@ -501,6 +533,95 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, return ret; } +static int kvm_unmap_hva_handler(struct kvm *kvm, + gpa_t gpa, u64 size, void *data) +{ + stage2_unmap_range(kvm, gpa, size); + return 0; +} + +int kvm_unmap_hva_range(struct kvm *kvm, + unsigned long start, unsigned long end) +{ + if (!kvm->arch.pgd) + return 0; + + handle_hva_to_gpa(kvm, start, end, + &kvm_unmap_hva_handler, NULL); + return 0; +} + +static int kvm_set_spte_handler(struct kvm *kvm, + gpa_t gpa, u64 size, void *data) +{ + pte_t *pte = (pte_t *)data; + + WARN_ON(size != PAGE_SIZE); + stage2_set_pte(kvm, 0, NULL, gpa, pte); + + return 0; +} + +int kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte) +{ + unsigned long end = hva + PAGE_SIZE; + kvm_pfn_t pfn = pte_pfn(pte); + pte_t stage2_pte; + + if (!kvm->arch.pgd) + return 0; + + stage2_pte = pfn_pte(pfn, PAGE_WRITE_EXEC); + handle_hva_to_gpa(kvm, hva, end, + &kvm_set_spte_handler, &stage2_pte); + + return 0; +} + +static int kvm_age_hva_handler(struct kvm *kvm, + gpa_t gpa, u64 size, void *data) +{ + pte_t *ptep; + u32 ptep_level = 0; + + WARN_ON(size != PAGE_SIZE && size != PMD_SIZE && size != PGDIR_SIZE); + + if (!stage2_get_leaf_entry(kvm, gpa, &ptep, &ptep_level)) + return 0; + + return ptep_test_and_clear_young(NULL, 0, ptep); +} + +int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end) +{ + if (!kvm->arch.pgd) + return 0; + + return handle_hva_to_gpa(kvm, start, end, kvm_age_hva_handler, NULL); +} + +static int kvm_test_age_hva_handler(struct kvm *kvm, + gpa_t gpa, u64 size, void *data) +{ + pte_t *ptep; + u32 ptep_level = 0; + + WARN_ON(size != PAGE_SIZE && size != PMD_SIZE); + if (!stage2_get_leaf_entry(kvm, gpa, &ptep, &ptep_level)) + return 0; + + return pte_young(*ptep); +} + +int kvm_test_age_hva(struct kvm *kvm, unsigned long hva) +{ + if (!kvm->arch.pgd) + return 0; + + return handle_hva_to_gpa(kvm, hva, hva, + kvm_test_age_hva_handler, NULL); +} + int kvm_riscv_stage2_map(struct kvm_vcpu *vcpu, gpa_t gpa, unsigned long hva, bool is_write) { @@ -512,7 +633,7 @@ int kvm_riscv_stage2_map(struct kvm_vcpu *vcpu, gpa_t gpa, unsigned long hva, struct vm_area_struct *vma; struct kvm *kvm = vcpu->kvm; struct kvm_mmu_page_cache *pcache = &vcpu->arch.mmu_page_cache; - unsigned long vma_pagesize; + unsigned long vma_pagesize, mmu_seq; down_read(¤t->mm->mmap_sem); @@ -545,6 +666,8 @@ int kvm_riscv_stage2_map(struct kvm_vcpu *vcpu, gpa_t gpa, unsigned long hva, return ret; } + mmu_seq = kvm->mmu_notifier_seq; + hfn = gfn_to_pfn_prot(kvm, gfn, is_write, &writeable); if (hfn == KVM_PFN_ERR_HWPOISON) { if (is_vm_hugetlb_page(vma)) @@ -563,6 +686,9 @@ int kvm_riscv_stage2_map(struct kvm_vcpu *vcpu, gpa_t gpa, unsigned long hva, spin_lock(&kvm->mmu_lock); + if (mmu_notifier_retry(kvm, mmu_seq)) + goto out_unlock; + if (writeable) { kvm_set_pfn_dirty(hfn); ret = stage2_map_page(kvm, pcache, gpa, hfn << PAGE_SHIFT, @@ -575,6 +701,7 @@ int kvm_riscv_stage2_map(struct kvm_vcpu *vcpu, gpa_t gpa, unsigned long hva, if (ret) kvm_err("Failed to map in stage2\n"); +out_unlock: spin_unlock(&kvm->mmu_lock); kvm_set_pfn_accessed(hfn); kvm_release_pfn_clean(hfn); diff --git a/arch/riscv/kvm/vm.c b/arch/riscv/kvm/vm.c index c5aab5478c38..fd84b4d914dc 100644 --- a/arch/riscv/kvm/vm.c +++ b/arch/riscv/kvm/vm.c @@ -54,6 +54,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) switch (ext) { case KVM_CAP_DEVICE_CTRL: case KVM_CAP_USER_MEMORY: + case KVM_CAP_SYNC_MMU: case KVM_CAP_DESTROY_MEMORY_REGION_WORKS: case KVM_CAP_ONE_REG: case KVM_CAP_READONLY_MEM: -- 2.17.1