Implement a new hypercall, __pkvm_host_reclaim_page(), so that the host
at EL1 can reclaim pages that were previously donated to EL2. This
allows EL2 to defer clearing of guest memory on teardown and allows
preemption in the host after reclaiming each page.

Signed-off-by: Will Deacon <will@xxxxxxxxxx>
---
 arch/arm64/include/asm/kvm_asm.h              |  1 +
 arch/arm64/kvm/hyp/include/nvhe/mem_protect.h |  1 +
 arch/arm64/kvm/hyp/include/nvhe/memory.h      |  7 ++
 arch/arm64/kvm/hyp/nvhe/hyp-main.c            |  8 ++
 arch/arm64/kvm/hyp/nvhe/mem_protect.c         | 91 ++++++++++++++++++-
 5 files changed, 107 insertions(+), 1 deletion(-)
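Note (not part of the patch): as a rough sketch of the intended usage, the
EL1 side could reclaim a previously donated range with one hypercall per
page, along the lines below. This assumes the existing kvm_call_hyp_nvhe()
helper; the reclaim loop and the pfn array are purely illustrative and are
not introduced by this series.

/*
 * Illustrative sketch only: reclaim pages previously donated to EL2,
 * issuing one hypercall per page so the host can reschedule in between.
 */
static void host_reclaim_donated_range(u64 *pfns, unsigned long nr_pages)
{
        unsigned long i;

        for (i = 0; i < nr_pages; i++) {
                WARN_ON(kvm_call_hyp_nvhe(__pkvm_host_reclaim_page, pfns[i]));
                cond_resched(); /* preemption point between pages */
        }
}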
diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h
index f5030e88eb58..a68381699c40 100644
--- a/arch/arm64/include/asm/kvm_asm.h
+++ b/arch/arm64/include/asm/kvm_asm.h
@@ -64,6 +64,7 @@ enum __kvm_host_smccc_func {
         /* Hypercalls available after pKVM finalisation */
         __KVM_HOST_SMCCC_FUNC___pkvm_host_share_hyp,
         __KVM_HOST_SMCCC_FUNC___pkvm_host_unshare_hyp,
+        __KVM_HOST_SMCCC_FUNC___pkvm_host_reclaim_page,
         __KVM_HOST_SMCCC_FUNC___kvm_adjust_pc,
         __KVM_HOST_SMCCC_FUNC___kvm_vcpu_run,
         __KVM_HOST_SMCCC_FUNC___kvm_flush_vm_context,
diff --git a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h
index 663019992b67..ecedc545e608 100644
--- a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h
+++ b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h
@@ -64,6 +64,7 @@ extern unsigned long hyp_nr_cpus;
 
 int __pkvm_prot_finalize(void);
 int __pkvm_host_share_hyp(u64 pfn);
 int __pkvm_host_unshare_hyp(u64 pfn);
+int __pkvm_host_reclaim_page(u64 pfn);
 int __pkvm_host_donate_hyp(u64 pfn, u64 nr_pages);
 int __pkvm_hyp_donate_host(u64 pfn, u64 nr_pages);
diff --git a/arch/arm64/kvm/hyp/include/nvhe/memory.h b/arch/arm64/kvm/hyp/include/nvhe/memory.h
index 29f2ebe306bc..15b719fefc86 100644
--- a/arch/arm64/kvm/hyp/include/nvhe/memory.h
+++ b/arch/arm64/kvm/hyp/include/nvhe/memory.h
@@ -7,6 +7,13 @@
 
 #include <linux/types.h>
 
+/*
+ * Accesses to struct hyp_page flags are serialized by the host stage-2
+ * page-table lock.
+ */
+#define HOST_PAGE_NEED_POISONING	BIT(0)
+#define HOST_PAGE_PENDING_RECLAIM	BIT(1)
+
 struct hyp_page {
         unsigned short refcount;
         u8 order;
diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-main.c b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
index 8e51cdab00b7..629d306c91c0 100644
--- a/arch/arm64/kvm/hyp/nvhe/hyp-main.c
+++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
@@ -155,6 +155,13 @@ static void handle___pkvm_host_unshare_hyp(struct kvm_cpu_context *host_ctxt)
         cpu_reg(host_ctxt, 1) = __pkvm_host_unshare_hyp(pfn);
 }
 
+static void handle___pkvm_host_reclaim_page(struct kvm_cpu_context *host_ctxt)
+{
+        DECLARE_REG(u64, pfn, host_ctxt, 1);
+
+        cpu_reg(host_ctxt, 1) = __pkvm_host_reclaim_page(pfn);
+}
+
 static void handle___pkvm_create_private_mapping(struct kvm_cpu_context *host_ctxt)
 {
         DECLARE_REG(phys_addr_t, phys, host_ctxt, 1);
@@ -211,6 +218,7 @@ static const hcall_t host_hcall[] = {
 
         HANDLE_FUNC(__pkvm_host_share_hyp),
         HANDLE_FUNC(__pkvm_host_unshare_hyp),
+        HANDLE_FUNC(__pkvm_host_reclaim_page),
         HANDLE_FUNC(__kvm_adjust_pc),
         HANDLE_FUNC(__kvm_vcpu_run),
         HANDLE_FUNC(__kvm_flush_vm_context),
diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
index bcf84e157d4b..adb6a880c684 100644
--- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c
+++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
@@ -260,15 +260,51 @@ int kvm_guest_prepare_stage2(struct kvm_shadow_vm *vm, void *pgd)
         return 0;
 }
 
+static int reclaim_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
+                          enum kvm_pgtable_walk_flags flag,
+                          void * const arg)
+{
+        kvm_pte_t pte = *ptep;
+        struct hyp_page *page;
+
+        if (!kvm_pte_valid(pte))
+                return 0;
+
+        page = hyp_phys_to_page(kvm_pte_to_phys(pte));
+        switch (pkvm_getstate(kvm_pgtable_stage2_pte_prot(pte))) {
+        case PKVM_PAGE_OWNED:
+                page->flags |= HOST_PAGE_NEED_POISONING;
+                fallthrough;
+        case PKVM_PAGE_SHARED_BORROWED:
+        case PKVM_PAGE_SHARED_OWNED:
+                page->flags |= HOST_PAGE_PENDING_RECLAIM;
+                break;
+        default:
+                return -EPERM;
+        }
+
+        return 0;
+}
+
 void reclaim_guest_pages(struct kvm_shadow_vm *vm, struct kvm_hyp_memcache *mc)
 {
+
+        struct kvm_pgtable_walker walker = {
+                .cb     = reclaim_walker,
+                .flags  = KVM_PGTABLE_WALK_LEAF
+        };
         void *addr;
 
-        /* Dump all pgtable pages in the hyp_pool */
+        host_lock_component();
         guest_lock_component(vm);
+
+        /* Reclaim all guest pages and dump all pgtable pages in the hyp_pool */
+        BUG_ON(kvm_pgtable_walk(&vm->pgt, 0, BIT(vm->pgt.ia_bits), &walker));
         kvm_pgtable_stage2_destroy(&vm->pgt);
         vm->kvm.arch.mmu.pgd_phys = 0ULL;
+
         guest_unlock_component(vm);
+        host_unlock_component();
 
         /* Drain the hyp_pool into the memcache */
         addr = hyp_alloc_pages(&vm->pool, 0);
@@ -1225,3 +1261,56 @@ void hyp_unpin_shared_mem(void *from, void *to)
         hyp_unlock_component();
         host_unlock_component();
 }
+
+static int hyp_zero_page(phys_addr_t phys)
+{
+        void *addr;
+
+        addr = hyp_fixmap_map(phys);
+        if (!addr)
+                return -EINVAL;
+        memset(addr, 0, PAGE_SIZE);
+        __clean_dcache_guest_page(addr, PAGE_SIZE);
+
+        return hyp_fixmap_unmap();
+}
+
+int __pkvm_host_reclaim_page(u64 pfn)
+{
+        u64 addr = hyp_pfn_to_phys(pfn);
+        struct hyp_page *page;
+        kvm_pte_t pte;
+        int ret;
+
+        host_lock_component();
+
+        ret = kvm_pgtable_get_leaf(&host_kvm.pgt, addr, &pte, NULL);
+        if (ret)
+                goto unlock;
+
+        if (host_get_page_state(pte) == PKVM_PAGE_OWNED)
+                goto unlock;
+
+        page = hyp_phys_to_page(addr);
+        if (!(page->flags & HOST_PAGE_PENDING_RECLAIM)) {
+                ret = -EPERM;
+                goto unlock;
+        }
+
+        if (page->flags & HOST_PAGE_NEED_POISONING) {
+                ret = hyp_zero_page(addr);
+                if (ret)
+                        goto unlock;
+                page->flags &= ~HOST_PAGE_NEED_POISONING;
+        }
+
+        ret = host_stage2_set_owner_locked(addr, PAGE_SIZE, PKVM_ID_HOST);
+        if (ret)
+                goto unlock;
+        page->flags &= ~HOST_PAGE_PENDING_RECLAIM;
+
+unlock:
+        host_unlock_component();
+
+        return ret;
+}

-- 
2.36.1.124.g0e6072fb45-goog

_______________________________________________
kvmarm mailing list
kvmarm@xxxxxxxxxxxxxxxxxxxxx
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm