From: Quentin Perret <qperret@xxxxxxxxxx>

In order to allow unmapping arbitrary memory pages from the hypervisor
stage-1 page-table, fix-up the initial refcount for pages that have been
mapped before the 'vmemmap' array was up and running so that it
accurately accounts for all existing hypervisor mappings.

This is achieved by traversing the entire hypervisor stage-1 page-table
during initialisation of EL2 and updating the corresponding
'struct hyp_page' for each valid mapping.

Reviewed-by: Oliver Upton <oliver.upton@xxxxxxxxx>
Tested-by: Vincent Donnefort <vdonnefort@xxxxxxxxxx>
Signed-off-by: Quentin Perret <qperret@xxxxxxxxxx>
Signed-off-by: Will Deacon <will@xxxxxxxxxx>
---
 arch/arm64/kvm/hyp/nvhe/setup.c | 62 +++++++++++++++++++++++----------
 1 file changed, 43 insertions(+), 19 deletions(-)

diff --git a/arch/arm64/kvm/hyp/nvhe/setup.c b/arch/arm64/kvm/hyp/nvhe/setup.c
index 579eb4f73476..8f2726d7e201 100644
--- a/arch/arm64/kvm/hyp/nvhe/setup.c
+++ b/arch/arm64/kvm/hyp/nvhe/setup.c
@@ -185,12 +185,11 @@ static void hpool_put_page(void *addr)
 	hyp_put_page(&hpool, addr);
 }
 
-static int finalize_host_mappings_walker(u64 addr, u64 end, u32 level,
-					 kvm_pte_t *ptep,
-					 enum kvm_pgtable_walk_flags flag,
-					 void * const arg)
+static int fix_host_ownership_walker(u64 addr, u64 end, u32 level,
+				     kvm_pte_t *ptep,
+				     enum kvm_pgtable_walk_flags flag,
+				     void * const arg)
 {
-	struct kvm_pgtable_mm_ops *mm_ops = arg;
 	enum kvm_pgtable_prot prot;
 	enum pkvm_page_state state;
 	kvm_pte_t pte = *ptep;
@@ -199,15 +198,6 @@ static int finalize_host_mappings_walker(u64 addr, u64 end, u32 level,
 	if (!kvm_pte_valid(pte))
 		return 0;
 
-	/*
-	 * Fix-up the refcount for the page-table pages as the early allocator
-	 * was unable to access the hyp_vmemmap and so the buddy allocator has
-	 * initialised the refcount to '1'.
-	 */
-	mm_ops->get_page(ptep);
-	if (flag != KVM_PGTABLE_WALK_LEAF)
-		return 0;
-
 	if (level != (KVM_PGTABLE_MAX_LEVELS - 1))
 		return -EINVAL;
 
@@ -236,12 +226,30 @@ static int finalize_host_mappings_walker(u64 addr, u64 end, u32 level,
 	return host_stage2_idmap_locked(phys, PAGE_SIZE, prot);
 }
 
-static int finalize_host_mappings(void)
+static int fix_hyp_pgtable_refcnt_walker(u64 addr, u64 end, u32 level,
+					 kvm_pte_t *ptep,
+					 enum kvm_pgtable_walk_flags flag,
+					 void * const arg)
+{
+	struct kvm_pgtable_mm_ops *mm_ops = arg;
+	kvm_pte_t pte = *ptep;
+
+	/*
+	 * Fix-up the refcount for the page-table pages as the early allocator
+	 * was unable to access the hyp_vmemmap and so the buddy allocator has
+	 * initialised the refcount to '1'.
+	 */
+	if (kvm_pte_valid(pte))
+		mm_ops->get_page(ptep);
+
+	return 0;
+}
+
+static int fix_host_ownership(void)
 {
 	struct kvm_pgtable_walker walker = {
-		.cb	= finalize_host_mappings_walker,
-		.flags	= KVM_PGTABLE_WALK_LEAF | KVM_PGTABLE_WALK_TABLE_POST,
-		.arg	= pkvm_pgtable.mm_ops,
+		.cb	= fix_host_ownership_walker,
+		.flags	= KVM_PGTABLE_WALK_LEAF,
 	};
 	int i, ret;
 
@@ -257,6 +265,18 @@ static int finalize_host_mappings(void)
 	return 0;
 }
 
+static int fix_hyp_pgtable_refcnt(void)
+{
+	struct kvm_pgtable_walker walker = {
+		.cb	= fix_hyp_pgtable_refcnt_walker,
+		.flags	= KVM_PGTABLE_WALK_LEAF | KVM_PGTABLE_WALK_TABLE_POST,
+		.arg	= pkvm_pgtable.mm_ops,
+	};
+
+	return kvm_pgtable_walk(&pkvm_pgtable, 0, BIT(pkvm_pgtable.ia_bits),
+				&walker);
+}
+
 void __noreturn __pkvm_init_finalise(void)
 {
 	struct kvm_host_data *host_data = this_cpu_ptr(&kvm_host_data);
@@ -286,7 +306,11 @@ void __noreturn __pkvm_init_finalise(void)
 	};
 	pkvm_pgtable.mm_ops = &pkvm_pgtable_mm_ops;
 
-	ret = finalize_host_mappings();
+	ret = fix_host_ownership();
+	if (ret)
+		goto out;
+
+	ret = fix_hyp_pgtable_refcnt();
 	if (ret)
 		goto out;
 
-- 
2.38.1.431.g37b22c650d-goog
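
For context, below is a minimal, self-contained userspace sketch of the
refcount fix-up the new walker performs. Every name in it (walk,
fix_refcnt, refcount[], WALK_TABLE_POST) is a hypothetical stand-in, not
kernel API; it only illustrates why the walker needs
KVM_PGTABLE_WALK_TABLE_POST on top of KVM_PGTABLE_WALK_LEAF, and why
each get_page() is charged to the page that *contains* the visited PTE.

/*
 * Toy two-level "page table": page 0 is the root table, pages
 * 1..ENTRIES are the next-level tables. refcount[] stands in for the
 * per-page metadata kept in hyp_vmemmap ('struct hyp_page').
 */
#include <stdio.h>
#include <stdint.h>

#define ENTRIES	4
#define VALID	1u

static uint32_t root[ENTRIES];
static uint32_t leaf[ENTRIES][ENTRIES];
static unsigned int refcount[1 + ENTRIES];

enum walk_flag { WALK_LEAF, WALK_TABLE_POST };

typedef void (*walker_cb)(unsigned int page, uint32_t pte,
			  enum walk_flag flag);

/*
 * Visit every valid entry. Leaf entries are visited first; the
 * TABLE_POST visit for the table entry itself fires after its children,
 * mirroring KVM_PGTABLE_WALK_LEAF | KVM_PGTABLE_WALK_TABLE_POST.
 */
static void walk(walker_cb cb)
{
	for (unsigned int i = 0; i < ENTRIES; i++) {
		if (!(root[i] & VALID))
			continue;
		for (unsigned int j = 0; j < ENTRIES; j++)
			if (leaf[i][j] & VALID)
				cb(1 + i, leaf[i][j], WALK_LEAF);
		cb(0, root[i], WALK_TABLE_POST);
	}
}

/*
 * One reference per valid entry, charged to the page holding the PTE:
 * a table page's count ends up equal to the number of live entries it
 * contains, the same bookkeeping mm_ops->get_page(ptep) performs.
 */
static void fix_refcnt(unsigned int page, uint32_t pte, enum walk_flag flag)
{
	(void)pte;
	(void)flag;
	refcount[page]++;
}

int main(void)
{
	root[0] = root[2] = VALID;
	leaf[0][1] = leaf[2][0] = leaf[2][3] = VALID;

	walk(fix_refcnt);

	/* Prints: page 0 -> 2, page 1 -> 1, page 3 -> 2, others 0. */
	for (unsigned int p = 0; p < 1 + ENTRIES; p++)
		printf("page %u: refcount %u\n", p, refcount[p]);
	return 0;
}

With this counting scheme a page-table page's refcount matches the
number of live entries it holds, which is what later lets arbitrary
pages be unmapped and table pages reclaimed once the count drops to
zero, per the stated goal of the patch.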