On 15/10/2024 12:25, Suzuki K Poulose wrote:
> Hi Steven
>
> On 04/10/2024 16:27, Steven Price wrote:
>> The RMM owns the stage 2 page tables for a realm, and KVM must request
>> that the RMM creates/destroys entries as necessary. The physical pages
>> to store the page tables are delegated to the realm as required, and can
>> be undelegated when no longer used.
>>
>> Creating new RTTs is the easy part; tearing down is a little more
>> tricky. The result of realm_rtt_destroy() can be used to effectively
>> walk the tree and destroy the entries (undelegating pages that were
>> given to the realm).
>>
>> Signed-off-by: Steven Price <steven.price@xxxxxxx>
>> ---
>> Changes since v2:
>>  * Moved {alloc,free}_delegated_page() and ensure_spare_page() to a
>>    later patch when they are actually used.
>>  * Some simplifications now that rmi_xxx() functions allow NULL as an
>>    output parameter.
>>  * Improved comments and code layout.
>> ---
>>  arch/arm64/include/asm/kvm_rme.h |  19 ++++++
>>  arch/arm64/kvm/mmu.c             |   6 +-
>>  arch/arm64/kvm/rme.c             | 113 +++++++++++++++++++++++++++++++
>>  3 files changed, 135 insertions(+), 3 deletions(-)
>>
>> diff --git a/arch/arm64/include/asm/kvm_rme.h b/arch/arm64/include/asm/kvm_rme.h
>> index bd306bd7b64b..e5704859a6e5 100644
>> --- a/arch/arm64/include/asm/kvm_rme.h
>> +++ b/arch/arm64/include/asm/kvm_rme.h
>> @@ -76,5 +76,24 @@ u32 kvm_realm_ipa_limit(void);
>>  int kvm_realm_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap);
>>  int kvm_init_realm_vm(struct kvm *kvm);
>>  void kvm_destroy_realm(struct kvm *kvm);
>> +void kvm_realm_destroy_rtts(struct kvm *kvm, u32 ia_bits);
>> +
>> +#define RME_RTT_BLOCK_LEVEL	2
>> +#define RME_RTT_MAX_LEVEL	3
>> +
>> +#define RME_PAGE_SHIFT		12
>> +#define RME_PAGE_SIZE		BIT(RME_PAGE_SHIFT)
>> +/* See ARM64_HW_PGTABLE_LEVEL_SHIFT() */
>> +#define RME_RTT_LEVEL_SHIFT(l)	\
>> +	((RME_PAGE_SHIFT - 3) * (4 - (l)) + 3)
>> +#define RME_L2_BLOCK_SIZE	BIT(RME_RTT_LEVEL_SHIFT(2))
>> +
>> +static inline unsigned long rme_rtt_level_mapsize(int level)
>> +{
>> +	if (WARN_ON(level > RME_RTT_MAX_LEVEL))
>> +		return RME_PAGE_SIZE;
>> +
>> +	return (1UL << RME_RTT_LEVEL_SHIFT(level));
>> +}
>>  #endif
>> diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
>> index d4ef6dcf8eb7..a26cdac59eb3 100644
>> --- a/arch/arm64/kvm/mmu.c
>> +++ b/arch/arm64/kvm/mmu.c
>> @@ -1054,17 +1054,17 @@ void stage2_unmap_vm(struct kvm *kvm)
>>  void kvm_free_stage2_pgd(struct kvm_s2_mmu *mmu)
>>  {
>>  	struct kvm *kvm = kvm_s2_mmu_to_kvm(mmu);
>> -	struct kvm_pgtable *pgt = NULL;
>> +	struct kvm_pgtable *pgt;
>>
>>  	write_lock(&kvm->mmu_lock);
>> +	pgt = mmu->pgt;
>>  	if (kvm_is_realm(kvm) &&
>>  	    (kvm_realm_state(kvm) != REALM_STATE_DEAD &&
>>  	     kvm_realm_state(kvm) != REALM_STATE_NONE)) {
>> -		/* Tearing down RTTs will be added in a later patch */
>>  		write_unlock(&kvm->mmu_lock);
>> +		kvm_realm_destroy_rtts(kvm, pgt->ia_bits);
>>  		return;
>>  	}
>> -	pgt = mmu->pgt;
>>  	if (pgt) {
>>  		mmu->pgd_phys = 0;
>>  		mmu->pgt = NULL;
>> diff --git a/arch/arm64/kvm/rme.c b/arch/arm64/kvm/rme.c
>> index f6430d460519..7db405d2b2b2 100644
>> --- a/arch/arm64/kvm/rme.c
>> +++ b/arch/arm64/kvm/rme.c
>> @@ -125,6 +125,119 @@ static int realm_create_rd(struct kvm *kvm)
>>  	return r;
>>  }
>> +
>> +static int realm_rtt_destroy(struct realm *realm, unsigned long addr,
>> +			     int level, phys_addr_t *rtt_granule,
>> +			     unsigned long *next_addr)
>> +{
>> +	unsigned long out_rtt;
>
> minor nit: You could drop the local variable out_rtt.

I could, but I was trying to avoid assuming that phys_addr_t is
compatible with unsigned long - i.e. I don't want to do the
type-punning needed to pass rtt_granule straight into
rmi_rtt_destroy(). I would expect the compiler to inline this function
and get rid of the temporary.
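To make the comparison concrete - purely an illustrative sketch, not
part of the patch:

	/*
	 * As written: copy via a correctly-typed local and let the
	 * integer conversion happen on the assignment; the compiler
	 * should inline the helper and drop the temporary.
	 */
	unsigned long out_rtt;

	ret = rmi_rtt_destroy(virt_to_phys(realm->rd), addr, level,
			      &out_rtt, next_addr);
	*rtt_granule = out_rtt;

	/*
	 * What I want to avoid: pointer-punning that is only valid if
	 * phys_addr_t is guaranteed to be exactly unsigned long.
	 */
	ret = rmi_rtt_destroy(virt_to_phys(realm->rd), addr, level,
			      (unsigned long *)rtt_granule, next_addr);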
>> +	int ret;
>> +
>> +	ret = rmi_rtt_destroy(virt_to_phys(realm->rd), addr, level,
>> +			      &out_rtt, next_addr);
>> +
>> +	*rtt_granule = out_rtt;
>> +
>> +	return ret;
>> +}
>> +
>> +static int realm_tear_down_rtt_level(struct realm *realm, int level,
>> +				     unsigned long start, unsigned long end)
>> +{
>> +	ssize_t map_size;
>> +	unsigned long addr, next_addr;
>> +
>> +	if (WARN_ON(level > RME_RTT_MAX_LEVEL))
>> +		return -EINVAL;
>> +
>> +	map_size = rme_rtt_level_mapsize(level - 1);
>> +
>> +	for (addr = start; addr < end; addr = next_addr) {
>> +		phys_addr_t rtt_granule;
>> +		int ret;
>> +		unsigned long align_addr = ALIGN(addr, map_size);
>> +
>> +		next_addr = ALIGN(addr + 1, map_size);
>> +
>> +		if (next_addr > end || align_addr != addr) {
>> +			/*
>> +			 * The target range is smaller than what this level
>> +			 * covers, recurse deeper.
>> +			 */
>> +			ret = realm_tear_down_rtt_level(realm,
>> +							level + 1,
>> +							addr,
>> +							min(next_addr, end));
>> +			if (ret)
>> +				return ret;
>> +			continue;
>> +		}
>> +
>> +		ret = realm_rtt_destroy(realm, addr, level,
>> +					&rtt_granule, &next_addr);
>> +
>> +		switch (RMI_RETURN_STATUS(ret)) {
>> +		case RMI_SUCCESS:
>> +			if (!WARN_ON(rmi_granule_undelegate(rtt_granule)))
>> +				free_page((unsigned long)phys_to_virt(rtt_granule));
>> +			break;
>> +		case RMI_ERROR_RTT:
>> +			if (next_addr > addr) {
>> +				/* Missing RTT, skip */
>> +				break;
>> +			}
>> +			if (WARN_ON(RMI_RETURN_INDEX(ret) != level))
>> +				return -EBUSY;
>> +			/*
>> +			 * We tear down the RTT range for the full IPA
>> +			 * space, after everything is unmapped. Also we
>> +			 * descend down only if we cannot tear down a
>> +			 * top level RTT. Thus RMM must be able to walk
>> +			 * to the requested level. e.g., a block mapping
>> +			 * exists at L1 or L2.
>> +			 */
>
> This comment really applies to the if (RMI_RETURN_INDEX(ret) != level)
> check above. Please move it up.

Good spot.

> With that:
>
> Reviewed-by: Suzuki K Poulose <suzuki.poulose@xxxxxxx>

Thanks,
Steve

>
>
>
>> +			if (WARN_ON(level == RME_RTT_MAX_LEVEL))
>> +				return -EBUSY;
>> +
>> +			/*
>> +			 * The table has active entries in it, recurse deeper
>> +			 * and tear down the RTTs.
>> +			 */
>> +			next_addr = ALIGN(addr + 1, map_size);
>> +			ret = realm_tear_down_rtt_level(realm,
>> +							level + 1,
>> +							addr,
>> +							next_addr);
>> +			if (ret)
>> +				return ret;
>> +			/*
>> +			 * Now that the child RTTs are destroyed,
>> +			 * retry at this level.
>> +			 */
>> +			next_addr = addr;
>> +			break;
>> +		default:
>> +			WARN_ON(1);
>> +			return -ENXIO;
>> +		}
>> +	}
>> +
>> +	return 0;
>> +}
>> +
>> +static int realm_tear_down_rtt_range(struct realm *realm,
>> +				     unsigned long start, unsigned long end)
>> +{
>> +	return realm_tear_down_rtt_level(realm, get_start_level(realm) + 1,
>> +					 start, end);
>> +}
>> +
>> +void kvm_realm_destroy_rtts(struct kvm *kvm, u32 ia_bits)
>> +{
>> +	struct realm *realm = &kvm->arch.realm;
>> +
>> +	WARN_ON(realm_tear_down_rtt_range(realm, 0, (1UL << ia_bits)));
>> +}
>> +
>>  /* Protects access to rme_vmid_bitmap */
>>  static DEFINE_SPINLOCK(rme_vmid_lock);
>>  static unsigned long *rme_vmid_bitmap;
>
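For anyone following the walk above, the level arithmetic can be
sanity-checked with a trivial standalone program (a userspace sketch
reusing the macro from the patch, not kernel code):

#include <stdio.h>

/* Same formula as RME_RTT_LEVEL_SHIFT() in kvm_rme.h (4K granule) */
#define RME_PAGE_SHIFT		12
#define RME_RTT_LEVEL_SHIFT(l)	((RME_PAGE_SHIFT - 3) * (4 - (l)) + 3)

int main(void)
{
	int level;

	/* Prints 1GB (L1), 2MB (L2) and 4KB (L3) */
	for (level = 1; level <= 3; level++)
		printf("level %d entry maps %lu bytes\n", level,
		       1UL << RME_RTT_LEVEL_SHIFT(level));

	/*
	 * This is also why realm_tear_down_rtt_level() calls
	 * rme_rtt_level_mapsize(level - 1): an RTT table at level N
	 * spans the map size of a single level N-1 entry.
	 */
	return 0;
}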