Hi Fuad! I finally got around to giving this patch series a spin for my non-CoCo use case. I used the below diff to expose the functionality outside of pKVM (Based on Steven P.'s ARM CCA patch for custom VM types on ARM [2]). There are two small things that were broken for me (will post as responses to individual patches), but after fixing those, I was able to boot some guests using a modified Firecracker [1]. Just wondering, are you still looking into posting a separate series with just the MMU changes (e.g. something to have a bare-bones KVM_SW_PROTECTED_VM on ARM, like we do for x86), like you mentioned in the guest_memfd call before Christmas? We're pretty keen to get our hands on something like that for our non-CoCo VMs (and ofc, am happy to help with any work required to get there :) Best, Patrick [1]: https://github.com/roypat/firecracker/tree/secret-freedom-mmap [2]: https://lore.kernel.org/kvm/20241004152804.72508-12-steven.price@xxxxxxx/ --- diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 8dfae9183651..0b8dfb855e51 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -380,6 +380,8 @@ struct kvm_arch { * the associated pKVM instance in the hypervisor. 
*/ struct kvm_protected_vm pkvm; + + unsigned long type; }; struct kvm_vcpu_fault_info { @@ -1529,7 +1531,11 @@ void kvm_set_vm_id_reg(struct kvm *kvm, u32 reg, u64 val); #define kvm_has_s1poe(k) \ (kvm_has_feat((k), ID_AA64MMFR3_EL1, S1POE, IMP)) -#define kvm_arch_has_private_mem(kvm) \ - (IS_ENABLED(CONFIG_KVM_PRIVATE_MEM) && is_protected_kvm_enabled()) +#ifdef CONFIG_KVM_PRIVATE_MEM +#define kvm_arch_has_private_mem(kvm) \ + ((kvm)->arch.type == KVM_VM_TYPE_ARM_SW_PROTECTED || is_protected_kvm_enabled()) +#else +#define kvm_arch_has_private_mem(kvm) false +#endif #endif /* __ARM64_KVM_HOST_H__ */ diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig index fe3451f244b5..2da26aa3b0b5 100644 --- a/arch/arm64/kvm/Kconfig +++ b/arch/arm64/kvm/Kconfig @@ -38,6 +38,7 @@ menuconfig KVM select HAVE_KVM_VCPU_RUN_PID_CHANGE select SCHED_INFO select GUEST_PERF_EVENTS if PERF_EVENTS + select KVM_GENERIC_PRIVATE_MEM if KVM_SW_PROTECTED_VM select KVM_GMEM_MAPPABLE help Support hosting virtualized guest machines. @@ -84,4 +85,10 @@ config PTDUMP_STAGE2_DEBUGFS If in doubt, say N. 
+config KVM_SW_PROTECTED_VM + bool "Enable support for KVM software-protected VMs" + depends on EXPERT + depends on KVM && ARM64 + select KVM_GENERIC_PRIVATE_MEM + endif # VIRTUALIZATION diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index a102c3aebdbc..35683868c0e4 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -181,6 +181,19 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) mutex_unlock(&kvm->lock); #endif + if (type & ~(KVM_VM_TYPE_ARM_MASK | KVM_VM_TYPE_ARM_IPA_SIZE_MASK)) + return -EINVAL; + + switch (type & KVM_VM_TYPE_ARM_MASK) { + case KVM_VM_TYPE_ARM_NORMAL: + case KVM_VM_TYPE_ARM_SW_PROTECTED: + break; + default: + return -EINVAL; + } + + kvm->arch.type = type & KVM_VM_TYPE_ARM_MASK; + kvm_init_nested(kvm); ret = kvm_share_hyp(kvm, kvm + 1); diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index 1c4b3871967c..9dbb472eb96a 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -869,9 +869,6 @@ static int kvm_init_ipa_range(struct kvm_s2_mmu *mmu, unsigned long type) u64 mmfr0, mmfr1; u32 phys_shift; - if (type & ~KVM_VM_TYPE_ARM_IPA_SIZE_MASK) - return -EINVAL; - phys_shift = KVM_VM_TYPE_ARM_IPA_SIZE(type); if (is_protected_kvm_enabled()) { phys_shift = kvm_ipa_limit; @@ -2373,3 +2370,31 @@ void kvm_toggle_cache(struct kvm_vcpu *vcpu, bool was_enabled) trace_kvm_toggle_cache(*vcpu_pc(vcpu), was_enabled, now_enabled); } + +#ifdef CONFIG_KVM_GENERIC_MEMORY_ATTRIBUTES +bool kvm_arch_pre_set_memory_attributes(struct kvm *kvm, + struct kvm_gfn_range *range) +{ + /* + * Zap SPTEs even if the slot can't be mapped PRIVATE. KVM only + * supports KVM_MEMORY_ATTRIBUTE_PRIVATE, and so it *seems* like KVM + * can simply ignore such slots. But if userspace is making memory + * PRIVATE, then KVM must prevent the guest from accessing the memory + * as shared. And if userspace is making memory SHARED and this point + * is reached, then at least one page within the range was previously + * PRIVATE, i.e. 
the slot's possible hugepage ranges are changing. + * Zapping SPTEs in this case ensures KVM will reassess whether or not + * a hugepage can be used for affected ranges. + */ + if (WARN_ON_ONCE(!kvm_arch_has_private_mem(kvm))) + return false; + + return kvm_unmap_gfn_range(kvm, range); +} + +bool kvm_arch_post_set_memory_attributes(struct kvm *kvm, + struct kvm_gfn_range *range) +{ + return false; +} +#endif \ No newline at end of file diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index b34aed04ffa5..214f6b5da43f 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -653,6 +653,13 @@ struct kvm_enable_cap { * PA size shift (i.e, log2(PA_Size)). For backward compatibility, * value 0 implies the default IPA size, 40bits. */ +#define KVM_VM_TYPE_ARM_SHIFT 8 +#define KVM_VM_TYPE_ARM_MASK (0xfULL << KVM_VM_TYPE_ARM_SHIFT) +#define KVM_VM_TYPE_ARM(_type) \ + (((_type) << KVM_VM_TYPE_ARM_SHIFT) & KVM_VM_TYPE_ARM_MASK) +#define KVM_VM_TYPE_ARM_NORMAL KVM_VM_TYPE_ARM(0) +#define KVM_VM_TYPE_ARM_SW_PROTECTED KVM_VM_TYPE_ARM(1) + #define KVM_VM_TYPE_ARM_IPA_SIZE_MASK 0xffULL #define KVM_VM_TYPE_ARM_IPA_SIZE(x) \ ((x) & KVM_VM_TYPE_ARM_IPA_SIZE_MASK) On Fri, 2024-12-13 at 16:47 +0000, Fuad Tabba wrote: > This series adds restricted mmap() support to guest_memfd, as > well as support for guest_memfd on arm64. It is based on Linux > 6.13-rc2. Please refer to v3 for the context [1]. 
> > Main changes since v3: > - Added a new folio type for guestmem, used to register a > callback when a folio's reference count reaches 0 (Matthew > Wilcox, DavidH) [2] > - Introduce new mappability states for folios, where a folio can > be mappable by the host and the guest, only the guest, or by no > one (transient state) > - Rebased on Linux 6.13-rc2 > - Refactoring and tidying up > > Cheers, > /fuad > > [1] https://lore.kernel.org/all/20241010085930.1546800-1-tabba@xxxxxxxxxx/ > [2] https://lore.kernel.org/all/20241108162040.159038-1-tabba@xxxxxxxxxx/ > > Ackerley Tng (2): > KVM: guest_memfd: Make guest mem use guest mem inodes instead of > anonymous inodes > KVM: guest_memfd: Track mappability within a struct kvm_gmem_private > > Fuad Tabba (12): > mm: Consolidate freeing of typed folios on final folio_put() > KVM: guest_memfd: Introduce kvm_gmem_get_pfn_locked(), which retains > the folio lock > KVM: guest_memfd: Folio mappability states and functions that manage > their transition > KVM: guest_memfd: Handle final folio_put() of guestmem pages > KVM: guest_memfd: Allow host to mmap guest_memfd() pages when shared > KVM: guest_memfd: Add guest_memfd support to > kvm_(read|/write)_guest_page() > KVM: guest_memfd: Add KVM capability to check if guest_memfd is host > mappable > KVM: guest_memfd: Add a guest_memfd() flag to initialize it as > mappable > KVM: guest_memfd: selftests: guest_memfd mmap() test when mapping is > allowed > KVM: arm64: Skip VMA checks for slots without userspace address > KVM: arm64: Handle guest_memfd()-backed guest page faults > KVM: arm64: Enable guest_memfd private memory when pKVM is enabled > > Documentation/virt/kvm/api.rst | 4 + > arch/arm64/include/asm/kvm_host.h | 3 + > arch/arm64/kvm/Kconfig | 1 + > arch/arm64/kvm/mmu.c | 119 +++- > include/linux/kvm_host.h | 75 +++ > include/linux/page-flags.h | 22 + > include/uapi/linux/kvm.h | 2 + > include/uapi/linux/magic.h | 1 + > mm/debug.c | 1 + > mm/swap.c | 28 +- > 
tools/testing/selftests/kvm/Makefile | 1 + > .../testing/selftests/kvm/guest_memfd_test.c | 64 +- > virt/kvm/Kconfig | 4 + > virt/kvm/guest_memfd.c | 579 +++++++++++++++++- > virt/kvm/kvm_main.c | 229 ++++++- > 15 files changed, 1074 insertions(+), 59 deletions(-) > > > base-commit: fac04efc5c793dccbd07e2d59af9f90b7fc0dca4 > -- > 2.47.1.613.gc27f4b7a9f-goog