Now that a hypervisor can run in the virtual EL2, the guest hypervisor can assign any VMID to its own VMs. To avoid conflicts between VMIDs among a host and guest(s), the host hypervisor maps each VMID from a guest hypervisor's view (i.e. virtual VMID) to a unique shadow VMID. It also manages a set of shadow stage-2 page tables for each shadow VMID. All this information is stored in the kvm_nested_s2_mmu struct. A host hypervisor manages a list of kvm_nested_s2_mmu objects per VM. On VM entry, it searches for an object in the list using a virtual VMID as a key. Signed-off-by: Jintack Lim <jintack.lim@xxxxxxxxxx> --- Notes: v1-->v2: - This is a merged commit of [RFC 39/55] and [RFC 40/55]. - Updated the commit message and comments. - Defer creating a new nested mmu structure until we enter the VM with stage 2 paging enabled, which was previously done on vttbr_el2 write operations. - Use the existing kvm->mmu_lock when iterating nested mmus instead of creating one. arch/arm/include/asm/kvm_host.h | 12 ++++ arch/arm64/include/asm/kvm_emulate.h | 13 ++--- arch/arm64/include/asm/kvm_host.h | 25 ++++++++ arch/arm64/include/asm/kvm_mmu.h | 21 +++++++ arch/arm64/kvm/Makefile | 1 + arch/arm64/kvm/context.c | 2 +- arch/arm64/kvm/mmu-nested.c | 108 +++++++++++++++++++++++++++++++++++ virt/kvm/arm/arm.c | 1 + 8 files changed, 174 insertions(+), 9 deletions(-) create mode 100644 arch/arm64/kvm/mmu-nested.c diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h index 33ccdbe..d84c1c1 100644 --- a/arch/arm/include/asm/kvm_host.h +++ b/arch/arm/include/asm/kvm_host.h @@ -67,6 +67,15 @@ struct kvm_s2_mmu { pgd_t *pgd; }; +/* Per shadow VMID mmu structure. This is only for nested virtualization */ +struct kvm_nested_s2_mmu { + struct kvm_s2_mmu mmu; + + u64 virtual_vttbr; + + struct list_head list; +}; + struct kvm_arch { /* Stage 2 paging state for the VM */ struct kvm_s2_mmu mmu; @@ -79,6 +88,9 @@ struct kvm_arch { * here. 
*/ + /* Never used on arm but added to be compatible with arm64 */ + struct list_head nested_mmu_list; + /* Interrupt controller */ struct vgic_dist vgic; int max_vcpus; diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index 71a3a04..f476576 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -199,6 +199,11 @@ static inline bool is_hyp_ctxt(const struct kvm_vcpu *vcpu) return false; } +static inline bool vcpu_nested_stage2_enabled(const struct kvm_vcpu *vcpu) +{ + return (vcpu_sys_reg(vcpu, HCR_EL2) & HCR_VM); +} + static inline u32 kvm_vcpu_get_hsr(const struct kvm_vcpu *vcpu) { return vcpu->arch.fault.esr_el2; @@ -385,12 +390,4 @@ static inline unsigned long vcpu_data_host_to_guest(struct kvm_vcpu *vcpu, return data; /* Leave LE untouched */ } -static inline struct kvm_s2_vmid *vcpu_get_active_vmid(struct kvm_vcpu *vcpu) -{ - if (unlikely(is_hyp_ctxt(vcpu))) - return &vcpu->kvm->arch.mmu.el2_vmid; - - return &vcpu->kvm->arch.mmu.vmid; -} - #endif /* __ARM64_KVM_EMULATE_H__ */ diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index a7edf0e..0c37e49 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -65,6 +65,28 @@ struct kvm_s2_mmu { pgd_t *pgd; }; +/* Per shadow VMID mmu structure */ +struct kvm_nested_s2_mmu { + struct kvm_s2_mmu mmu; + + /* + * virtual_vttbr contains vttbr_el2 value from the guest hypervisor. + * We use vmid field as a key to search for this mmu object in the list, + * and ignore baddr field. + * + * Note that we may use both of vmid field and baddr field respectively + * to find a shadow VMID and a pointer to the shadow stage-2 page + * table, then combine them to set up hw_vttbr. The only benefit of + * doing that would be reusing shadow stage-2 page tables for different + * VMIDs, which is not usual. So, we choose the current design for the + * simplicity. 
+ * + */ + u64 virtual_vttbr; + + struct list_head list; +}; + struct kvm_arch { /* Stage 2 paging state for the VM */ struct kvm_s2_mmu mmu; @@ -77,6 +99,9 @@ struct kvm_arch { /* Interrupt controller */ struct vgic_dist vgic; + + /* Stage 2 shadow paging contexts for nested L2 VM */ + struct list_head nested_mmu_list; }; #define KVM_NR_MEM_OBJS 40 diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index bceaec1..452912f 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -112,6 +112,7 @@ #include <asm/cacheflush.h> #include <asm/mmu_context.h> #include <asm/pgtable.h> +#include <asm/kvm_emulate.h> static inline unsigned long __kern_hyp_va(unsigned long v) { @@ -321,6 +322,10 @@ static inline unsigned int kvm_get_vmid_bits(void) return (cpuid_feature_extract_unsigned_field(reg, ID_AA64MMFR1_VMIDBITS_SHIFT) == 2) ? 16 : 8; } +struct kvm_nested_s2_mmu *get_nested_mmu(struct kvm_vcpu *vcpu, u64 vttbr); +struct kvm_s2_mmu *vcpu_get_active_s2_mmu(struct kvm_vcpu *vcpu); +void update_nested_s2_mmu(struct kvm_vcpu *vcpu); + static inline u64 kvm_get_vttbr(struct kvm_s2_vmid *vmid, struct kvm_s2_mmu *mmu) { @@ -332,5 +337,21 @@ static inline u64 kvm_get_vttbr(struct kvm_s2_vmid *vmid, return baddr | vmid_field; } +static inline u64 get_vmid(u64 vttbr) +{ + return (vttbr & VTTBR_VMID_MASK(get_kvm_vmid_bits())) >> + VTTBR_VMID_SHIFT; +} + +static inline struct kvm_s2_vmid *vcpu_get_active_vmid(struct kvm_vcpu *vcpu) +{ + struct kvm_s2_mmu *mmu = vcpu_get_active_s2_mmu(vcpu); + + if (unlikely(is_hyp_ctxt(vcpu))) + return &mmu->el2_vmid; + else + return &mmu->vmid; +} + #endif /* __ASSEMBLY__ */ #endif /* __ARM64_KVM_MMU_H__ */ diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile index 0263ef0..5300db0 100644 --- a/arch/arm64/kvm/Makefile +++ b/arch/arm64/kvm/Makefile @@ -37,4 +37,5 @@ kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/arch_timer.o kvm-$(CONFIG_KVM_ARM_PMU) += $(KVM)/arm/pmu.o 
kvm-$(CONFIG_KVM_ARM_HOST) += nested.o +kvm-$(CONFIG_KVM_ARM_HOST) += mmu-nested.o kvm-$(CONFIG_KVM_ARM_HOST) += emulate-nested.o diff --git a/arch/arm64/kvm/context.c b/arch/arm64/kvm/context.c index afd1702..762d4a5 100644 --- a/arch/arm64/kvm/context.c +++ b/arch/arm64/kvm/context.c @@ -177,7 +177,7 @@ static void flush_shadow_el1_sysregs(struct kvm_vcpu *vcpu) static void setup_s2_mmu(struct kvm_vcpu *vcpu) { - struct kvm_s2_mmu *mmu = &vcpu->kvm->arch.mmu; + struct kvm_s2_mmu *mmu = vcpu_get_active_s2_mmu(vcpu); struct kvm_s2_vmid *vmid = vcpu_get_active_vmid(vcpu); vcpu->arch.hw_vttbr = kvm_get_vttbr(vmid, mmu); diff --git a/arch/arm64/kvm/mmu-nested.c b/arch/arm64/kvm/mmu-nested.c new file mode 100644 index 0000000..c436daf --- /dev/null +++ b/arch/arm64/kvm/mmu-nested.c @@ -0,0 +1,108 @@ +/* + * Copyright (C) 2017 - Columbia University and Linaro Ltd. + * Author: Jintack Lim <jintack.lim@xxxxxxxxxx> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. 
+ */ + +#include <linux/kvm_host.h> + +#include <asm/kvm_arm.h> +#include <asm/kvm_emulate.h> +#include <asm/kvm_mmu.h> + +static struct kvm_nested_s2_mmu *lookup_nested_mmu(struct kvm_vcpu *vcpu, + u64 vttbr) +{ + struct kvm_nested_s2_mmu *mmu; + u64 virtual_vmid; + u64 target_vmid = get_vmid(vttbr); + struct list_head *nested_mmu_list = &vcpu->kvm->arch.nested_mmu_list; + + /* Search a mmu in the list using the virtual VMID as a key */ + list_for_each_entry_rcu(mmu, nested_mmu_list, list) { + virtual_vmid = get_vmid(mmu->virtual_vttbr); + if (target_vmid == virtual_vmid) + return mmu; + } + return NULL; +} + +/** + * create_nested_mmu - create mmu for the given virtual VMID + * + * Called from setup_s2_mmu before entering the nested VM to ensure the shadow + * stage 2 page table is allocated and it is valid to use. + */ +static struct kvm_nested_s2_mmu *create_nested_mmu(struct kvm_vcpu *vcpu, + u64 vttbr) +{ + struct kvm_nested_s2_mmu *nested_mmu, *tmp_mmu; + struct list_head *nested_mmu_list = &vcpu->kvm->arch.nested_mmu_list; + bool need_free = false; + int ret; + + nested_mmu = kzalloc(sizeof(struct kvm_nested_s2_mmu), GFP_KERNEL); + if (!nested_mmu) + return NULL; + + ret = __kvm_alloc_stage2_pgd(&nested_mmu->mmu); + if (ret) { + kfree(nested_mmu); + return NULL; + } + + spin_lock(&vcpu->kvm->mmu_lock); + tmp_mmu = lookup_nested_mmu(vcpu, vttbr); + if (!tmp_mmu) { + list_add_rcu(&nested_mmu->list, nested_mmu_list); + } else { + /* + * Somebody already put a new nested_mmu for this virtual VMID + * to the list behind our back. 
+ */ + need_free = true; + } + spin_unlock(&vcpu->kvm->mmu_lock); + + if (need_free) { + __kvm_free_stage2_pgd(vcpu->kvm, &nested_mmu->mmu); + kfree(nested_mmu); + nested_mmu = tmp_mmu; + } + + /* The virtual VMID will be used as a key when searching a mmu */ + nested_mmu->virtual_vttbr = vttbr; + + return nested_mmu; +} + +static struct kvm_s2_mmu *get_s2_mmu_nested(struct kvm_vcpu *vcpu) +{ + u64 vttbr = vcpu_sys_reg(vcpu, VTTBR_EL2); + struct kvm_nested_s2_mmu *nested_mmu; + + nested_mmu = lookup_nested_mmu(vcpu, vttbr); + if (!nested_mmu) + nested_mmu = create_nested_mmu(vcpu, vttbr); + + return &nested_mmu->mmu; +} + +struct kvm_s2_mmu *vcpu_get_active_s2_mmu(struct kvm_vcpu *vcpu) +{ + if (is_hyp_ctxt(vcpu) || !vcpu_nested_stage2_enabled(vcpu)) + return &vcpu->kvm->arch.mmu; + + return get_s2_mmu_nested(vcpu); +} diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c index 63dd897..4548d77 100644 --- a/virt/kvm/arm/arm.c +++ b/virt/kvm/arm/arm.c @@ -145,6 +145,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) /* Mark the initial VMID generation invalid */ kvm->arch.mmu.vmid.vmid_gen = 0; kvm->arch.mmu.el2_vmid.vmid_gen = 0; + INIT_LIST_HEAD(&kvm->arch.nested_mmu_list); /* The maximum number of VCPUs is limited by the host's GIC model */ kvm->arch.max_vcpus = vgic_present ? -- 1.9.1