On Mon, Jan 09, 2017 at 01:24:36AM -0500, Jintack Lim wrote:
> Each nested VM is supposed to have a mmu (i.e. shadow stage-2 page

to have a 'struct kvm_mmu' ?

> table), and we create it when the guest hypervisor writes to vttbr_el2
> with a new vmid.

I think the commit message should also mention that you maintain a list
of seen nested stage 2 translation contexts and associated shadow page
tables.

>
> In case the guest hypervisor writes to vttbr_el2 with existing vmid, we
> check if the base address is changed. If so, then what we have in the
> shadow page table is not valid any more. So ummap it.

unmap?

We clear the entire shadow stage 2 page table, right?

>
> Signed-off-by: Jintack Lim <jintack@xxxxxxxxxxxxxxx>
> ---
>  arch/arm/include/asm/kvm_host.h   |  1 +
>  arch/arm/kvm/arm.c                |  1 +
>  arch/arm64/include/asm/kvm_host.h |  1 +
>  arch/arm64/include/asm/kvm_mmu.h  |  6 ++++
>  arch/arm64/kvm/mmu-nested.c       | 71 +++++++++++++++++++++++++++++++++++++++
>  arch/arm64/kvm/sys_regs.c         | 15 ++++++++-
>  6 files changed, 94 insertions(+), 1 deletion(-)
>
> diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
> index fbde48d..ebf2810 100644
> --- a/arch/arm/include/asm/kvm_host.h
> +++ b/arch/arm/include/asm/kvm_host.h
> @@ -84,6 +84,7 @@ struct kvm_arch {
>
>  	/* Never used on arm but added to be compatible with arm64 */
>  	struct list_head nested_mmu_list;
> +	spinlock_t mmu_list_lock;
>
>  	/* Interrupt controller */
>  	struct vgic_dist vgic;
> diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
> index 147df97..6fa5754 100644
> --- a/arch/arm/kvm/arm.c
> +++ b/arch/arm/kvm/arm.c
> @@ -147,6 +147,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
>  	kvm->arch.mmu.vmid.vmid_gen = 0;
>  	kvm->arch.mmu.el2_vmid.vmid_gen = 0;
>  	INIT_LIST_HEAD(&kvm->arch.nested_mmu_list);
> +	spin_lock_init(&kvm->arch.mmu_list_lock);
>
>  	/* The maximum number of VCPUs is limited by the host's GIC model */
>  	kvm->arch.max_vcpus = vgic_present ?
> diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
> index 23e2267..52eea76 100644
> --- a/arch/arm64/include/asm/kvm_host.h
> +++ b/arch/arm64/include/asm/kvm_host.h
> @@ -99,6 +99,7 @@ struct kvm_arch {
>
>  	/* Stage 2 shadow paging contexts for nested L2 VM */
>  	struct list_head nested_mmu_list;
> +	spinlock_t mmu_list_lock;

I'm wondering if we really need the separate spin lock or if we could
just grab the KVM mutex?
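Something like this (completely untested sketch, and assuming we only
ever take this lock from the VCPU thread where sleeping is fine, which
the GFP_KERNEL allocation in create_nested_mmu below suggests we do):

	/* serialize list insertions on the VM-wide mutex instead */
	mutex_lock(&vcpu->kvm->lock);
	tmp_mmu = get_nested_mmu(vcpu, vttbr);
	if (!tmp_mmu)
		list_add_rcu(&nested_mmu->list, nested_mmu_list);
	else /* somebody beat us to it */
		need_free = true;
	mutex_unlock(&vcpu->kvm->lock);

That would be one lock fewer to reason about, at the cost of slightly
coarser serialization.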
>  };
>
>  #define KVM_NR_MEM_OBJS 40
> diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
> index d1ef650..fdc9327 100644
> --- a/arch/arm64/include/asm/kvm_mmu.h
> +++ b/arch/arm64/include/asm/kvm_mmu.h
> @@ -327,6 +327,7 @@ static inline unsigned int kvm_get_vmid_bits(void)
>  #ifdef CONFIG_KVM_ARM_NESTED_HYP
>  struct kvm_nested_s2_mmu *get_nested_mmu(struct kvm_vcpu *vcpu, u64 vttbr);
>  struct kvm_s2_mmu *vcpu_get_active_s2_mmu(struct kvm_vcpu *vcpu);
> +bool handle_vttbr_update(struct kvm_vcpu *vcpu, u64 vttbr);
>  #else
>  static inline struct kvm_nested_s2_mmu *get_nested_mmu(struct kvm_vcpu *vcpu,
>  						       u64 vttbr)
> @@ -337,6 +338,11 @@ static inline struct kvm_s2_mmu *vcpu_get_active_s2_mmu(struct kvm_vcpu *vcpu)
>  {
>  	return &vcpu->kvm->arch.mmu;
>  }
> +
> +static inline bool handle_vttbr_update(struct kvm_vcpu *vcpu, u64 vttbr)
> +{
> +	return false;
> +}
>  #endif
>
>  static inline u64 kvm_get_vttbr(struct kvm_s2_vmid *vmid,
> diff --git a/arch/arm64/kvm/mmu-nested.c b/arch/arm64/kvm/mmu-nested.c
> index d52078f..0811d94 100644
> --- a/arch/arm64/kvm/mmu-nested.c
> +++ b/arch/arm64/kvm/mmu-nested.c
> @@ -53,3 +53,74 @@ struct kvm_s2_mmu *vcpu_get_active_s2_mmu(struct kvm_vcpu *vcpu)
>
>  	return &nested_mmu->mmu;
>  }
> +
> +static struct kvm_nested_s2_mmu *create_nested_mmu(struct kvm_vcpu *vcpu,
> +						   u64 vttbr)
> +{
> +	struct kvm_nested_s2_mmu *nested_mmu, *tmp_mmu;
> +	struct list_head *nested_mmu_list = &vcpu->kvm->arch.nested_mmu_list;
> +	bool need_free = false;
> +	int ret;
> +
> +	nested_mmu = kzalloc(sizeof(struct kvm_nested_s2_mmu), GFP_KERNEL);
> +	if (!nested_mmu)
> +		return NULL;
> +
> +	ret = __kvm_alloc_stage2_pgd(&nested_mmu->mmu);
> +	if (ret) {
> +		kfree(nested_mmu);
> +		return NULL;
> +	}
> +
> +	spin_lock(&vcpu->kvm->arch.mmu_list_lock);
> +	tmp_mmu = get_nested_mmu(vcpu, vttbr);
> +	if (!tmp_mmu)
> +		list_add_rcu(&nested_mmu->list, nested_mmu_list);
> +	else /* Somebody already created and put a new nested_mmu to the list */
> +		need_free = true;
> +	spin_unlock(&vcpu->kvm->arch.mmu_list_lock);
> +
> +	if (need_free) {
> +		__kvm_free_stage2_pgd(&nested_mmu->mmu);
> +		kfree(nested_mmu);
> +		nested_mmu = tmp_mmu;
> +	}
> +
> +	return nested_mmu;
> +}
> +
> +static void kvm_nested_s2_unmap(struct kvm_vcpu *vcpu)
> +{
> +	struct kvm_nested_s2_mmu *nested_mmu;
> +	struct list_head *nested_mmu_list = &vcpu->kvm->arch.nested_mmu_list;
> +
> +	list_for_each_entry_rcu(nested_mmu, nested_mmu_list, list)
> +		kvm_unmap_stage2_range(&nested_mmu->mmu, 0, KVM_PHYS_SIZE);
> +}
> +
> +bool handle_vttbr_update(struct kvm_vcpu *vcpu, u64 vttbr)
> +{
> +	struct kvm_nested_s2_mmu *nested_mmu;
> +
> +	/* See if we can relax this */

huh?

> +	if (!vttbr)

why is this a special case?  Theoretically an IPA of zero and VMID zero
could be a valid page table base pointer, right?

I'm guessing because the guest hypervisor occasionally writes zero into
VTTBR_EL2, for example when not using stage 2 translation, so perhaps
what you need to do is to defer creating a new nested mmu structure
until you actually enter the VM with stage 2 paging enabled?

> +		return true;
> +
> +	nested_mmu = (struct kvm_nested_s2_mmu *)get_nested_mmu(vcpu, vttbr);
> +	if (!nested_mmu) {
> +		nested_mmu = create_nested_mmu(vcpu, vttbr);
> +		if (!nested_mmu)
> +			return false;

I'm wondering if this can be simplified by having get_nested_mmu look up
and allocate the struct and renaming the current get_nested_mmu function
to lookup_nested_mmu?
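Something like this (untested sketch; the names are just a suggestion,
with lookup_nested_mmu being the renamed pure list walk):

	struct kvm_nested_s2_mmu *get_nested_mmu(struct kvm_vcpu *vcpu, u64 vttbr)
	{
		struct kvm_nested_s2_mmu *nested_mmu;

		/* look up an existing context first, allocate only on a miss */
		nested_mmu = lookup_nested_mmu(vcpu, vttbr);
		if (!nested_mmu)
			nested_mmu = create_nested_mmu(vcpu, vttbr);

		return nested_mmu;
	}

Then this caller is down to a single call plus a NULL check.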
This caller looks racy, even though it isn't, which would be improved
by my suggestion as well.

> +	} else {
> +		/*
> +		 * unmap the shadow page table if vttbr_el2 is

While the function is called unmap, what we really do is clear/flush
the shadow stage 2 page table.

> +		 * changed to different value
> +		 */
> +		if (vttbr != nested_mmu->virtual_vttbr)
> +			kvm_nested_s2_unmap(vcpu);
> +	}
> +
> +	nested_mmu->virtual_vttbr = vttbr;
> +
> +	return true;
> +}
> diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
> index e66f40d..ddb641c 100644
> --- a/arch/arm64/kvm/sys_regs.c
> +++ b/arch/arm64/kvm/sys_regs.c
> @@ -960,6 +960,19 @@ static bool access_cpacr(struct kvm_vcpu *vcpu,
>  	return true;
>  }
>
> +static bool access_vttbr(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
> +			 const struct sys_reg_desc *r)
> +{
> +	u64 vttbr = p->regval;
> +
> +	if (!p->is_write) {
> +		p->regval = vcpu_el2_reg(vcpu, r->reg);
> +		return true;
> +	}
> +
> +	return handle_vttbr_update(vcpu, vttbr);
> +}
> +
>  static bool trap_el2_reg(struct kvm_vcpu *vcpu,
>  			 struct sys_reg_params *p,
>  			 const struct sys_reg_desc *r)
> @@ -1306,7 +1319,7 @@ static bool trap_el2_reg(struct kvm_vcpu *vcpu,
>  	  trap_el2_reg, reset_el2_val, TCR_EL2, 0 },
>  	/* VTTBR_EL2 */
>  	{ Op0(0b11), Op1(0b100), CRn(0b0010), CRm(0b0001), Op2(0b000),
> -	  trap_el2_reg, reset_el2_val, VTTBR_EL2, 0 },
> +	  access_vttbr, reset_el2_val, VTTBR_EL2, 0 },
>  	/* VTCR_EL2 */
>  	{ Op0(0b11), Op1(0b100), CRn(0b0010), CRm(0b0001), Op2(0b010),
>  	  trap_el2_reg, reset_el2_val, VTCR_EL2, 0 },
> --
> 1.9.1
>
>

Thanks,
-Christoffer