Hi Suzuki, On 9/17/18 12:41 PM, Suzuki K Poulose wrote: > Allow specifying the physical address size limit for a new > VM via the kvm_type argument for the KVM_CREATE_VM ioctl. This > allows us to finalise the stage2 page table as early as possible > and hence perform the right checks on the memory slots > without complication. The size is ecnoded as Log2(PA_Size) in s/ecnoded/encoded > bits[7:0] of the type field. For backward compatibility the > value 0 is reserved and implies 40bits. Also, lift the limit > of the IPA to host limit and allow lower IPA sizes (e.g, 32). > > The userspace could check the extension KVM_CAP_ARM_VM_PHYS_SHIFT > for the availability of this feature. The cap check returns the > maximum limit for the physical address shift supported by the host. > > Cc: Marc Zyngier <marc.zyngier@xxxxxxx> > Cc: Christoffer Dall <cdall@xxxxxxxxxx> > Cc: Peter Maydell <peter.maydell@xxxxxxxxxx> > Cc: Paolo Bonzini <pbonzini@xxxxxxxxxx> > Cc: Radim Krčmář <rkrcmar@xxxxxxxxxx> > Signed-off-by: Suzuki K Poulose <suzuki.poulose@xxxxxxx> > --- > Changes since v4: > - Fold the introduction of the KVM_CAP_ARM_VM_PHYS_SHIFT to this > patch to allow detection of the availability of the feature for > userspace. > - Document the API > - Restrict the feature only to arm64. > Changes since V3: > - Switch to a CAP, that can be checked via EXTENSIONS on KVM device > fd, rather than a dedicated ioctl. 
> --- > Documentation/virtual/kvm/api.txt | 8 ++++++++ > arch/arm64/include/asm/stage2_pgtable.h | 20 -------------------- > arch/arm64/kvm/reset.c | 20 ++++++++++++++++---- > include/uapi/linux/kvm.h | 10 ++++++++++ > 4 files changed, 34 insertions(+), 24 deletions(-) > > diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt > index c664064f76fb..f860251ff27c 100644 > --- a/Documentation/virtual/kvm/api.txt > +++ b/Documentation/virtual/kvm/api.txt > @@ -122,6 +122,14 @@ the default trap & emulate implementation (which changes the virtual > memory layout to fit in user mode), check KVM_CAP_MIPS_VZ and use the > flag KVM_VM_MIPS_VZ. > > +To configure the physical address space size for a VM (IPA size) on arm64, > +check KVM_CAP_ARM_VM_PHYS_SHIFT (which returns the maximum limit for the > +IPA shift) and use KVM_VM_TYPE_ARM_PHYS_SHIFT(PHYS_SHIFT). Bits[7-0] of the > +machine type has been reserved for specifying the PHYS_SHIFT. are reserved to pass the PHYS_SHIFT? > +The supported range is [32...IPA_LIMIT], where IPA_LIMIT could be s/could be/is > +identified by checking KVM_CAP_ARM_VM_PHYS_SHIFT. For backward compatibility > +a value of 0 selects 40bits. > + > > 4.3 KVM_GET_MSR_INDEX_LIST, KVM_GET_MSR_FEATURE_INDEX_LIST > > diff --git a/arch/arm64/include/asm/stage2_pgtable.h b/arch/arm64/include/asm/stage2_pgtable.h > index 6a56fdff0823..0b339f5a4a7c 100644 > --- a/arch/arm64/include/asm/stage2_pgtable.h > +++ b/arch/arm64/include/asm/stage2_pgtable.h > @@ -42,28 +42,8 @@ > * the range (IPA_SHIFT, IPA_SHIFT - 4). 
> */ > #define stage2_pgtable_levels(ipa) ARM64_HW_PGTABLE_LEVELS((ipa) - 4) > -#define STAGE2_PGTABLE_LEVELS stage2_pgtable_levels(KVM_PHYS_SHIFT) > #define kvm_stage2_levels(kvm) VTCR_EL2_LVLS(kvm->arch.vtcr) > > -/* > - * With all the supported VA_BITs and 40bit guest IPA, the following condition > - * is always true: > - * > - * STAGE2_PGTABLE_LEVELS <= CONFIG_PGTABLE_LEVELS > - * > - * We base our stage-2 page table walker helpers on this assumption and > - * fall back to using the host version of the helper wherever possible. > - * i.e, if a particular level is not folded (e.g, PUD) at stage2, we fall back > - * to using the host version, since it is guaranteed it is not folded at host. > - * > - * If the condition breaks in the future, we can rearrange the host level > - * definitions and reuse them for stage2. Till then... > - */ > -#if STAGE2_PGTABLE_LEVELS > CONFIG_PGTABLE_LEVELS > -#error "Unsupported combination of guest IPA and host VA_BITS." > -#endif > - > - > /* stage2_pgdir_shift() is the size mapped by top-level stage2 entry for the VM */ > #define stage2_pgdir_shift(kvm) pt_levels_pgdir_shift(kvm_stage2_levels(kvm)) > #define stage2_pgdir_size(kvm) (1ULL << stage2_pgdir_shift(kvm)) > diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c > index 0393bb974b23..c9640159e11f 100644 > --- a/arch/arm64/kvm/reset.c > +++ b/arch/arm64/kvm/reset.c > @@ -89,6 +89,9 @@ int kvm_arch_dev_ioctl_check_extension(struct kvm *kvm, long ext) > case KVM_CAP_VCPU_EVENTS: > r = 1; > break; > + case KVM_CAP_ARM_VM_PHYS_SHIFT: > + r = kvm_ipa_limit; > + break; > default: > r = 0; > } > @@ -190,16 +193,25 @@ int kvm_arm_config_vm(struct kvm *kvm, unsigned long type) > { > u64 vtcr = VTCR_EL2_FLAGS; > u64 parange; > - u8 lvls; > + u8 lvls, ipa_shift; > > - if (type) > + if (type & ~KVM_VM_TYPE_ARM_PHYS_SHIFT_MASK) > return -EINVAL; > > + ipa_shift = KVM_VM_TYPE_ARM_PHYS_SHIFT(type); > + if (ipa_shift) { > + if (ipa_shift > kvm_ipa_limit || > + ipa_shift < 32) > + 
return -EINVAL; > + } else { > + ipa_shift = KVM_PHYS_SHIFT; > + } > + > /* > * Use a minimum 2 level page table to prevent splitting > * host PMD huge pages at stage2. > */ > - lvls = stage2_pgtable_levels(KVM_PHYS_SHIFT); > + lvls = stage2_pgtable_levels(ipa_shift); > if (lvls < 2) > lvls = 2; > > @@ -221,7 +233,7 @@ int kvm_arm_config_vm(struct kvm *kvm, unsigned long type) > VTCR_EL2_VS_8BIT; > > vtcr |= VTCR_EL2_LVLS_TO_SL0(lvls); > - vtcr |= VTCR_EL2_T0SZ(KVM_PHYS_SHIFT); > + vtcr |= VTCR_EL2_T0SZ(ipa_shift); > > kvm->arch.vtcr = vtcr; > return 0; > diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h > index 07548de5c988..2a6b29c446db 100644 > --- a/include/uapi/linux/kvm.h > +++ b/include/uapi/linux/kvm.h > @@ -750,6 +750,15 @@ struct kvm_ppc_resize_hpt { > > #define KVM_S390_SIE_PAGE_OFFSET 1 > > +/* > + * On arm64, machine type can be used to request the physical > + * address size for the VM. Bits[7-0] has been reserved for the PA s/has been reserved/are? Thanks Eric > + * size shift (i.e, log2(PA_Size)). For backward compatibility, > + * value 0 implies the default IPA size, 40bits. > + */ > +#define KVM_VM_TYPE_ARM_PHYS_SHIFT_MASK 0xffULL > +#define KVM_VM_TYPE_ARM_PHYS_SHIFT(x) \ > + ((x) & KVM_VM_TYPE_ARM_PHYS_SHIFT_MASK) > /* > * ioctls for /dev/kvm fds: > */ > @@ -952,6 +961,7 @@ struct kvm_ppc_resize_hpt { > #define KVM_CAP_S390_HPAGE_1M 156 > #define KVM_CAP_NESTED_STATE 157 > #define KVM_CAP_ARM_INJECT_SERROR_ESR 158 > +#define KVM_CAP_ARM_VM_PHYS_SHIFT 159 /* returns maximum PA shift for a VM */ > > #ifdef KVM_CAP_IRQ_ROUTING > >