Re: [PATCH v5] arm64: fix VTTBR_BADDR_MASK

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Hi Joel,

On Mon, Aug 18 2014 at  9:36:04 pm BST, Joel Schopp <joel.schopp@xxxxxxx> wrote:
> The current VTTBR_BADDR_MASK only masks 39 bits, which is broken on current
> systems.  Rather than just add a bit it seems like a good time to also set
> things at run-time instead of compile time to accommodate more hardware.
>
> This patch sets TCR_EL2.PS, VTCR_EL2.T0SZ and vttbr_baddr_mask in runtime,
> not compile time.
>
> In ARMv8, EL2 physical address size (TCR_EL2.PS) and stage2 input address
> size (VTCR_EL2.T0SZ) cannot be determined in compile time since they
> depend on hardware capability.
>
> According to Table D4-23 and Table D4-25 in ARM DDI 0487A.b document,
> vttbr_x is calculated using different fixed values with consideration
> of T0SZ, granule size and the level of translation tables. Therefore,
> vttbr_baddr_mask should be determined dynamically.
>
> Changes since v4:
> More minor cleanups from review
> Moved some functions into headers
>
> Changes since v3:
> Another rebase
> Addressed minor comments from v2
>
> Changes since v2:
> Rebased on https://git.kernel.org/pub/scm/linux/kernel/git/kvmarm/kvmarm.git next branch
>
> Changes since v1:
> Rebased fix on Jungseok Lee's patch https://lkml.org/lkml/2014/5/12/189 to
> provide better long term fix.  Updated that patch to log error instead of
> silently fail on unaligned vttbr.
>
> Cc: Christoffer Dall <christoffer.dall@xxxxxxxxxx>
> Cc: Sungjinn Chung <sungjinn.chung@xxxxxxxxxxx>
> Signed-off-by: Jungseok Lee <jays.lee@xxxxxxxxxxx>
> Signed-off-by: Joel Schopp <joel.schopp@xxxxxxx>
> ---
>  arch/arm/include/asm/kvm_mmu.h   |   12 ++++++
>  arch/arm/kvm/arm.c               |   17 +++++++-
>  arch/arm64/include/asm/kvm_arm.h |   17 +-------
>  arch/arm64/include/asm/kvm_mmu.h |   78 ++++++++++++++++++++++++++++++++++++++
>  arch/arm64/kvm/hyp-init.S        |   20 +++++++---
>  5 files changed, 122 insertions(+), 22 deletions(-)
>
> diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h
> index 5c7aa3c..73f6ff6 100644
> --- a/arch/arm/include/asm/kvm_mmu.h
> +++ b/arch/arm/include/asm/kvm_mmu.h
> @@ -166,6 +166,18 @@ static inline void coherent_cache_guest_page(struct kvm_vcpu *vcpu, hva_t hva,
>
>  void stage2_flush_vm(struct kvm *kvm);
>
> +static inline int kvm_get_phys_addr_shift(void)
> +{
> +       return KVM_PHYS_SHIFT;
> +}
> +
> +static inline int set_vttbr_baddr_mask(void)
> +{
> +       vttbr_baddr_mask = VTTBR_BADDR_MASK;

Have you tried compiling this?

Apart from the obvious missing definition of the variable, I'm not fond
of functions with side-effects hidden in an include file. What is wrong
with just returning the mask and letting the common code set it?

> +       return 0;
> +}
> +
> +
>  #endif /* !__ASSEMBLY__ */
>
>  #endif /* __ARM_KVM_MMU_H__ */
> diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
> index 3c82b37..f396eb7 100644
> --- a/arch/arm/kvm/arm.c
> +++ b/arch/arm/kvm/arm.c
> @@ -37,6 +37,7 @@
>  #include <asm/mman.h>
>  #include <asm/tlbflush.h>
>  #include <asm/cacheflush.h>
> +#include <asm/cputype.h>
>  #include <asm/virt.h>
>  #include <asm/kvm_arm.h>
>  #include <asm/kvm_asm.h>
> @@ -466,8 +467,14 @@ static void update_vttbr(struct kvm *kvm)
>         /* update vttbr to be used with the new vmid */
>         pgd_phys = virt_to_phys(kvm->arch.pgd);
>         vmid = ((u64)(kvm->arch.vmid) << VTTBR_VMID_SHIFT) & VTTBR_VMID_MASK;
> -       kvm->arch.vttbr = pgd_phys & VTTBR_BADDR_MASK;
> -       kvm->arch.vttbr |= vmid;
> +
> +       /*
> +        * If the VTTBR isn't aligned there is something wrong with the system
> +        * or kernel.
> +        */
> +       BUG_ON(pgd_phys & ~vttbr_baddr_mask);
> +
> +       kvm->arch.vttbr = pgd_phys | vmid;
>
>         spin_unlock(&kvm_vmid_lock);
>  }
> @@ -1052,6 +1059,12 @@ int kvm_arch_init(void *opaque)
>                 }
>         }
>
> +       err = set_vttbr_baddr_mask();
> +       if (err) {
> +               kvm_err("Cannot set vttbr_baddr_mask\n");
> +               return -EINVAL;
> +       }
> +
>         cpu_notifier_register_begin();
>
>         err = init_hyp_mode();
> diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h
> index 3d69030..8dbef70 100644
> --- a/arch/arm64/include/asm/kvm_arm.h
> +++ b/arch/arm64/include/asm/kvm_arm.h
> @@ -94,7 +94,6 @@
>  /* TCR_EL2 Registers bits */
>  #define TCR_EL2_TBI    (1 << 20)
>  #define TCR_EL2_PS     (7 << 16)
> -#define TCR_EL2_PS_40B (2 << 16)
>  #define TCR_EL2_TG0    (1 << 14)
>  #define TCR_EL2_SH0    (3 << 12)
>  #define TCR_EL2_ORGN0  (3 << 10)
> @@ -103,8 +102,6 @@
>  #define TCR_EL2_MASK   (TCR_EL2_TG0 | TCR_EL2_SH0 | \
>                          TCR_EL2_ORGN0 | TCR_EL2_IRGN0 | TCR_EL2_T0SZ)
>
> -#define TCR_EL2_FLAGS  (TCR_EL2_PS_40B)
> -
>  /* VTCR_EL2 Registers bits */
>  #define VTCR_EL2_PS_MASK       (7 << 16)
>  #define VTCR_EL2_TG0_MASK      (1 << 14)
> @@ -119,36 +116,28 @@
>  #define VTCR_EL2_SL0_MASK      (3 << 6)
>  #define VTCR_EL2_SL0_LVL1      (1 << 6)
>  #define VTCR_EL2_T0SZ_MASK     0x3f
> -#define VTCR_EL2_T0SZ_40B      24
> +#define VTCR_EL2_T0SZ(bits)    (64 - (bits))
>
>  #ifdef CONFIG_ARM64_64K_PAGES
>  /*
>   * Stage2 translation configuration:
> - * 40bits output (PS = 2)
> - * 40bits input  (T0SZ = 24)
>   * 64kB pages (TG0 = 1)
>   * 2 level page tables (SL = 1)
>   */
>  #define VTCR_EL2_FLAGS         (VTCR_EL2_TG0_64K | VTCR_EL2_SH0_INNER | \
>                                  VTCR_EL2_ORGN0_WBWA | VTCR_EL2_IRGN0_WBWA | \
> -                                VTCR_EL2_SL0_LVL1 | VTCR_EL2_T0SZ_40B)
> -#define VTTBR_X                (38 - VTCR_EL2_T0SZ_40B)
> +                                VTCR_EL2_SL0_LVL1)
>  #else
>  /*
>   * Stage2 translation configuration:
> - * 40bits output (PS = 2)
> - * 40bits input  (T0SZ = 24)
>   * 4kB pages (TG0 = 0)
>   * 3 level page tables (SL = 1)
>   */
>  #define VTCR_EL2_FLAGS         (VTCR_EL2_TG0_4K | VTCR_EL2_SH0_INNER | \
>                                  VTCR_EL2_ORGN0_WBWA | VTCR_EL2_IRGN0_WBWA | \
> -                                VTCR_EL2_SL0_LVL1 | VTCR_EL2_T0SZ_40B)
> -#define VTTBR_X                (37 - VTCR_EL2_T0SZ_40B)
> +                                VTCR_EL2_SL0_LVL1)
>  #endif
>
> -#define VTTBR_BADDR_SHIFT (VTTBR_X - 1)
> -#define VTTBR_BADDR_MASK  (((1LLU << (40 - VTTBR_X)) - 1) << VTTBR_BADDR_SHIFT)
>  #define VTTBR_VMID_SHIFT  (48LLU)
>  #define VTTBR_VMID_MASK          (0xffLLU << VTTBR_VMID_SHIFT)
>
> diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
> index 7d29847..b6ae83b 100644
> --- a/arch/arm64/include/asm/kvm_mmu.h
> +++ b/arch/arm64/include/asm/kvm_mmu.h
> @@ -152,5 +152,83 @@ static inline void coherent_cache_guest_page(struct kvm_vcpu *vcpu, hva_t hva,
>
>  void stage2_flush_vm(struct kvm *kvm);
>
> +/*
> + * ARMv8 64K architecture limitations:
> + * 16 <= T0SZ <= 21 is valid under 3 level of translation tables
> + * 18 <= T0SZ <= 34 is valid under 2 level of translation tables
> + * 31 <= T0SZ <= 39 is valid under 1 level of translation tables
> + *
> + * ARMv8 4K architecture limitations:
> + * 16 <= T0SZ <= 24 is valid under 4 level of translation tables
> + * 21 <= T0SZ <= 33 is valid under 3 level of translation tables
> + * 30 <= T0SZ <= 39 is valid under 2 level of translation tables
> + *
> + * For 4K pages we only support 3 or 4 level, giving T0SZ a range of 16 to 33.
> + * For 64K pages we only support 2 or 3 level, giving T0SZ a range of 16 to 34.
> + *
> + * See Table D4-23 and Table D4-25 in ARM DDI 0487A.b to figure out
> + * the origin of the hardcoded values, 38 and 37.
> + */
> +
> +#ifdef CONFIG_ARM64_64K_PAGES
> +static inline int t0sz_to_vttbr_x(int t0sz)
> +{
> +       if (t0sz < 16 || t0sz > 34) {
> +               kvm_err("Cannot support %d-bit address space\n", 64 - t0sz);
> +               return 0;

0 is definitely a bad value for something that is an error
case. Consider -EINVAL instead.

Also, what if we're in a range that requires more levels of page
tables than the kernel can deal with (remember we use the kernel page
table accessors)? See the new ARM64_VA_BITS and ARM64_PGTABLE_LEVELS
symbols that are now available, and use them to validate the range you
have.

> +       }
> +
> +       return 38 - t0sz;
> +}
> +#else /* 4K pages */
> +static inline int t0sz_to_vttbr_x(int t0sz)
> +{
> +       if (t0sz < 16 || t0sz > 33) {
> +               kvm_err("Cannot support %d-bit address space\n", 64 - t0sz);
> +               return 0;

Same here.

> +       }
> +       return 37 - t0sz;
> +}
> +#endif
> +static inline int kvm_get_phys_addr_shift(void)
> +{
> +       int pa_range = read_cpuid(ID_AA64MMFR0_EL1) & 0xf;
> +
> +       switch (pa_range) {
> +       case 0: return 32;
> +       case 1: return 36;
> +       case 2: return 40;
> +       case 3: return 42;
> +       case 4: return 44;
> +       case 5: return 48;
> +       default:
> +               BUG();
> +               return 0;
> +       }
> +}
> +
> +static u64 vttbr_baddr_mask;

Now every compilation unit that includes kvm_mmu.h has an instance of
this variable. I doubt that it is the intended effect.

> +
> +/**
> + * set_vttbr_baddr_mask - set mask value for vttbr base address
> + *
> + * In ARMv8, vttbr_baddr_mask cannot be determined in compile time since the
> + * stage2 input address size depends on hardware capability. Thus, we first
> + * need to read ID_AA64MMFR0_EL1.PARange and then set vttbr_baddr_mask with
> + * consideration of both the granule size and the level of translation tables.
> + */
> +static inline int set_vttbr_baddr_mask(void)
> +{
> +       int t0sz, vttbr_x;
> +
> +       t0sz = VTCR_EL2_T0SZ(kvm_get_phys_addr_shift());
> +       vttbr_x = t0sz_to_vttbr_x(t0sz);
> +       if (!vttbr_x)
> +               return -EINVAL;
> +       vttbr_baddr_mask = (((1LLU << (48 - vttbr_x)) - 1) << (vttbr_x - 1));

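I think this can now be written using GENMASK_ULL(), e.g.
GENMASK_ULL(48, (vttbr_x - 1)). Note that both arguments are inclusive
bit positions, so the exact equivalent of the expression above is
GENMASK_ULL(46, (vttbr_x - 1)); please double-check the upper bound.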

> +       return 0;
> +}
> +
>  #endif /* __ASSEMBLY__ */
>  #endif /* __ARM64_KVM_MMU_H__ */
> diff --git a/arch/arm64/kvm/hyp-init.S b/arch/arm64/kvm/hyp-init.S
> index d968796..c0f7634 100644
> --- a/arch/arm64/kvm/hyp-init.S
> +++ b/arch/arm64/kvm/hyp-init.S
> @@ -63,17 +63,21 @@ __do_hyp_init:
>         mrs     x4, tcr_el1
>         ldr     x5, =TCR_EL2_MASK
>         and     x4, x4, x5
> -       ldr     x5, =TCR_EL2_FLAGS
> -       orr     x4, x4, x5
> -       msr     tcr_el2, x4
> -
> -       ldr     x4, =VTCR_EL2_FLAGS
>         /*
>          * Read the PARange bits from ID_AA64MMFR0_EL1 and set the PS bits in
> -        * VTCR_EL2.
> +        * TCR_EL2 and both PS bits and T0SZ bits in VTCR_EL2.
>          */
>         mrs     x5, ID_AA64MMFR0_EL1
>         bfi     x4, x5, #16, #3
> +       msr     tcr_el2, x4
> +
> +       ldr     x4, =VTCR_EL2_FLAGS
> +       bfi     x4, x5, #16, #3
> +       and     x5, x5, #0xf
> +       adr     x6, t0sz
> +       add     x6, x6, x5, lsl #2
> +       ldr     w5, [x6]
> +       orr     x4, x4, x5

You'll need to validate the T0SZ value, and possibly adjust it so that
it is compatible with the addressing capability of the kernel. That
will probably require a slight change of the hyp-init API.

>         msr     vtcr_el2, x4
>
>         mrs     x4, mair_el1
> @@ -109,6 +113,10 @@ target: /* We're now in the trampoline code, switch page tables */
>
>         /* Hello, World! */
>         eret
> +
> +t0sz:
> +       .word   VTCR_EL2_T0SZ(32), VTCR_EL2_T0SZ(36), VTCR_EL2_T0SZ(40)
> +       .word   VTCR_EL2_T0SZ(42), VTCR_EL2_T0SZ(44), VTCR_EL2_T0SZ(48)
>  ENDPROC(__kvm_hyp_init)
>
>         .ltorg
>

Another element that doesn't appear in this patch is that we need a way
for the kernel to expose the maximum input address to userspace (and
validate that no one puts memory outside of that range). This should be a
separate patch, but it is conceptually tied to the same problem.

Thanks,

        M.
-- 
Jazz is not dead. It just smells funny.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html




[Index of Archives]     [KVM ARM]     [KVM ia64]     [KVM ppc]     [Virtualization Tools]     [Spice Development]     [Libvirt]     [Libvirt Users]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite Questions]     [Linux Kernel]     [Linux SCSI]     [XFree86]
  Powered by Linux