Re: [PATCH v3 09/20] kvm: arm64: Make stage2 page table layout dynamic

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Hi Suzuki,

On 06/29/2018 01:15 PM, Suzuki K Poulose wrote:
> So far we had a static stage2 page table handling code, based on a
> fixed IPA of 40bits. As we prepare for a configurable IPA size per
> VM, make our stage2 page table code dynamic, to do the right thing
> for a given VM. We ensure the existing condition is always true even
> when we lift the limit on the IPA. i.e,
> 
> 	page table levels in stage1 >= page table levels in stage2
> 
> Support for the IPA size configuration needs other changes in the way
> we configure the EL2 registers (VTTBR and VTCR). So, the IPA is still
> fixed to 40bits. The patch also moves the kvm_page_empty() in asm/kvm_mmu.h
> to the top, before including the asm/stage2_pgtable.h to avoid a forward
> declaration.
> 
> Cc: Marc Zyngier <marc.zyngier@xxxxxxx>
> Cc: Christoffer Dall <cdall@xxxxxxxxxx>
> Signed-off-by: Suzuki K Poulose <suzuki.poulose@xxxxxxx>
> ---
> Changes since V2
>  - Restrict the stage2 page table to allow reusing the host page table
>    helpers for now, until we get stage1 independent page table helpers.
I would move this up in the commit msg to motivate the fact we enforce
the able condition.
> ---
>  arch/arm64/include/asm/kvm_mmu.h              |  14 +-
>  arch/arm64/include/asm/stage2_pgtable-nopmd.h |  42 ------
>  arch/arm64/include/asm/stage2_pgtable-nopud.h |  39 -----
>  arch/arm64/include/asm/stage2_pgtable.h       | 207 +++++++++++++++++++-------
>  4 files changed, 159 insertions(+), 143 deletions(-)
>  delete mode 100644 arch/arm64/include/asm/stage2_pgtable-nopmd.h
>  delete mode 100644 arch/arm64/include/asm/stage2_pgtable-nopud.h

with my very limited knowledge of S2 page table walkers I fail to
understand why we now can get rid of stage2_pgtable-nopmd.h and
stage2_pgtable-nopud.h and associated FOLDED config. Please could you
explain it in the commit message?
> 
> diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
> index dbaf513..a351722 100644
> --- a/arch/arm64/include/asm/kvm_mmu.h
> +++ b/arch/arm64/include/asm/kvm_mmu.h
> @@ -21,6 +21,7 @@
>  #include <asm/page.h>
>  #include <asm/memory.h>
>  #include <asm/cpufeature.h>
> +#include <asm/kvm_arm.h>
>  
>  /*
>   * As ARMv8.0 only has the TTBR0_EL2 register, we cannot express
> @@ -147,6 +148,13 @@ static inline unsigned long __kern_hyp_va(unsigned long v)
>  #define kvm_phys_mask(kvm)		(kvm_phys_size(kvm) - _AC(1, ULL))
>  #define kvm_vttbr_baddr_mask(kvm)	VTTBR_BADDR_MASK
>  
> +static inline bool kvm_page_empty(void *ptr)
> +{
> +	struct page *ptr_page = virt_to_page(ptr);
> +
> +	return page_count(ptr_page) == 1;
> +}
> +
>  #include <asm/stage2_pgtable.h>
>  
>  int create_hyp_mappings(void *from, void *to, pgprot_t prot);
> @@ -237,12 +245,6 @@ static inline bool kvm_s2pmd_exec(pmd_t *pmdp)
>  	return !(READ_ONCE(pmd_val(*pmdp)) & PMD_S2_XN);
>  }
>  
> -static inline bool kvm_page_empty(void *ptr)
> -{
> -	struct page *ptr_page = virt_to_page(ptr);
> -	return page_count(ptr_page) == 1;
> -}
> -
>  #define hyp_pte_table_empty(ptep) kvm_page_empty(ptep)
>  
>  #ifdef __PAGETABLE_PMD_FOLDED
> diff --git a/arch/arm64/include/asm/stage2_pgtable-nopmd.h b/arch/arm64/include/asm/stage2_pgtable-nopmd.h
> deleted file mode 100644
> index 0280ded..0000000
> --- a/arch/arm64/include/asm/stage2_pgtable-nopmd.h
> +++ /dev/null
> @@ -1,42 +0,0 @@
> -/*
> - * Copyright (C) 2016 - ARM Ltd
> - *
> - * This program is free software; you can redistribute it and/or modify
> - * it under the terms of the GNU General Public License version 2 as
> - * published by the Free Software Foundation.
> - *
> - * This program is distributed in the hope that it will be useful,
> - * but WITHOUT ANY WARRANTY; without even the implied warranty of
> - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> - * GNU General Public License for more details.
> - *
> - * You should have received a copy of the GNU General Public License
> - * along with this program.  If not, see <http://www.gnu.org/licenses/>.
> - */
> -
> -#ifndef __ARM64_S2_PGTABLE_NOPMD_H_
> -#define __ARM64_S2_PGTABLE_NOPMD_H_
> -
> -#include <asm/stage2_pgtable-nopud.h>
> -
> -#define __S2_PGTABLE_PMD_FOLDED
> -
> -#define S2_PMD_SHIFT		S2_PUD_SHIFT
> -#define S2_PTRS_PER_PMD		1
> -#define S2_PMD_SIZE		(1UL << S2_PMD_SHIFT)
> -#define S2_PMD_MASK		(~(S2_PMD_SIZE-1))
> -
> -#define stage2_pud_none(kvm, pud)		(0)
> -#define stage2_pud_present(kvm, pud)		(1)
> -#define stage2_pud_clear(kvm, pud)		do { } while (0)
> -#define stage2_pud_populate(kvm, pud, pmd)	do { } while (0)
> -#define stage2_pmd_offset(kvm, pud, address)	((pmd_t *)(pud))
> -
> -#define stage2_pmd_free(kvm, pmd)		do { } while (0)
> -
> -#define stage2_pmd_addr_end(kvm, addr, end)	(end)
> -
> -#define stage2_pud_huge(kvm, pud)		(0)
> -#define stage2_pmd_table_empty(kvm, pmdp)	(0)
> -
> -#endif
> diff --git a/arch/arm64/include/asm/stage2_pgtable-nopud.h b/arch/arm64/include/asm/stage2_pgtable-nopud.h
> deleted file mode 100644
> index cd6304e..0000000
> --- a/arch/arm64/include/asm/stage2_pgtable-nopud.h
> +++ /dev/null
> @@ -1,39 +0,0 @@
> -/*
> - * Copyright (C) 2016 - ARM Ltd
> - *
> - * This program is free software; you can redistribute it and/or modify
> - * it under the terms of the GNU General Public License version 2 as
> - * published by the Free Software Foundation.
> - *
> - * This program is distributed in the hope that it will be useful,
> - * but WITHOUT ANY WARRANTY; without even the implied warranty of
> - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> - * GNU General Public License for more details.
> - *
> - * You should have received a copy of the GNU General Public License
> - * along with this program.  If not, see <http://www.gnu.org/licenses/>.
> - */
> -
> -#ifndef __ARM64_S2_PGTABLE_NOPUD_H_
> -#define __ARM64_S2_PGTABLE_NOPUD_H_
> -
> -#define __S2_PGTABLE_PUD_FOLDED
> -
> -#define S2_PUD_SHIFT		S2_PGDIR_SHIFT
> -#define S2_PTRS_PER_PUD		1
> -#define S2_PUD_SIZE		(_AC(1, UL) << S2_PUD_SHIFT)
> -#define S2_PUD_MASK		(~(S2_PUD_SIZE-1))
> -
> -#define stage2_pgd_none(kvm, pgd)		(0)
> -#define stage2_pgd_present(kvm, pgd)		(1)
> -#define stage2_pgd_clear(kvm, pgd)		do { } while (0)
> -#define stage2_pgd_populate(kvm, pgd, pud)	do { } while (0)
> -
> -#define stage2_pud_offset(kvm, pgd, address)	((pud_t *)(pgd))
> -
> -#define stage2_pud_free(kvm, x)			do { } while (0)
> -
> -#define stage2_pud_addr_end(kvm, addr, end)	(end)
> -#define stage2_pud_table_empty(kvm, pmdp)	(0)
> -
> -#endif
> diff --git a/arch/arm64/include/asm/stage2_pgtable.h b/arch/arm64/include/asm/stage2_pgtable.h
> index 057a405..ffc37cc 100644
> --- a/arch/arm64/include/asm/stage2_pgtable.h
> +++ b/arch/arm64/include/asm/stage2_pgtable.h
> @@ -19,8 +19,12 @@
>  #ifndef __ARM64_S2_PGTABLE_H_
>  #define __ARM64_S2_PGTABLE_H_
>  
> +#include <linux/hugetlb.h>
>  #include <asm/pgtable.h>
>  
> +/* The PGDIR shift for a given page table with "n" levels. */
> +#define pt_levels_pgdir_shift(n)	ARM64_HW_PGTABLE_LEVEL_SHIFT(4 - (n))
> +
>  /*
>   * The hardware supports concatenation of up to 16 tables at stage2 entry level
>   * and we use the feature whenever possible.
> @@ -29,118 +33,209 @@
>   * On arm64, the smallest PAGE_SIZE supported is 4k, which means
>   *             (PAGE_SHIFT - 3) > 4 holds for all page sizes.
Trying to understand that comment. Why do we compare to 4?
>   * This implies, the total number of page table levels at stage2 expected
> - * by the hardware is actually the number of levels required for (KVM_PHYS_SHIFT - 4)
> + * by the hardware is actually the number of levels required for (IPA_SHIFT - 4)
although understandable, is IPA_SHIFT defined somewhere?
>   * in normal translations(e.g, stage1), since we cannot have another level in
> - * the range (KVM_PHYS_SHIFT, KVM_PHYS_SHIFT - 4).
> + * the range (IPA_SHIFT, IPA_SHIFT - 4).
I fail to understand the above comment. Could you give a pointer to the
spec?
>   */
> -#define STAGE2_PGTABLE_LEVELS		ARM64_HW_PGTABLE_LEVELS(KVM_PHYS_SHIFT - 4)
> +#define stage2_pt_levels(ipa_shift)	ARM64_HW_PGTABLE_LEVELS((ipa_shift) - 4)
>  
>  /*
> - * With all the supported VA_BITs and 40bit guest IPA, the following condition
> - * is always true:
> + * With all the supported VA_BITs and guest IPA, the following condition
> + * must be always true:
>   *
> - *       STAGE2_PGTABLE_LEVELS <= CONFIG_PGTABLE_LEVELS
> + *       stage2_pt_levels <= CONFIG_PGTABLE_LEVELS
>   *
>   * We base our stage-2 page table walker helpers on this assumption and
>   * fall back to using the host version of the helper wherever possible.
>   * i.e, if a particular level is not folded (e.g, PUD) at stage2, we fall back
>   * to using the host version, since it is guaranteed it is not folded at host.
>   *
> - * If the condition breaks in the future, we can rearrange the host level
> - * definitions and reuse them for stage2. Till then...
> + * If the condition breaks in the future, we need completely independent
> + * page table helpers. Till then...
>   */
> -#if STAGE2_PGTABLE_LEVELS > CONFIG_PGTABLE_LEVELS
> +
> +#if stage2_pt_levels(KVM_PHYS_SHIFT) > CONFIG_PGTABLE_LEVELS
>  #error "Unsupported combination of guest IPA and host VA_BITS."
>  #endif
>  
> -/* S2_PGDIR_SHIFT is the size mapped by top-level stage2 entry */
> -#define S2_PGDIR_SHIFT			ARM64_HW_PGTABLE_LEVEL_SHIFT(4 - STAGE2_PGTABLE_LEVELS)
> -#define S2_PGDIR_SIZE			(_AC(1, UL) << S2_PGDIR_SHIFT)
> -#define S2_PGDIR_MASK			(~(S2_PGDIR_SIZE - 1))
> -
>  /*
>   * The number of PTRS across all concatenated stage2 tables given by the
>   * number of bits resolved at the initial level.
>   */
> -#define PTRS_PER_S2_PGD			(1 << (KVM_PHYS_SHIFT - S2_PGDIR_SHIFT))
> +#define __s2_pgd_ptrs(pa, lvls)	(1 << ((pa) - pt_levels_pgdir_shift((lvls))))
> +#define __s2_pgd_size(pa, lvls)	(__s2_pgd_ptrs((pa), (lvls)) * sizeof(pgd_t))
> +
> +#define kvm_stage2_levels(kvm)		stage2_pt_levels(kvm_phys_shift(kvm))
> +#define stage2_pgdir_shift(kvm)	\
> +		pt_levels_pgdir_shift(kvm_stage2_levels(kvm))
> +#define stage2_pgdir_size(kvm)		(_AC(1, UL) << stage2_pgdir_shift((kvm)))
> +#define stage2_pgdir_mask(kvm)		(~(stage2_pgdir_size((kvm)) - 1))
> +#define stage2_pgd_ptrs(kvm)	\
> +	__s2_pgd_ptrs(kvm_phys_shift(kvm), kvm_stage2_levels(kvm))
> +
> +#define stage2_pgd_size(kvm)	__s2_pgd_size(kvm_phys_shift(kvm), kvm_stage2_levels(kvm))
>  
>  /*
>   * kvm_mmmu_cache_min_pages is the number of stage2 page table translation
>   * levels in addition to the PGD.
>   */
> -#define kvm_mmu_cache_min_pages(kvm)	(STAGE2_PGTABLE_LEVELS - 1)
> +#define kvm_mmu_cache_min_pages(kvm)	(kvm_stage2_levels(kvm) - 1)
>  
>  
> -#if STAGE2_PGTABLE_LEVELS > 3
> +/* PUD/PMD definitions if present */
> +#define __S2_PUD_SHIFT			ARM64_HW_PGTABLE_LEVEL_SHIFT(1)
> +#define __S2_PUD_SIZE			(_AC(1, UL) << __S2_PUD_SHIFT)
> +#define __S2_PUD_MASK			(~(__S2_PUD_SIZE - 1))
>  
> -#define S2_PUD_SHIFT			ARM64_HW_PGTABLE_LEVEL_SHIFT(1)
> -#define S2_PUD_SIZE			(_AC(1, UL) << S2_PUD_SHIFT)
> -#define S2_PUD_MASK			(~(S2_PUD_SIZE - 1))
> +#define __S2_PMD_SHIFT			ARM64_HW_PGTABLE_LEVEL_SHIFT(2)
> +#define __S2_PMD_SIZE			(_AC(1, UL) << __S2_PMD_SHIFT)
> +#define __S2_PMD_MASK			(~(__S2_PMD_SIZE - 1))
Is this renaming mandatory?
>  
> -#define stage2_pgd_none(kvm, pgd)		pgd_none(pgd)
> -#define stage2_pgd_clear(kvm, pgd)		pgd_clear(pgd)
> -#define stage2_pgd_present(kvm, pgd)		pgd_present(pgd)
> -#define stage2_pgd_populate(kvm, pgd, pud)	pgd_populate(NULL, pgd, pud)
> -#define stage2_pud_offset(kvm, pgd, address)	pud_offset(pgd, address)
> -#define stage2_pud_free(kvm, pud)		pud_free(NULL, pud)
> +#define __s2_pud_index(addr) \
> +	(((addr) >> __S2_PUD_SHIFT) & (PTRS_PER_PTE - 1))
> +#define __s2_pmd_index(addr) \
> +	(((addr) >> __S2_PMD_SHIFT) & (PTRS_PER_PTE - 1))
>  
> -#define stage2_pud_table_empty(kvm, pudp)	kvm_page_empty(pudp)
> +#define __kvm_has_stage2_levels(kvm, min_levels)	\
> +  ((CONFIG_PGTABLE_LEVELS >= min_levels) && (kvm_stage2_levels(kvm) >= min_levels))
kvm_stage2_levels <= CONFIG_PGTABLE_LEVELS so you should just need to
check kvm_stage2_levels?
> +
> +#define kvm_stage2_has_pgd(kvm)	__kvm_has_stage2_levels(kvm, 4)
> +#define kvm_stage2_has_pud(kvm) __kvm_has_stage2_levels(kvm, 3)
> +
> +static inline int stage2_pgd_none(struct kvm *kvm, pgd_t pgd)
> +{
> +	return kvm_stage2_has_pgd(kvm) ? pgd_none(pgd) : 0;
> +}
> +
> +static inline void stage2_pgd_clear(struct kvm *kvm, pgd_t *pgdp)
> +{
> +	if (kvm_stage2_has_pgd(kvm))
> +		pgd_clear(pgdp);
> +}
> +
> +static inline int stage2_pgd_present(struct kvm *kvm, pgd_t pgd)
> +{
> +	return kvm_stage2_has_pgd(kvm) ? pgd_present(pgd) : 1;
> +}
> +
> +static inline void stage2_pgd_populate(struct kvm *kvm, pgd_t *pgdp, pud_t *pud)
> +{
> +	if (kvm_stage2_has_pgd(kvm))
> +		pgd_populate(NULL, pgdp, pud);
> +	else
> +		BUG();
> +}
> +
> +static inline pud_t *stage2_pud_offset(struct kvm *kvm,
> +				       pgd_t *pgd, unsigned long address)
> +{
> +	if (kvm_stage2_has_pgd(kvm)) {
> +		phys_addr_t pud_phys = pgd_page_paddr(*pgd);
> +
> +		pud_phys += __s2_pud_index(address) * sizeof(pud_t);
> +		return __va(pud_phys);
> +	}
> +	return (pud_t *)pgd;
> +}
> +
> +static inline void stage2_pud_free(struct kvm *kvm, pud_t *pud)
> +{
> +	if (kvm_stage2_has_pgd(kvm))
> +		pud_free(NULL, pud);
> +}
> +
> +static inline int stage2_pud_table_empty(struct kvm *kvm, pud_t *pudp)
> +{
> +	return kvm_stage2_has_pgd(kvm) && kvm_page_empty(pudp);
> +}
>  
>  static inline phys_addr_t
>  stage2_pud_addr_end(struct kvm *kvm, phys_addr_t addr, phys_addr_t end)
>  {
> -	phys_addr_t boundary = (addr + S2_PUD_SIZE) & S2_PUD_MASK;
> +	if (kvm_stage2_has_pgd(kvm)) {
> +		phys_addr_t boundary = (addr + __S2_PUD_SIZE) & __S2_PUD_MASK;
>  
> -	return (boundary - 1 < end - 1) ? boundary : end;
> +		return (boundary - 1 < end - 1) ? boundary : end;
> +	}
> +	return end;
>  }
>  
> -#endif		/* STAGE2_PGTABLE_LEVELS > 3 */
> +static inline int stage2_pud_none(struct kvm *kvm, pud_t pud)
> +{
> +	return kvm_stage2_has_pud(kvm) ? pud_none(pud) : 0;
> +}
>  
> +static inline void stage2_pud_clear(struct kvm *kvm, pud_t *pudp)
> +{
> +	if (kvm_stage2_has_pud(kvm))
> +		pud_clear(pudp);
> +}
>  
> -#if STAGE2_PGTABLE_LEVELS > 2
> +static inline int stage2_pud_present(struct kvm *kvm, pud_t pud)
> +{
> +	return kvm_stage2_has_pud(kvm) ? pud_present(pud) : 1;
> +}
>  
> -#define S2_PMD_SHIFT			ARM64_HW_PGTABLE_LEVEL_SHIFT(2)
> -#define S2_PMD_SIZE			(_AC(1, UL) << S2_PMD_SHIFT)
> -#define S2_PMD_MASK			(~(S2_PMD_SIZE - 1))
> +static inline void stage2_pud_populate(struct kvm *kvm, pud_t *pudp, pmd_t *pmd)
> +{
> +	if (kvm_stage2_has_pud(kvm))
> +		pud_populate(NULL, pudp, pmd);
> +	else
> +		BUG();
> +}
>  
> -#define stage2_pud_none(kvm, pud)		pud_none(pud)
> -#define stage2_pud_clear(kvm, pud)		pud_clear(pud)
> -#define stage2_pud_present(kvm, pud)		pud_present(pud)
> -#define stage2_pud_populate(kvm, pud, pmd)	pud_populate(NULL, pud, pmd)
> -#define stage2_pmd_offset(kvm, pud, address)	pmd_offset(pud, address)
> -#define stage2_pmd_free(kvm, pmd)		pmd_free(NULL, pmd)
> +static inline pmd_t *stage2_pmd_offset(struct kvm *kvm,
> +					 pud_t *pud, unsigned long address)
> +{
> +	if (kvm_stage2_has_pud(kvm)) {
> +		phys_addr_t pmd_phys = pud_page_paddr(*pud);
>  
> -#define stage2_pud_huge(kvm, pud)		pud_huge(pud)
> -#define stage2_pmd_table_empty(kvm, pmdp)	kvm_page_empty(pmdp)
> +		pmd_phys += __s2_pmd_index(address) * sizeof(pmd_t);
> +		return __va(pmd_phys);
> +	}
> +	return (pmd_t *)pud;
> +}
> +
> +static inline void stage2_pmd_free(struct kvm *kvm, pmd_t *pmd)
> +{
> +	if (kvm_stage2_has_pud(kvm))
> +		pmd_free(NULL, pmd);
> +}
> +
> +static inline int stage2_pmd_table_empty(struct kvm *kvm, pmd_t *pmdp)
> +{
> +	return kvm_stage2_has_pud(kvm) && kvm_page_empty(pmdp);
> +}
>  
>  static inline phys_addr_t
>  stage2_pmd_addr_end(struct kvm *kvm, phys_addr_t addr, phys_addr_t end)
>  {
> -	phys_addr_t boundary = (addr + S2_PMD_SIZE) & S2_PMD_MASK;
> +	if (kvm_stage2_has_pud(kvm)) {
> +		phys_addr_t boundary = (addr + __S2_PMD_SIZE) & __S2_PMD_MASK;
>  
> -	return (boundary - 1 < end - 1) ? boundary : end;
> +		return (boundary - 1 < end - 1) ? boundary : end;
> +	}
> +	return end;
>  }
>  
> -#endif		/* STAGE2_PGTABLE_LEVELS > 2 */
> +static inline int stage2_pud_huge(struct kvm *kvm, pud_t pud)
> +{
> +	return kvm_stage2_has_pud(kvm) ? pud_huge(pud) : 0;
> +}
>  
>  #define stage2_pte_table_empty(kvm, ptep)	kvm_page_empty(ptep)
>  
> -#if STAGE2_PGTABLE_LEVELS == 2
> -#include <asm/stage2_pgtable-nopmd.h>
> -#elif STAGE2_PGTABLE_LEVELS == 3
> -#include <asm/stage2_pgtable-nopud.h>
> -#endif
> -
> -#define stage2_pgd_size(kvm)	(PTRS_PER_S2_PGD * sizeof(pgd_t))
> -
> -#define stage2_pgd_index(kvm, addr) \
> -	(((addr) >> S2_PGDIR_SHIFT) & (PTRS_PER_S2_PGD - 1))
> +static inline unsigned long stage2_pgd_index(struct kvm *kvm, phys_addr_t addr)
> +{
> +	return (addr >> stage2_pgdir_shift(kvm)) & (stage2_pgd_ptrs(kvm) - 1);
> +}
>  
>  static inline phys_addr_t
>  stage2_pgd_addr_end(struct kvm *kvm, phys_addr_t addr, phys_addr_t end)
>  {
> -	phys_addr_t boundary = (addr + S2_PGDIR_SIZE) & S2_PGDIR_MASK;
> +	phys_addr_t boundary;
>  
> +	boundary = (addr + stage2_pgdir_size(kvm)) & stage2_pgdir_mask(kvm);
>  	return (boundary - 1 < end - 1) ? boundary : end;
>  }
>  
> 

Globally this patch is pretty hard to review. I don't know if it is
possible to split into 2. 1) Addition of some helper macros. 2) removal
of nopud and nopmd and implementation of the corresponding macros?

Thanks

Eric



[Index of Archives]     [KVM ARM]     [KVM ia64]     [KVM ppc]     [Virtualization Tools]     [Spice Development]     [Libvirt]     [Libvirt Users]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite Questions]     [Linux Kernel]     [Linux SCSI]     [XFree86]

  Powered by Linux