Re: [PATCH 1/2] KVM: arm64: Infer the PA offset from IPA in stage-2 map walker

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Fri, 21 Apr 2023 08:16:05 +0100,
Oliver Upton <oliver.upton@xxxxxxxxx> wrote:
> 
> Until now, the page table walker counted increments to the PA and IPA
> of a walk in two separate places. While the PA is incremented as soon as
> a leaf PTE is installed in stage2_map_walker_try_leaf(), the IPA is
> actually bumped in the generic table walker context. Critically,
> __kvm_pgtable_visit() rereads the PTE after the LEAF callback returns
> to work out if a table or leaf was installed, and only bumps the IPA for
> a leaf PTE.
> 
> This arrangement worked fine when we handled faults behind the write lock,
> as the walker had exclusive access to the stage-2 page tables. However,
> commit 1577cb5823ce ("KVM: arm64: Handle stage-2 faults in parallel")
> started handling all stage-2 faults behind the read lock, opening up a
> race where a walker could increment the PA but not the IPA of a walk.
> Nothing good ensues, as the walker starts mapping with the incorrect
> IPA -> PA relationship.
> 
> For example, assume that two vCPUs took a data abort on the same IPA.
> One observes that dirty logging is disabled, and the other observes that
> it is enabled:
> 
>   vCPU attempting PMD mapping		  vCPU attempting PTE mapping
>   ======================================  =====================================
>   /* install PMD */
>   stage2_make_pte(ctx, leaf);
>   data->phys += granule;
>   					  /* replace PMD with a table */
>   					  stage2_try_break_pte(ctx, data->mmu);
> 					  stage2_make_pte(ctx, table);
>   /* table is observed */
>   ctx.old = READ_ONCE(*ptep);
>   table = kvm_pte_table(ctx.old, level);
> 
>   /*
>    * map walk continues w/o incrementing
>    * IPA.
>    */
>    __kvm_pgtable_walk(..., level + 1);
> 
> Bring an end to the whole mess by using the IPA as the single source of
> truth for how far along a walk has gotten. Work out the correct PA to
> map by calculating the IPA offset from the beginning of the walk and add
> that to the starting physical address.
> 
> Cc: stable@xxxxxxxxxxxxxxx
> Fixes: 1577cb5823ce ("KVM: arm64: Handle stage-2 faults in parallel")
> Signed-off-by: Oliver Upton <oliver.upton@xxxxxxxxx>
> ---
>  arch/arm64/include/asm/kvm_pgtable.h |  1 +
>  arch/arm64/kvm/hyp/pgtable.c         | 32 ++++++++++++++++++++++++----
>  2 files changed, 29 insertions(+), 4 deletions(-)
> 
> diff --git a/arch/arm64/include/asm/kvm_pgtable.h b/arch/arm64/include/asm/kvm_pgtable.h
> index 4cd6762bda80..dc3c072e862f 100644
> --- a/arch/arm64/include/asm/kvm_pgtable.h
> +++ b/arch/arm64/include/asm/kvm_pgtable.h
> @@ -209,6 +209,7 @@ struct kvm_pgtable_visit_ctx {
>  	kvm_pte_t				old;
>  	void					*arg;
>  	struct kvm_pgtable_mm_ops		*mm_ops;
> +	u64					start;
>  	u64					addr;
>  	u64					end;
>  	u32					level;
> diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c
> index 3d61bd3e591d..140f82300db5 100644
> --- a/arch/arm64/kvm/hyp/pgtable.c
> +++ b/arch/arm64/kvm/hyp/pgtable.c
> @@ -58,6 +58,7 @@
>  struct kvm_pgtable_walk_data {
>  	struct kvm_pgtable_walker	*walker;
>  
> +	u64				start;
>  	u64				addr;
>  	u64				end;
>  };
> @@ -201,6 +202,7 @@ static inline int __kvm_pgtable_visit(struct kvm_pgtable_walk_data *data,
>  		.old	= READ_ONCE(*ptep),
>  		.arg	= data->walker->arg,
>  		.mm_ops	= mm_ops,
> +		.start	= data->start,
>  		.addr	= data->addr,
>  		.end	= data->end,
>  		.level	= level,
> @@ -293,6 +295,7 @@ int kvm_pgtable_walk(struct kvm_pgtable *pgt, u64 addr, u64 size,
>  		     struct kvm_pgtable_walker *walker)
>  {
>  	struct kvm_pgtable_walk_data walk_data = {
> +		.start	= ALIGN_DOWN(addr, PAGE_SIZE),
>  		.addr	= ALIGN_DOWN(addr, PAGE_SIZE),
>  		.end	= PAGE_ALIGN(walk_data.addr + size),
>  		.walker	= walker,
> @@ -794,20 +797,43 @@ static bool stage2_pte_executable(kvm_pte_t pte)
>  	return !(pte & KVM_PTE_LEAF_ATTR_HI_S2_XN);
>  }
>  
> +static u64 stage2_map_walker_phys_addr(const struct kvm_pgtable_visit_ctx *ctx,
> +				       const struct stage2_map_data *data)
> +{
> +	u64 phys = data->phys;
> +
> +	/*
> +	 * Stage-2 walks to update ownership data are communicated to the map
> +	 * walker using an invalid PA. Avoid offsetting an already invalid PA,
> +	 * which could overflow and make the address valid again.
> +	 */
> +	if (!kvm_phys_is_valid(phys))
> +		return phys;
> +
> +	/*
> +	 * Otherwise, work out the correct PA based on how far the walk has
> +	 * gotten.
> +	 */
> +	return phys + (ctx->addr - ctx->start);
> +}
> +
>  static bool stage2_leaf_mapping_allowed(const struct kvm_pgtable_visit_ctx *ctx,
>  					struct stage2_map_data *data)
>  {
> +	u64 phys = stage2_map_walker_phys_addr(ctx, data);
> +
>  	if (data->force_pte && (ctx->level < (KVM_PGTABLE_MAX_LEVELS - 1)))
>  		return false;
>  
> -	return kvm_block_mapping_supported(ctx, data->phys);
> +	return kvm_block_mapping_supported(ctx, phys);
>  }
>  
>  static int stage2_map_walker_try_leaf(const struct kvm_pgtable_visit_ctx *ctx,
>  				      struct stage2_map_data *data)
>  {
>  	kvm_pte_t new;
> -	u64 granule = kvm_granule_size(ctx->level), phys = data->phys;
> +	u64 phys = stage2_map_walker_phys_addr(ctx, data);
> +	u64 granule = kvm_granule_size(ctx->level);
>  	struct kvm_pgtable *pgt = data->mmu->pgt;
>  	struct kvm_pgtable_mm_ops *mm_ops = ctx->mm_ops;
>  
> @@ -841,8 +867,6 @@ static int stage2_map_walker_try_leaf(const struct kvm_pgtable_visit_ctx *ctx,
>  
>  	stage2_make_pte(ctx, new);
>  
> -	if (kvm_phys_is_valid(phys))
> -		data->phys += granule;
>  	return 0;
>  }
>  

So my conclusion is that after these two patches, data->phys should
never be updated, right? Then I'd suggest an additional patch to
constify a couple of things and make sure we don't accidentally update
them. Something like the patch below (compile-tested only).

Thanks,

	M.

>From a2eb08ce793c1cf01c79df13a619815e9d7c1d41 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz@xxxxxxxxxx>
Date: Fri, 21 Apr 2023 10:18:34 +0100
Subject: [PATCH] KVM: arm64: Constify start/end/phys fields of the pgtable
 walker data

As we are revamping the way the pgtable walker evaluates some of the
data, make it clear that we rely on some of the fields to be constant
across the lifetime of a walk.

For this, flag the start, end and phys fields of the walk data as
'const', which will generate an error if we were to accidentally
update these fields again.

Signed-off-by: Marc Zyngier <maz@xxxxxxxxxx>
---
 arch/arm64/kvm/hyp/pgtable.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c
index 356a3fd5220c..5282cb9ca4cf 100644
--- a/arch/arm64/kvm/hyp/pgtable.c
+++ b/arch/arm64/kvm/hyp/pgtable.c
@@ -58,9 +58,9 @@
 struct kvm_pgtable_walk_data {
 	struct kvm_pgtable_walker	*walker;
 
-	u64				start;
+	const u64			start;
 	u64				addr;
-	u64				end;
+	const u64			end;
 };
 
 static bool kvm_phys_is_valid(u64 phys)
@@ -352,7 +352,7 @@ int kvm_pgtable_get_leaf(struct kvm_pgtable *pgt, u64 addr,
 }
 
 struct hyp_map_data {
-	u64				phys;
+	const u64			phys;
 	kvm_pte_t			attr;
 };
 
@@ -578,7 +578,7 @@ void kvm_pgtable_hyp_destroy(struct kvm_pgtable *pgt)
 }
 
 struct stage2_map_data {
-	u64				phys;
+	const u64			phys;
 	kvm_pte_t			attr;
 	u8				owner_id;
 
-- 
2.34.1

-- 
Without deviation from the norm, progress is not possible.




[Index of Archives]     [Linux Kernel]     [Kernel Development Newbies]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite Hiking]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux