Re: [PATCH v5 08/14] KVM: arm64: Protect stage-2 traversal with RCU

Hi Oliver,

On 07.11.2022 22:56, Oliver Upton wrote:
> Use RCU to safely walk the stage-2 page tables in parallel. Acquire and
> release the RCU read lock when traversing the page tables. Defer the
> freeing of table memory to an RCU callback. Indirect the calls into RCU
> and provide stubs for hypervisor code, as RCU is not available in such a
> context.
>
> The RCU protection doesn't amount to much at the moment, as readers are
> already protected by the read-write lock (all walkers that free table
> memory take the write lock). Nonetheless, a subsequent change will
> further relax the locking requirements around the stage-2 MMU, thereby
> depending on RCU.
>
> Signed-off-by: Oliver Upton <oliver.upton@xxxxxxxxx>

This patch landed in today's linux-next (20221114) as commit 
c3119ae45dfb ("KVM: arm64: Protect stage-2 traversal with RCU"). 
Unfortunately, it introduces the following warning:

--->8---

kvm [1]: IPA Size Limit: 40 bits
BUG: sleeping function called from invalid context at 
include/linux/sched/mm.h:274
in_atomic(): 0, irqs_disabled(): 0, non_block: 0, pid: 1, name: swapper/0
preempt_count: 0, expected: 0
RCU nest depth: 1, expected: 0
2 locks held by swapper/0/1:
  #0: ffff80000a8a44d0 (kvm_hyp_pgd_mutex){+.+.}-{3:3}, at: 
__create_hyp_mappings+0x80/0xc4
  #1: ffff80000a927720 (rcu_read_lock){....}-{1:2}, at: 
kvm_pgtable_walk+0x0/0x1f4
CPU: 2 PID: 1 Comm: swapper/0 Not tainted 6.1.0-rc3+ #5918
Hardware name: Raspberry Pi 3 Model B (DT)
Call trace:
  dump_backtrace.part.0+0xe4/0xf0
  show_stack+0x18/0x40
  dump_stack_lvl+0x8c/0xb8
  dump_stack+0x18/0x34
  __might_resched+0x178/0x220
  __might_sleep+0x48/0xa0
  prepare_alloc_pages+0x178/0x1a0
  __alloc_pages+0x9c/0x109c
  alloc_page_interleave+0x1c/0xc4
  alloc_pages+0xec/0x160
  get_zeroed_page+0x1c/0x44
  kvm_hyp_zalloc_page+0x14/0x20
  hyp_map_walker+0xd4/0x134
  kvm_pgtable_visitor_cb.isra.0+0x38/0x5c
  __kvm_pgtable_walk+0x1a4/0x220
  kvm_pgtable_walk+0x104/0x1f4
  kvm_pgtable_hyp_map+0x80/0xc4
  __create_hyp_mappings+0x9c/0xc4
  kvm_mmu_init+0x144/0x1cc
  kvm_arch_init+0xe4/0xef4
  kvm_init+0x3c/0x3d0
  arm_init+0x20/0x30
  do_one_initcall+0x74/0x400
  kernel_init_freeable+0x2e0/0x350
  kernel_init+0x24/0x130
  ret_from_fork+0x10/0x20
kvm [1]: Hyp mode initialized successfully

--->8----
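
If I am reading the backtrace right, kvm_pgtable_walk() now wraps every 
walk in an RCU read-side critical section, including the exclusive, 
mutex-protected hyp mapping walk, whose callback still allocates 
page-table pages with a sleeping allocation. Condensed from the trace 
above:

    kvm_pgtable_hyp_map()
      kvm_pgtable_walk()          <- rcu_read_lock() via kvm_pgtable_walk_begin()
        __kvm_pgtable_walk()
          hyp_map_walker()
            kvm_hyp_zalloc_page() <- get_zeroed_page() may sleep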

It looks like more changes in the KVM code are needed before RCU can be 
used in that path.
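
One possible direction (an untested sketch on my side, not a proper 
fix): pass the walker to the begin/end helpers and only enter the RCU 
read-side critical section for walks that are actually marked as 
shared, so that exclusive walks such as the hyp mappings may still 
sleep in their callbacks:

    /*
     * Untested sketch: shared walks need RCU protection, while exclusive
     * walks are already serialized by the relevant lock and may sleep.
     */
    static inline void kvm_pgtable_walk_begin(struct kvm_pgtable_walker *walker)
    {
    	if (walker->flags & KVM_PGTABLE_WALK_SHARED)
    		rcu_read_lock();
    }

    static inline void kvm_pgtable_walk_end(struct kvm_pgtable_walker *walker)
    {
    	if (walker->flags & KVM_PGTABLE_WALK_SHARED)
    		rcu_read_unlock();
    }

The hyp stubs would grow the same argument, and kvm_pgtable_walk() 
would pass its walker through (see my note further down at the 
kvm_pgtable_walk() hunk).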

> ---
>   arch/arm64/include/asm/kvm_pgtable.h | 49 ++++++++++++++++++++++++++++
>   arch/arm64/kvm/hyp/pgtable.c         | 10 +++++-
>   arch/arm64/kvm/mmu.c                 | 14 +++++++-
>   3 files changed, 71 insertions(+), 2 deletions(-)
>
> diff --git a/arch/arm64/include/asm/kvm_pgtable.h b/arch/arm64/include/asm/kvm_pgtable.h
> index e70cf57b719e..7634b6964779 100644
> --- a/arch/arm64/include/asm/kvm_pgtable.h
> +++ b/arch/arm64/include/asm/kvm_pgtable.h
> @@ -37,6 +37,13 @@ static inline u64 kvm_get_parange(u64 mmfr0)
>   
>   typedef u64 kvm_pte_t;
>   
> +/*
> + * RCU cannot be used in a non-kernel context such as the hyp. As such, page
> + * table walkers used in hyp do not call into RCU and instead use other
> + * synchronization mechanisms (such as a spinlock).
> + */
> +#if defined(__KVM_NVHE_HYPERVISOR__) || defined(__KVM_VHE_HYPERVISOR__)
> +
>   typedef kvm_pte_t *kvm_pteref_t;
>   
>   static inline kvm_pte_t *kvm_dereference_pteref(kvm_pteref_t pteref, bool shared)
> @@ -44,6 +51,40 @@ static inline kvm_pte_t *kvm_dereference_pteref(kvm_pteref_t pteref, bool shared
>   	return pteref;
>   }
>   
> +static inline void kvm_pgtable_walk_begin(void) {}
> +static inline void kvm_pgtable_walk_end(void) {}
> +
> +static inline bool kvm_pgtable_walk_lock_held(void)
> +{
> +	return true;
> +}
> +
> +#else
> +
> +typedef kvm_pte_t __rcu *kvm_pteref_t;
> +
> +static inline kvm_pte_t *kvm_dereference_pteref(kvm_pteref_t pteref, bool shared)
> +{
> +	return rcu_dereference_check(pteref, !shared);
> +}
> +
> +static inline void kvm_pgtable_walk_begin(void)
> +{
> +	rcu_read_lock();
> +}
> +
> +static inline void kvm_pgtable_walk_end(void)
> +{
> +	rcu_read_unlock();
> +}
> +
> +static inline bool kvm_pgtable_walk_lock_held(void)
> +{
> +	return rcu_read_lock_held();
> +}
> +
> +#endif
> +
>   #define KVM_PTE_VALID			BIT(0)
>   
>   #define KVM_PTE_ADDR_MASK		GENMASK(47, PAGE_SHIFT)
> @@ -202,11 +243,14 @@ struct kvm_pgtable {
>    *					children.
>    * @KVM_PGTABLE_WALK_TABLE_POST:	Visit table entries after their
>    *					children.
> + * @KVM_PGTABLE_WALK_SHARED:		Indicates the page-tables may be shared
> + *					with other software walkers.
>    */
>   enum kvm_pgtable_walk_flags {
>   	KVM_PGTABLE_WALK_LEAF			= BIT(0),
>   	KVM_PGTABLE_WALK_TABLE_PRE		= BIT(1),
>   	KVM_PGTABLE_WALK_TABLE_POST		= BIT(2),
> +	KVM_PGTABLE_WALK_SHARED			= BIT(3),
>   };
>   
>   struct kvm_pgtable_visit_ctx {
> @@ -223,6 +267,11 @@ struct kvm_pgtable_visit_ctx {
>   typedef int (*kvm_pgtable_visitor_fn_t)(const struct kvm_pgtable_visit_ctx *ctx,
>   					enum kvm_pgtable_walk_flags visit);
>   
> +static inline bool kvm_pgtable_walk_shared(const struct kvm_pgtable_visit_ctx *ctx)
> +{
> +	return ctx->flags & KVM_PGTABLE_WALK_SHARED;
> +}
> +
>   /**
>    * struct kvm_pgtable_walker - Hook into a page-table walk.
>    * @cb:		Callback function to invoke during the walk.
> diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c
> index 7c9782347570..d8d963521d4e 100644
> --- a/arch/arm64/kvm/hyp/pgtable.c
> +++ b/arch/arm64/kvm/hyp/pgtable.c
> @@ -171,6 +171,9 @@ static int kvm_pgtable_visitor_cb(struct kvm_pgtable_walk_data *data,
>   				  enum kvm_pgtable_walk_flags visit)
>   {
>   	struct kvm_pgtable_walker *walker = data->walker;
> +
> +	/* Ensure the appropriate lock is held (e.g. RCU lock for stage-2 MMU) */
> +	WARN_ON_ONCE(kvm_pgtable_walk_shared(ctx) && !kvm_pgtable_walk_lock_held());
>   	return walker->cb(ctx, visit);
>   }
>   
> @@ -281,8 +284,13 @@ int kvm_pgtable_walk(struct kvm_pgtable *pgt, u64 addr, u64 size,
>   		.end	= PAGE_ALIGN(walk_data.addr + size),
>   		.walker	= walker,
>   	};
> +	int r;
> +
> +	kvm_pgtable_walk_begin();
> +	r = _kvm_pgtable_walk(pgt, &walk_data);
> +	kvm_pgtable_walk_end();
>   
> -	return _kvm_pgtable_walk(pgt, &walk_data);
> +	return r;
>   }
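
With the sketch above, this hunk would then read something like:

    	kvm_pgtable_walk_begin(walker);
    	r = _kvm_pgtable_walk(pgt, &walk_data);
    	kvm_pgtable_walk_end(walker);

so that only KVM_PGTABLE_WALK_SHARED walkers pay the rcu_read_lock() 
cost. Again, just a sketch to illustrate the idea.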
>   
>   struct leaf_walk_data {
> diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
> index 73ae908eb5d9..52e042399ba5 100644
> --- a/arch/arm64/kvm/mmu.c
> +++ b/arch/arm64/kvm/mmu.c
> @@ -130,9 +130,21 @@ static void kvm_s2_free_pages_exact(void *virt, size_t size)
>   
>   static struct kvm_pgtable_mm_ops kvm_s2_mm_ops;
>   
> +static void stage2_free_removed_table_rcu_cb(struct rcu_head *head)
> +{
> +	struct page *page = container_of(head, struct page, rcu_head);
> +	void *pgtable = page_to_virt(page);
> +	u32 level = page_private(page);
> +
> +	kvm_pgtable_stage2_free_removed(&kvm_s2_mm_ops, pgtable, level);
> +}
> +
>   static void stage2_free_removed_table(void *addr, u32 level)
>   {
> -	kvm_pgtable_stage2_free_removed(&kvm_s2_mm_ops, addr, level);
> +	struct page *page = virt_to_page(addr);
> +
> +	set_page_private(page, (unsigned long)level);
> +	call_rcu(&page->rcu_head, stage2_free_removed_table_rcu_cb);
>   }
>   
>   static void kvm_host_get_page(void *addr)

Best regards
-- 
Marek Szyprowski, PhD
Samsung R&D Institute Poland



