[no subject]

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Fuad and I can help add some details for the conversion. Hopefully we
can figure out some of the plan at plumbers this week.

Thanks,
Elliot

> 
> Signed-off-by: Ackerley Tng <ackerleytng@xxxxxxxxxx>
> 
> ---
>  virt/kvm/guest_memfd.c | 131 ++++++++++++++++++++++++++++++++++++++++-
>  virt/kvm/kvm_main.c    |   2 +
>  virt/kvm/kvm_mm.h      |   7 +++
>  3 files changed, 139 insertions(+), 1 deletion(-)
> 
> diff --git a/virt/kvm/guest_memfd.c b/virt/kvm/guest_memfd.c
> index 110c4bbb004b..fb292e542381 100644
> --- a/virt/kvm/guest_memfd.c
> +++ b/virt/kvm/guest_memfd.c
> @@ -129,13 +129,29 @@ static int __kvm_gmem_prepare_folio(struct kvm *kvm, struct kvm_memory_slot *slo
>  }
>  
>  /**
> - * Use the uptodate flag to indicate that the folio is prepared for KVM's usage.
> + * Use folio's up-to-date flag to indicate that this folio is prepared for usage
> + * by the guest.
> + *
> + * This flag can be used whether the folio is prepared for PRIVATE or SHARED
> + * usage.
>   */
>  static inline void kvm_gmem_mark_prepared(struct folio *folio)
>  {
>  	folio_mark_uptodate(folio);
>  }
>  
> +/**
> + * Clear folio's up-to-date flag to indicate that this folio is no longer
> + * prepared for usage by the guest.
> + *
> + * This flag can be cleared whether the folio was prepared for PRIVATE or
> + * SHARED usage.
> + */
> +static inline void kvm_gmem_clear_prepared(struct folio *folio)
> +{
> +	folio_clear_uptodate(folio);
> +}
> +
>  /*
>   * Process @folio, which contains @gfn, so that the guest can use it.
>   * The folio must be locked and the gfn must be contained in @slot.
> @@ -148,6 +164,12 @@ static int kvm_gmem_prepare_folio(struct kvm *kvm, struct kvm_memory_slot *slot,
>  	pgoff_t index;
>  	int r;
>  
> +	/*
> +	 * Defensively zero folio to avoid leaking kernel memory in
> +	 * uninitialized pages. This is important since pages can now be mapped
> +	 * into userspace, where hardware (e.g. TDX) won't be clearing those
> +	 * pages.
> +	 */
>  	if (folio_test_hugetlb(folio)) {
>  		folio_zero_user(folio, folio->index << PAGE_SHIFT);
>  	} else {
> @@ -1017,6 +1039,7 @@ static vm_fault_t kvm_gmem_fault(struct vm_fault *vmf)
>  {
>  	struct inode *inode;
>  	struct folio *folio;
> +	bool is_prepared;
>  
>  	inode = file_inode(vmf->vma->vm_file);
>  	if (!kvm_gmem_is_faultable(inode, vmf->pgoff))
> @@ -1026,6 +1049,31 @@ static vm_fault_t kvm_gmem_fault(struct vm_fault *vmf)
>  	if (!folio)
>  		return VM_FAULT_SIGBUS;
>  
> +	is_prepared = folio_test_uptodate(folio);
> +	if (!is_prepared) {
> +		unsigned long nr_pages;
> +		unsigned long i;
> +
> +		if (folio_test_hugetlb(folio)) {
> +			folio_zero_user(folio, folio->index << PAGE_SHIFT);
> +		} else {
> +			/*
> +			 * Defensively zero folio to avoid leaking kernel memory in
> +			 * uninitialized pages. This is important since pages can now be
> +			 * mapped into userspace, where hardware (e.g. TDX) won't be
> +			 * clearing those pages.
> +			 *
> +			 * Will probably need a version of kvm_gmem_prepare_folio() to
> +			 * prepare the page for SHARED use.
> +			 */
> +			nr_pages = folio_nr_pages(folio);
> +			for (i = 0; i < nr_pages; i++)
> +				clear_highpage(folio_page(folio, i));
> +		}
> +
> +		kvm_gmem_mark_prepared(folio);
> +	}
> +
>  	vmf->page = folio_file_page(folio, vmf->pgoff);
>  	return VM_FAULT_LOCKED;
>  }
> @@ -1593,6 +1641,87 @@ long kvm_gmem_populate(struct kvm *kvm, gfn_t start_gfn, void __user *src, long
>  }
>  EXPORT_SYMBOL_GPL(kvm_gmem_populate);
>  
> +static void kvm_gmem_clear_prepared_range(struct inode *inode, pgoff_t start,
> +					  pgoff_t end)
> +{
> +	pgoff_t index;
> +
> +	filemap_invalidate_lock_shared(inode->i_mapping);
> +
> +	/* TODO: replace iteration with filemap_get_folios() for efficiency. */
> +	for (index = start; index < end;) {
> +		struct folio *folio;
> +
> +		/* Don't use kvm_gmem_get_folio to avoid allocating */
> +		folio = filemap_lock_folio(inode->i_mapping, index);
> +		if (IS_ERR(folio)) {
> +			++index;
> +			continue;
> +		}
> +
> +		kvm_gmem_clear_prepared(folio);
> +
> +		index = folio_next_index(folio);
> +		folio_unlock(folio);
> +		folio_put(folio);
> +	}
> +
> +	filemap_invalidate_unlock_shared(inode->i_mapping);
> +}
> +
> +/**
> + * Clear the prepared flag for all folios in gfn range [@start, @end) in memslot
> + * @slot.
> + */
> +static void kvm_gmem_clear_prepared_slot(struct kvm_memory_slot *slot, gfn_t start,
> +					 gfn_t end)
> +{
> +	pgoff_t start_offset;
> +	pgoff_t end_offset;
> +	struct file *file;
> +
> +	file = kvm_gmem_get_file(slot);
> +	if (!file)
> +		return;
> +
> +	start_offset = start - slot->base_gfn + slot->gmem.pgoff;
> +	end_offset = end - slot->base_gfn + slot->gmem.pgoff;
> +
> +	kvm_gmem_clear_prepared_range(file_inode(file), start_offset, end_offset);
> +
> +	fput(file);
> +}
> +
> +/**
> + * Clear the prepared flag for all folios for any slot in gfn range
> + * [@start, @end) in @kvm.
> + */
> +void kvm_gmem_clear_prepared_vm(struct kvm *kvm, gfn_t start, gfn_t end)
> +{
> +	int i;
> +
> +	lockdep_assert_held(&kvm->slots_lock);
> +
> +	for (i = 0; i < kvm_arch_nr_memslot_as_ids(kvm); i++) {
> +		struct kvm_memslot_iter iter;
> +		struct kvm_memslots *slots;
> +
> +		slots = __kvm_memslots(kvm, i);
> +		kvm_for_each_memslot_in_gfn_range(&iter, slots, start, end) {
> +			struct kvm_memory_slot *slot;
> +			gfn_t gfn_start;
> +			gfn_t gfn_end;
> +
> +			slot = iter.slot;
> +			gfn_start = max(start, slot->base_gfn);
> +			gfn_end = min(end, slot->base_gfn + slot->npages);
> +
> +			if (iter.slot->flags & KVM_MEM_GUEST_MEMFD)
> +				kvm_gmem_clear_prepared_slot(iter.slot, gfn_start, gfn_end);
> +		}
> +	}
> +}
> +
>  /**
>   * Returns true if pages in range [@start, @end) in inode @inode have no
>   * userspace mappings.
> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> index 1a7bbcc31b7e..255d27df7f5c 100644
> --- a/virt/kvm/kvm_main.c
> +++ b/virt/kvm/kvm_main.c
> @@ -2565,6 +2565,8 @@ static int kvm_vm_set_mem_attributes(struct kvm *kvm, gfn_t start, gfn_t end,
>  		KVM_BUG_ON(r, kvm);
>  	}
>  
> +	kvm_gmem_clear_prepared_vm(kvm, start, end);
> +
>  	kvm_handle_gfn_range(kvm, &post_set_range);
>  
>  out_unlock:
> diff --git a/virt/kvm/kvm_mm.h b/virt/kvm/kvm_mm.h
> index d8ff2b380d0e..25fd0d9f66cc 100644
> --- a/virt/kvm/kvm_mm.h
> +++ b/virt/kvm/kvm_mm.h
> @@ -43,6 +43,7 @@ int kvm_gmem_bind(struct kvm *kvm, struct kvm_memory_slot *slot,
>  void kvm_gmem_unbind(struct kvm_memory_slot *slot);
>  int kvm_gmem_should_set_attributes(struct kvm *kvm, gfn_t start, gfn_t end,
>  				   unsigned long attrs);
> +void kvm_gmem_clear_prepared_vm(struct kvm *kvm, gfn_t start, gfn_t end);
>  #else
>  static inline void kvm_gmem_init(struct module *module)
>  {
> @@ -68,6 +69,12 @@ static inline int kvm_gmem_should_set_attributes(struct kvm *kvm, gfn_t start,
>  	return 0;
>  }
>  
> +static inline void kvm_gmem_clear_prepared_vm(struct kvm *kvm,
> +					      gfn_t start, gfn_t end)
> +{
> +	WARN_ON_ONCE(1);
> +}
> +
>  #endif /* CONFIG_KVM_PRIVATE_MEM */
>  
>  #endif /* __KVM_MM_H__ */
> -- 
> 2.46.0.598.g6f2099f65c-goog
> 




[Index of Archives]     [Linux ARM Kernel]     [Linux ARM]     [Linux Omap]     [Fedora ARM]     [IETF Annouce]     [Bugtraq]     [Linux OMAP]     [Linux MIPS]     [eCos]     [Asterisk Internet PBX]     [Linux API]

  Powered by Linux