Re: [RFCv2 14/16] KVM: Handle protected memory in __kvm_map_gfn()/__kvm_unmap_gfn()

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Tue, 2020-10-20 at 09:18 +0300, Kirill A. Shutemov wrote:
> We cannot access protected pages directly. Use ioremap() to
> create a temporary mapping of the page. The mapping is destroyed
> on __kvm_unmap_gfn().
> 
> The new interface gfn_to_pfn_memslot_protected() is used to detect if
> the page is protected.
> 
> ioremap_cache_force() is a hack to bypass IORES_MAP_SYSTEM_RAM check
> in
> the x86 ioremap code. We need a better solution.
> 
> Signed-off-by: Kirill A. Shutemov <kirill.shutemov@xxxxxxxxxxxxxxx>
> ---
>  arch/powerpc/kvm/book3s_64_mmu_hv.c    |  2 +-
>  arch/powerpc/kvm/book3s_64_mmu_radix.c |  2 +-
>  arch/x86/include/asm/io.h              |  2 +
>  arch/x86/include/asm/pgtable_types.h   |  1 +
>  arch/x86/kvm/mmu/mmu.c                 |  6 ++-
>  arch/x86/mm/ioremap.c                  | 16 ++++++--
>  include/linux/kvm_host.h               |  3 +-
>  include/linux/kvm_types.h              |  1 +
>  virt/kvm/kvm_main.c                    | 52 +++++++++++++++++++-----
> --
>  9 files changed, 63 insertions(+), 22 deletions(-)
> 
> diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c
> b/arch/powerpc/kvm/book3s_64_mmu_hv.c
> index 38ea396a23d6..8e06cd3f759c 100644
> --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
> +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
> @@ -590,7 +590,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_vcpu
> *vcpu,
>  	} else {
>  		/* Call KVM generic code to do the slow-path check */
>  		pfn = __gfn_to_pfn_memslot(memslot, gfn, false, NULL,
> -					   writing, &write_ok);
> +					   writing, &write_ok, NULL);
>  		if (is_error_noslot_pfn(pfn))
>  			return -EFAULT;
>  		page = NULL;
> diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c
> b/arch/powerpc/kvm/book3s_64_mmu_radix.c
> index 22a677b18695..6fd4e3f9b66a 100644
> --- a/arch/powerpc/kvm/book3s_64_mmu_radix.c
> +++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c
> @@ -822,7 +822,7 @@ int kvmppc_book3s_instantiate_page(struct
> kvm_vcpu *vcpu,
>  
>  		/* Call KVM generic code to do the slow-path check */
>  		pfn = __gfn_to_pfn_memslot(memslot, gfn, false, NULL,
> -					   writing, upgrade_p);
> +					   writing, upgrade_p, NULL);
>  		if (is_error_noslot_pfn(pfn))
>  			return -EFAULT;
>  		page = NULL;
> diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h
> index c58d52fd7bf2..a3e1bfad1026 100644
> --- a/arch/x86/include/asm/io.h
> +++ b/arch/x86/include/asm/io.h
> @@ -184,6 +184,8 @@ extern void __iomem *ioremap_uc(resource_size_t
> offset, unsigned long size);
>  #define ioremap_uc ioremap_uc
>  extern void __iomem *ioremap_cache(resource_size_t offset, unsigned
> long size);
>  #define ioremap_cache ioremap_cache
> +extern void __iomem *ioremap_cache_force(resource_size_t offset,
> unsigned long size);
> +#define ioremap_cache_force ioremap_cache_force
>  extern void __iomem *ioremap_prot(resource_size_t offset, unsigned
> long size, unsigned long prot_val);
>  #define ioremap_prot ioremap_prot
>  extern void __iomem *ioremap_encrypted(resource_size_t phys_addr,
> unsigned long size);
> diff --git a/arch/x86/include/asm/pgtable_types.h
> b/arch/x86/include/asm/pgtable_types.h
> index 816b31c68550..4c16a9583786 100644
> --- a/arch/x86/include/asm/pgtable_types.h
> +++ b/arch/x86/include/asm/pgtable_types.h
> @@ -147,6 +147,7 @@ enum page_cache_mode {
>  	_PAGE_CACHE_MODE_UC       = 3,
>  	_PAGE_CACHE_MODE_WT       = 4,
>  	_PAGE_CACHE_MODE_WP       = 5,
> +	_PAGE_CACHE_MODE_WB_FORCE = 6,
>  
>  	_PAGE_CACHE_MODE_NUM      = 8
>  };
> diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
> index 71aa3da2a0b7..162cb285b87b 100644
> --- a/arch/x86/kvm/mmu/mmu.c
> +++ b/arch/x86/kvm/mmu/mmu.c
> @@ -4058,7 +4058,8 @@ static bool try_async_pf(struct kvm_vcpu *vcpu,
> bool prefault, gfn_t gfn,
>  	}
>  
>  	async = false;
> -	*pfn = __gfn_to_pfn_memslot(slot, gfn, false, &async, write,
> writable);
> +	*pfn = __gfn_to_pfn_memslot(slot, gfn, false, &async, write,
> writable,
> +				    NULL);
>  	if (!async)
>  		return false; /* *pfn has correct page already */
>  
> @@ -4072,7 +4073,8 @@ static bool try_async_pf(struct kvm_vcpu *vcpu,
> bool prefault, gfn_t gfn,
>  			return true;
>  	}
>  
> -	*pfn = __gfn_to_pfn_memslot(slot, gfn, false, NULL, write,
> writable);
> +	*pfn = __gfn_to_pfn_memslot(slot, gfn, false, NULL, write,
> writable,
> +				    NULL);
>  	return false;
>  }
>  
> diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
> index 9e5ccc56f8e0..4409785e294c 100644
> --- a/arch/x86/mm/ioremap.c
> +++ b/arch/x86/mm/ioremap.c
> @@ -202,9 +202,12 @@ __ioremap_caller(resource_size_t phys_addr,
> unsigned long size,
>  	__ioremap_check_mem(phys_addr, size, &io_desc);
>  
>  	/*
> -	 * Don't allow anybody to remap normal RAM that we're using..
> +	 * Don't allow anybody to remap normal RAM that we're using,
> unless
> +	 * _PAGE_CACHE_MODE_WB_FORCE is used.
>  	 */
> -	if (io_desc.flags & IORES_MAP_SYSTEM_RAM) {
> +	if (pcm == _PAGE_CACHE_MODE_WB_FORCE) {
> +	    pcm = _PAGE_CACHE_MODE_WB;
> +	} else if (io_desc.flags & IORES_MAP_SYSTEM_RAM) {
>  		WARN_ONCE(1, "ioremap on RAM at %pa - %pa\n",
>  			  &phys_addr, &last_addr);
>  		return NULL;
> @@ -419,6 +422,13 @@ void __iomem *ioremap_cache(resource_size_t
> phys_addr, unsigned long size)
>  }
>  EXPORT_SYMBOL(ioremap_cache);
>  
> +void __iomem *ioremap_cache_force(resource_size_t phys_addr,
> unsigned long size)
> +{
> +	return __ioremap_caller(phys_addr, size,
> _PAGE_CACHE_MODE_WB_FORCE,
> +				__builtin_return_address(0), false);
> +}
> +EXPORT_SYMBOL(ioremap_cache_force);
> +
>  void __iomem *ioremap_prot(resource_size_t phys_addr, unsigned long
> size,
>  				unsigned long prot_val)
>  {
> @@ -467,7 +477,7 @@ void iounmap(volatile void __iomem *addr)
>  	p = find_vm_area((void __force *)addr);
>  
>  	if (!p) {
> -		printk(KERN_ERR "iounmap: bad address %p\n", addr);
> +		printk(KERN_ERR "iounmap: bad address %px\n", addr);

Unintentional?

>  		dump_stack();
>  		return;
>  	}
> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> index 6655e8da4555..0d5f3885747b 100644
> --- a/include/linux/kvm_host.h
> +++ b/include/linux/kvm_host.h
> @@ -238,6 +238,7 @@ struct kvm_host_map {
>  	void *hva;
>  	kvm_pfn_t pfn;
>  	kvm_pfn_t gfn;
> +	bool protected;
>  };
>  
>  /*
> @@ -725,7 +726,7 @@ kvm_pfn_t gfn_to_pfn_memslot(struct
> kvm_memory_slot *slot, gfn_t gfn);
>  kvm_pfn_t gfn_to_pfn_memslot_atomic(struct kvm_memory_slot *slot,
> gfn_t gfn);
>  kvm_pfn_t __gfn_to_pfn_memslot(struct kvm_memory_slot *slot, gfn_t
> gfn,
>  			       bool atomic, bool *async, bool
> write_fault,
> -			       bool *writable);
> +			       bool *writable, bool *protected);
>  
>  void kvm_release_pfn_clean(kvm_pfn_t pfn);
>  void kvm_release_pfn_dirty(kvm_pfn_t pfn);
> diff --git a/include/linux/kvm_types.h b/include/linux/kvm_types.h
> index a7580f69dda0..0a8c6426b4f4 100644
> --- a/include/linux/kvm_types.h
> +++ b/include/linux/kvm_types.h
> @@ -58,6 +58,7 @@ struct gfn_to_pfn_cache {
>  	gfn_t gfn;
>  	kvm_pfn_t pfn;
>  	bool dirty;
> +	bool protected;
>  };
>  
>  #ifdef KVM_ARCH_NR_OBJS_PER_MEMORY_CACHE
> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> index 9b569b78874a..7c2c764c28c5 100644
> --- a/virt/kvm/kvm_main.c
> +++ b/virt/kvm/kvm_main.c
> @@ -1852,9 +1852,10 @@ static bool hva_to_pfn_fast(unsigned long
> addr, bool write_fault,
>   * 1 indicates success, -errno is returned if error is detected.
>   */
>  static int hva_to_pfn_slow(unsigned long addr, bool *async, bool
> write_fault,
> -			   bool *writable, kvm_pfn_t *pfn)
> +			   bool *writable, bool *protected, kvm_pfn_t
> *pfn)
>  {
>  	unsigned int flags = FOLL_HWPOISON | FOLL_KVM;
> +	struct vm_area_struct *vma;
>  	struct page *page;
>  	int npages = 0;
>  
> @@ -1868,9 +1869,15 @@ static int hva_to_pfn_slow(unsigned long addr,
> bool *async, bool write_fault,
>  	if (async)
>  		flags |= FOLL_NOWAIT;
>  
> -	npages = get_user_pages_unlocked(addr, 1, &page, flags);
> -	if (npages != 1)
> +	mmap_read_lock(current->mm);
> +	npages = get_user_pages(addr, 1, flags, &page, &vma);
> +	if (npages != 1) {
> +		mmap_read_unlock(current->mm);
>  		return npages;
> +	}
> +	if (protected)
> +		*protected = vma_is_kvm_protected(vma);
> +	mmap_read_unlock(current->mm);
>  
>  	/* map read fault as writable if possible */
>  	if (unlikely(!write_fault) && writable) {
> @@ -1961,7 +1968,7 @@ static int hva_to_pfn_remapped(struct
> vm_area_struct *vma,
>   *     whether the mapping is writable.
>   */
>  static kvm_pfn_t hva_to_pfn(unsigned long addr, bool atomic, bool
> *async,
> -			bool write_fault, bool *writable)
> +			bool write_fault, bool *writable, bool
> *protected)
>  {
>  	struct vm_area_struct *vma;
>  	kvm_pfn_t pfn = 0;
> @@ -1976,7 +1983,8 @@ static kvm_pfn_t hva_to_pfn(unsigned long addr,
> bool atomic, bool *async,
>  	if (atomic)
>  		return KVM_PFN_ERR_FAULT;
>  
> -	npages = hva_to_pfn_slow(addr, async, write_fault, writable,
> &pfn);
> +	npages = hva_to_pfn_slow(addr, async, write_fault, writable,
> protected,
> +				 &pfn);
>  	if (npages == 1)
>  		return pfn;
>  
> @@ -2010,7 +2018,7 @@ static kvm_pfn_t hva_to_pfn(unsigned long addr,
> bool atomic, bool *async,
>  
>  kvm_pfn_t __gfn_to_pfn_memslot(struct kvm_memory_slot *slot, gfn_t
> gfn,
>  			       bool atomic, bool *async, bool
> write_fault,
> -			       bool *writable)
> +			       bool *writable, bool *protected)
>  {
>  	unsigned long addr = __gfn_to_hva_many(slot, gfn, NULL,
> write_fault);
>  
> @@ -2033,7 +2041,7 @@ kvm_pfn_t __gfn_to_pfn_memslot(struct
> kvm_memory_slot *slot, gfn_t gfn,
>  	}
>  
>  	return hva_to_pfn(addr, atomic, async, write_fault,
> -			  writable);
> +			  writable, protected);
>  }
>  EXPORT_SYMBOL_GPL(__gfn_to_pfn_memslot);
>  
> @@ -2041,19 +2049,26 @@ kvm_pfn_t gfn_to_pfn_prot(struct kvm *kvm,
> gfn_t gfn, bool write_fault,
>  		      bool *writable)
>  {
>  	return __gfn_to_pfn_memslot(gfn_to_memslot(kvm, gfn), gfn,
> false, NULL,
> -				    write_fault, writable);
> +				    write_fault, writable, NULL);
>  }
>  EXPORT_SYMBOL_GPL(gfn_to_pfn_prot);
>  
>  kvm_pfn_t gfn_to_pfn_memslot(struct kvm_memory_slot *slot, gfn_t
> gfn)
>  {
> -	return __gfn_to_pfn_memslot(slot, gfn, false, NULL, true,
> NULL);
> +	return __gfn_to_pfn_memslot(slot, gfn, false, NULL, true, NULL,
> NULL);
>  }
>  EXPORT_SYMBOL_GPL(gfn_to_pfn_memslot);
>  
> +static kvm_pfn_t gfn_to_pfn_memslot_protected(struct kvm_memory_slot
> *slot,
> +					      gfn_t gfn, bool
> *protected)
> +{
> +	return __gfn_to_pfn_memslot(slot, gfn, false, NULL, true, NULL,
> +				    protected);
> +}
> +
>  kvm_pfn_t gfn_to_pfn_memslot_atomic(struct kvm_memory_slot *slot,
> gfn_t gfn)
>  {
> -	return __gfn_to_pfn_memslot(slot, gfn, true, NULL, true, NULL);
> +	return __gfn_to_pfn_memslot(slot, gfn, true, NULL, true, NULL,
> NULL);
>  }
>  EXPORT_SYMBOL_GPL(gfn_to_pfn_memslot_atomic);
>  
> @@ -2134,7 +2149,7 @@ static void kvm_cache_gfn_to_pfn(struct
> kvm_memory_slot *slot, gfn_t gfn,
>  {
>  	kvm_release_pfn(cache->pfn, cache->dirty, cache);
>  
> -	cache->pfn = gfn_to_pfn_memslot(slot, gfn);
> +	cache->pfn = gfn_to_pfn_memslot_protected(slot, gfn, &cache-
> >protected);
>  	cache->gfn = gfn;
>  	cache->dirty = false;
>  	cache->generation = gen;
> @@ -2149,6 +2164,7 @@ static int __kvm_map_gfn(struct kvm_memslots
> *slots, gfn_t gfn,
>  	void *hva = NULL;
>  	struct page *page = KVM_UNMAPPED_PAGE;
>  	struct kvm_memory_slot *slot = __gfn_to_memslot(slots, gfn);
> +	bool protected;
>  	u64 gen = slots->generation;
>  
>  	if (!map)
> @@ -2162,15 +2178,20 @@ static int __kvm_map_gfn(struct kvm_memslots
> *slots, gfn_t gfn,
>  			kvm_cache_gfn_to_pfn(slot, gfn, cache, gen);
>  		}
>  		pfn = cache->pfn;
> +		protected = cache->protected;
>  	} else {
>  		if (atomic)
>  			return -EAGAIN;
> -		pfn = gfn_to_pfn_memslot(slot, gfn);
> +		pfn = gfn_to_pfn_memslot_protected(slot, gfn,
> &protected);
>  	}
>  	if (is_error_noslot_pfn(pfn))
>  		return -EINVAL;
>  
> -	if (pfn_valid(pfn)) {
> +	if (protected) {
> +		if (atomic)
> +			return -EAGAIN;
> +		hva = ioremap_cache_force(pfn_to_hpa(pfn), PAGE_SIZE);
> +	} else if (pfn_valid(pfn)) {
>  		page = pfn_to_page(pfn);
>  		if (atomic)
>  			hva = kmap_atomic(page);

I think the page could have got unmapped since the gup via the
hypercall on another CPU. It could be an avenue for the guest to crash
the host.

> @@ -2191,6 +2212,7 @@ static int __kvm_map_gfn(struct kvm_memslots
> *slots, gfn_t gfn,
>  	map->hva = hva;
>  	map->pfn = pfn;
>  	map->gfn = gfn;
> +	map->protected = protected;
>  
>  	return 0;
>  }
> @@ -2221,7 +2243,9 @@ static void __kvm_unmap_gfn(struct
> kvm_memory_slot *memslot,
>  	if (!map->hva)
>  		return;
>  
> -	if (map->page != KVM_UNMAPPED_PAGE) {
> +	if (map->protected) {
> +		iounmap(map->hva);
> +	} else if (map->page != KVM_UNMAPPED_PAGE) {
>  		if (atomic)
>  			kunmap_atomic(map->hva);
>  		else





[Index of Archives]     [KVM ARM]     [KVM ia64]     [KVM ppc]     [Virtualization Tools]     [Spice Development]     [Libvirt]     [Libvirt Users]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite Questions]     [Linux Kernel]     [Linux SCSI]     [XFree86]

  Powered by Linux