This should save some time by avoiding double pinning, and it makes the
pinning information clearer.

> 
> > But we may still have to support
> > the user-level memory pinning API as a legacy case. Instead of duplicating
> > the storage, can we change the region list to a list of memslot->pfns
> > (or use the struct page **pages in the memslot instead of pfns)? This
> > way the APIs could still work without the extra storage burden.
> 
> Right, patches 6 and 9 will achieve this using the memslot->pfns when the MMU
> is active. We still need to maintain this enc_region in case the user tries to
> pin memory before the MMU is active (i.e. before a vcpu is online). In my
> testing, I haven't seen enc_region being used, but it is kept to make sure we
> do not break any userspace.
> 
> > Anyway, I think it is essentially necessary to unify them.
> > Otherwise, not only is the metadata size quite large, but it is also
> > confusing to have parallel data structures doing the same thing.
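
To make my suggestion concrete, here is a rough sketch of the kind of
unified lookup I have in mind -- untested, with the field names assumed
from this series -- where the memslot metadata is the single source of
truth and no separate per-region struct page array is kept:

	/*
	 * Hypothetical sketch: resolve the pinned pfn for a gfn from the
	 * memslot metadata instead of a per-region pages array.
	 */
	static kvm_pfn_t sev_pinned_pfn_for_gfn(struct kvm_memory_slot *slot,
						gfn_t gfn)
	{
		struct kvm_arch_memory_slot *aslot = &slot->arch;
		gfn_t rel_gfn = gfn - slot->base_gfn;

		if (!aslot->pinned_bitmap ||
		    !test_bit(rel_gfn, aslot->pinned_bitmap))
			return KVM_PFN_NOSLOT; /* not demand pinned (yet) */

		return aslot->pfns[rel_gfn];
	}
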
> >>
> >>  	return pages;
> >>  
> >> @@ -441,14 +468,43 @@ static struct page **sev_pin_memory(struct kvm *kvm, unsigned long uaddr,
> >>  	return ERR_PTR(ret);
> >>  }
> >>  
> >> -static void sev_unpin_memory(struct kvm *kvm, struct page **pages,
> >> -			     unsigned long npages)
> >> +static void __sev_unpin_memory(struct kvm *kvm, struct page **pages,
> >> +			       unsigned long npages)
> >>  {
> >>  	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
> >>  
> >>  	unpin_user_pages(pages, npages);
> >>  	kvfree(pages);
> >> -	sev->pages_locked -= npages;
> >> +	sev->pages_to_lock -= npages;
> >> +}
> >> +
> >> +static struct pinned_region *find_pinned_region(struct kvm *kvm,
> >> +					     struct page **pages,
> >> +					     unsigned long n)
> >> +{
> >> +	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
> >> +	struct list_head *head = &sev->pinned_regions_list;
> >> +	struct pinned_region *i;
> >> +
> >> +	list_for_each_entry(i, head, list) {
> >> +		if (i->pages == pages && i->npages == n)
> >> +			return i;
> >> +	}
> >> +
> >> +	return NULL;
> >> +}
> >> +
> >> +static void sev_unpin_memory(struct kvm *kvm, struct page **pages,
> >> +			     unsigned long npages)
> >> +{
> >> +	struct pinned_region *region;
> >> +
> >> +	region = find_pinned_region(kvm, pages, npages);
> >> +	__sev_unpin_memory(kvm, pages, npages);
> >> +	if (region) {
> >> +		list_del(&region->list);
> >> +		kfree(region);
> >> +	}
> >>  }
> >>  
> >>  static void sev_clflush_pages(struct page *pages[], unsigned long npages)
> >> @@ -551,8 +607,9 @@ static int sev_launch_update_data(struct kvm *kvm, struct kvm_sev_cmd *argp)
> >>  		set_page_dirty_lock(inpages[i]);
> >>  		mark_page_accessed(inpages[i]);
> >>  	}
> >> -	/* unlock the user pages */
> >> -	sev_unpin_memory(kvm, inpages, npages);
> >> +	/* unlock the user pages on error */
> >> +	if (ret)
> >> +		sev_unpin_memory(kvm, inpages, npages);
> >>  	return ret;
> >>  }
> >>  
> >> @@ -1059,7 +1116,8 @@ static int sev_launch_secret(struct kvm *kvm, struct kvm_sev_cmd *argp)
> >>  		set_page_dirty_lock(pages[i]);
> >>  		mark_page_accessed(pages[i]);
> >>  	}
> >> -	sev_unpin_memory(kvm, pages, n);
> >> +	if (ret)
> >> +		sev_unpin_memory(kvm, pages, n);
> >>  	return ret;
> >>  }
> >>  
> >> @@ -1338,7 +1396,8 @@ static int sev_send_update_data(struct kvm *kvm, struct kvm_sev_cmd *argp)
> >>  e_free_hdr:
> >>  	kfree(hdr);
> >>  e_unpin:
> >> -	sev_unpin_memory(kvm, guest_page, n);
> >> +	if (ret)
> >> +		sev_unpin_memory(kvm, guest_page, n);
> >>  
> >>  	return ret;
> >>  }
> >> @@ -1508,7 +1567,8 @@ static int sev_receive_update_data(struct kvm *kvm, struct kvm_sev_cmd *argp)
> >>  	ret = sev_issue_cmd(kvm, SEV_CMD_RECEIVE_UPDATE_DATA, &data,
> >>  				&argp->error);
> >>  
> >> -	sev_unpin_memory(kvm, guest_page, n);
> >> +	if (ret)
> >> +		sev_unpin_memory(kvm, guest_page, n);
> >>  
> >>  e_free_trans:
> >>  	kfree(trans);
> >> @@ -1629,16 +1689,17 @@ static void sev_migrate_from(struct kvm_sev_info *dst,
> >>  	dst->active = true;
> >>  	dst->asid = src->asid;
> >>  	dst->handle = src->handle;
> >> -	dst->pages_locked = src->pages_locked;
> >> +	dst->pages_to_lock = src->pages_to_lock;
> >>  	dst->enc_context_owner = src->enc_context_owner;
> >>  
> >>  	src->asid = 0;
> >>  	src->active = false;
> >>  	src->handle = 0;
> >> -	src->pages_locked = 0;
> >> +	src->pages_to_lock = 0;
> >>  	src->enc_context_owner = NULL;
> >>  
> >> -	list_cut_before(&dst->regions_list, &src->regions_list, &src->regions_list);
> >> +	list_cut_before(&dst->pinned_regions_list, &src->pinned_regions_list,
> >> +			&src->pinned_regions_list);
> >>  }
> >>  
> >>  static int sev_es_migrate_from(struct kvm *dst, struct kvm *src)
> >> @@ -1862,8 +1923,7 @@ int svm_register_enc_region(struct kvm *kvm,
> >>  			    struct kvm_enc_region *range)
> >>  {
> >>  	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
> >> -	struct enc_region *region;
> >> -	int ret = 0;
> >> +	unsigned long npages;
> >>  
> >>  	if (!sev_guest(kvm))
> >>  		return -ENOTTY;
> >> @@ -1875,101 +1935,35 @@ int svm_register_enc_region(struct kvm *kvm,
> >>  	if (range->addr > ULONG_MAX || range->size > ULONG_MAX)
> >>  		return -EINVAL;
> >>  
> >> -	region = kzalloc(sizeof(*region), GFP_KERNEL_ACCOUNT);
> >> -	if (!region)
> >> +	npages = get_npages(range->addr, range->size);
> >> +	if (rlimit_memlock_exceeds(sev->pages_to_lock, npages)) {
> >> +		pr_err("SEV: %lu locked pages exceed the lock limit of %lu.\n",
> >> +		       sev->pages_to_lock + npages,
> >> +		       (rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT));
> >>  		return -ENOMEM;
> >> -
> >> -	mutex_lock(&kvm->lock);
> >> -	region->pages = sev_pin_memory(kvm, range->addr, range->size, &region->npages, 1);
> >> -	if (IS_ERR(region->pages)) {
> >> -		ret = PTR_ERR(region->pages);
> >> -		mutex_unlock(&kvm->lock);
> >> -		goto e_free;
> >>  	}
> >> +	sev->pages_to_lock += npages;
> >>  
> >> -	region->uaddr = range->addr;
> >> -	region->size = range->size;
> >> -
> >> -	list_add_tail(&region->list, &sev->regions_list);
> >> -	mutex_unlock(&kvm->lock);
> >> -
> >> -	/*
> >> -	 * The guest may change the memory encryption attribute from C=0 -> C=1
> >> -	 * or vice versa for this memory range. Lets make sure caches are
> >> -	 * flushed to ensure that guest data gets written into memory with
> >> -	 * correct C-bit.
> >> -	 */
> >> -	sev_clflush_pages(region->pages, region->npages);
> >> -
> >> -	return ret;
> >> -
> >> -e_free:
> >> -	kfree(region);
> >> -	return ret;
> >> -}
> >> -
> >> -static struct enc_region *
> >> -find_enc_region(struct kvm *kvm, struct kvm_enc_region *range)
> >> -{
> >> -	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
> >> -	struct list_head *head = &sev->regions_list;
> >> -	struct enc_region *i;
> >> -
> >> -	list_for_each_entry(i, head, list) {
> >> -		if (i->uaddr == range->addr &&
> >> -		    i->size == range->size)
> >> -			return i;
> >> -	}
> >> -
> >> -	return NULL;
> >> -}
> >> -
> >> -static void __unregister_enc_region_locked(struct kvm *kvm,
> >> -					   struct enc_region *region)
> >> -{
> >> -	sev_unpin_memory(kvm, region->pages, region->npages);
> >> -	list_del(&region->list);
> >> -	kfree(region);
> >> +	return 0;
> >>  }
> >>  
> >>  int svm_unregister_enc_region(struct kvm *kvm,
> >>  			      struct kvm_enc_region *range)
> >>  {
> >> -	struct enc_region *region;
> >> -	int ret;
> >> +	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
> >> +	unsigned long npages;
> >>  
> >>  	/* If kvm is mirroring encryption context it isn't responsible for it */
> >>  	if (is_mirroring_enc_context(kvm))
> >>  		return -EINVAL;
> >>  
> >> -	mutex_lock(&kvm->lock);
> >> -
> >> -	if (!sev_guest(kvm)) {
> >> -		ret = -ENOTTY;
> >> -		goto failed;
> >> -	}
> >> -
> >> -	region = find_enc_region(kvm, range);
> >> -	if (!region) {
> >> -		ret = -EINVAL;
> >> -		goto failed;
> >> -	}
> >> -
> >> -	/*
> >> -	 * Ensure that all guest tagged cache entries are flushed before
> >> -	 * releasing the pages back to the system for use. CLFLUSH will
> >> -	 * not do this, so issue a WBINVD.
> >> -	 */
> >> -	wbinvd_on_all_cpus();
> >> +	if (!sev_guest(kvm))
> >> +		return -ENOTTY;
> >>  
> >> -	__unregister_enc_region_locked(kvm, region);
> >> +	npages = get_npages(range->addr, range->size);
> >> +	sev->pages_to_lock -= npages;
> >>  
> >> -	mutex_unlock(&kvm->lock);
> >>  	return 0;
> >> -
> >> -failed:
> >> -	mutex_unlock(&kvm->lock);
> >> -	return ret;
> >>  }
> >>  
> >>  int svm_vm_copy_asid_from(struct kvm *kvm, unsigned int source_fd)
> >> @@ -2018,7 +2012,7 @@ int svm_vm_copy_asid_from(struct kvm *kvm, unsigned int source_fd)
> >>  	mirror_sev->fd = source_sev->fd;
> >>  	mirror_sev->es_active = source_sev->es_active;
> >>  	mirror_sev->handle = source_sev->handle;
> >> -	INIT_LIST_HEAD(&mirror_sev->regions_list);
> >> +	INIT_LIST_HEAD(&mirror_sev->pinned_regions_list);
> >>  	ret = 0;
> >>  
> >>  	/*
> >> @@ -2038,8 +2032,9 @@ int svm_vm_copy_asid_from(struct kvm *kvm, unsigned int source_fd)
> >>  void sev_vm_destroy(struct kvm *kvm)
> >>  {
> >>  	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
> >> -	struct list_head *head = &sev->regions_list;
> >> +	struct list_head *head = &sev->pinned_regions_list;
> >>  	struct list_head *pos, *q;
> >> +	struct pinned_region *region;
> >>  
> >>  	WARN_ON(sev->num_mirrored_vms);
> >>  
> >> @@ -2072,8 +2067,14 @@ void sev_vm_destroy(struct kvm *kvm)
> >>  	 */
> >>  	if (!list_empty(head)) {
> >>  		list_for_each_safe(pos, q, head) {
> >> -			__unregister_enc_region_locked(kvm,
> >> -				list_entry(pos, struct enc_region, list));
> >> +			/*
> >> +			 * Unpin the memory that was pinned outside of MMU
> >> +			 * demand pinning.
> >> +			 */
> >> +			region = list_entry(pos, struct pinned_region, list);
> >> +			__sev_unpin_memory(kvm, region->pages, region->npages);
> >> +			list_del(&region->list);
> >> +			kfree(region);
> >>  			cond_resched();
> >>  		}
> >>  	}
> >  So the guest memory has already been unpinned in sev_free_memslot().
> >  Why do it again here?
> 
> Guest memory that was demand pinned gets freed in sev_free_memslot(). Regions
> that were statically pinned, e.g. for SEV_LAUNCH_UPDATE_DATA, need to be freed
> here. These too will be demand pinned in patch 9.
> 
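
Thanks, that clears it up. So, condensing my reading of the two unpin
paths in this series into one illustrative (non-functional) sketch,
with locking and cache flushing omitted:

	/* Illustrative sketch only, based on the code in this series. */
	static void sev_drop_all_pins(struct kvm *kvm,
				      struct kvm_memory_slot *slot)
	{
		struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
		struct pinned_region *region;
		struct list_head *pos, *q;
		unsigned long i;

		/*
		 * Static pins (e.g. SEV_LAUNCH_UPDATE_DATA issued before
		 * any vcpu runs) live on pinned_regions_list and are
		 * dropped at VM destroy:
		 */
		list_for_each_safe(pos, q, &sev->pinned_regions_list) {
			region = list_entry(pos, struct pinned_region, list);
			__sev_unpin_memory(kvm, region->pages, region->npages);
			list_del(&region->list);
			kfree(region);
		}

		/*
		 * Demand pins live in memslot->arch.{pinned_bitmap,pfns}
		 * and are dropped per memslot in sev_free_memslot():
		 */
		for (i = 0; i < slot->npages; i++)
			if (test_and_clear_bit(i, slot->arch.pinned_bitmap))
				unpin_user_page(pfn_to_page(slot->arch.pfns[i]));
	}
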
> > 
> >> @@ -2951,13 +2952,96 @@ void sev_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector)
> >>  	ghcb_set_sw_exit_info_2(svm->sev_es.ghcb, 1);
> >>  }
> >>  
> >> +bool sev_pin_pfn(struct kvm_vcpu *vcpu, struct kvm_memory_slot *memslot,
> >> +		 kvm_pfn_t pfn, hva_t hva, bool write, enum pg_level level)
> >> +{
> >> +	unsigned int npages = KVM_PAGES_PER_HPAGE(level);
> >> +	unsigned int flags = FOLL_LONGTERM, npinned;
> >> +	struct kvm_arch_memory_slot *aslot;
> >> +	struct kvm *kvm = vcpu->kvm;
> >> +	gfn_t gfn_start, rel_gfn;
> >> +	struct page *page[1];
> >> +	kvm_pfn_t old_pfn;
> >> +
> >> +	if (!sev_guest(kvm))
> >> +		return true;
> >> +
> >> +	if (WARN_ON_ONCE(!memslot->arch.pfns))
> >> +		return false;
> >> +
> >> +	if (KVM_BUG_ON(level > PG_LEVEL_1G, kvm))
> >> +		return false;
> >> +
> >> +	hva = ALIGN_DOWN(hva, npages << PAGE_SHIFT);
> >> +	flags |= write ? FOLL_WRITE : 0;
> >> +
> >> +	mutex_lock(&kvm->slots_arch_lock);
> >> +	gfn_start = hva_to_gfn_memslot(hva, memslot);
> >> +	rel_gfn = gfn_start - memslot->base_gfn;
> >> +	aslot = &memslot->arch;
> >> +	if (test_bit(rel_gfn, aslot->pinned_bitmap)) {
> >> +		old_pfn = aslot->pfns[rel_gfn];
> >> +		if (old_pfn == pfn)
> >> +			goto out;
> >> +
> >> +		/* Flush the cache before releasing the page to the system */
> >> +		sev_flush_guest_memory(to_svm(vcpu), __va(old_pfn),
> >> +				       npages * PAGE_SIZE);
> >> +		unpin_user_page(pfn_to_page(old_pfn));
> >> +	}
> >> +	/* Pin the page, KVM doesn't yet support page migration. */
> >> +	npinned = pin_user_pages_fast(hva, 1, flags, page);
> >> +	KVM_BUG(npinned != 1, kvm, "SEV: Pinning failed\n");
> >> +	KVM_BUG(pfn != page_to_pfn(page[0]), kvm, "SEV: pfn mismatch\n");
> >> +
> >> +	if (!this_cpu_has(X86_FEATURE_SME_COHERENT))
> >> +		clflush_cache_range(__va(pfn << PAGE_SHIFT), npages * PAGE_SIZE);
> >> +
> >> +	WARN_ON(rel_gfn >= memslot->npages);
> >> +	aslot->pfns[rel_gfn] = pfn;
> >> +	set_bit(rel_gfn, aslot->pinned_bitmap);
> >> +
> >> +out:
> >> +	mutex_unlock(&kvm->slots_arch_lock);
> >> +	return true;
> >> +}
> >> +
> >>  void sev_free_memslot(struct kvm *kvm, struct kvm_memory_slot *slot)
> >>  {
> >>  	struct kvm_arch_memory_slot *aslot = &slot->arch;
> >> +	kvm_pfn_t *pfns;
> >> +	gfn_t gfn;
> >> +	int i;
> >>  
> >>  	if (!sev_guest(kvm))
> >>  		return;
> >>  
> >> +	if (!aslot->pinned_bitmap || !slot->arch.pfns)
> >> +		goto out;
> >> +
> >> +	pfns = aslot->pfns;
> >> +
> >> +	/*
> >> +	 * Ensure that all guest tagged cache entries are flushed before
> >> +	 * releasing the pages back to the system for use. CLFLUSH will
> >> +	 * not do this, so issue a WBINVD.
> >> +	 */
> >> +	wbinvd_on_all_cpus();
> > 
> > This is a heavyweight operation and is essentially needed at most
> > once per VM shutdown. So, better to use a smaller hammer in the
> > following code. Or, alternatively, maybe pass a boolean from the
> > caller to avoid flushing when it is true.
> 
> Or maybe I can do this in the sev_vm_destroy() patch?
> 
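
That would work too. For reference, the flag-based variant I was
thinking of, as a minimal untested sketch (the do_flush parameter is my
own, hypothetical, addition):

	static void __sev_free_memslot(struct kvm *kvm,
				       struct kvm_memory_slot *slot,
				       bool do_flush)
	{
		/*
		 * One WBINVD flushes all guest tagged cache lines, so the
		 * VM-destroy path can issue wbinvd_on_all_cpus() once and
		 * then call this with do_flush == false for every memslot.
		 */
		if (do_flush)
			wbinvd_on_all_cpus();

		/* ... walk pinned_bitmap and unpin_user_page() as before ... */
	}
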
> >> +
> >> +	/*
> >> +	 * Iterate the memslot to find the pinned pfn using the bitmap and drop
> >> +	 * the pfn stored.
> >> +	 */
> >> +	for (i = 0, gfn = slot->base_gfn; i < slot->npages; i++, gfn++) {
> >> +		if (test_and_clear_bit(i, aslot->pinned_bitmap)) {
> >> +			if (WARN_ON(!pfns[i]))
> >> +				continue;
> >> +
> >> +			unpin_user_page(pfn_to_page(pfns[i]));
> >> +		}
> >> +	}
> >> +
> >> +out:
> >>  	if (aslot->pinned_bitmap) {
> >>  		kvfree(aslot->pinned_bitmap);
> >>  		aslot->pinned_bitmap = NULL;
> >> @@ -2992,6 +3076,8 @@ int sev_alloc_memslot_metadata(struct kvm *kvm,
> >>  		return -ENOMEM;
> >>  
> >>  	aslot->pinned_bitmap = kvzalloc(pinned_bytes, GFP_KERNEL_ACCOUNT);
> >> +	new->flags |= KVM_MEMSLOT_ENCRYPTED;
> >> +
> >>  	if (!aslot->pinned_bitmap) {
> >>  		kvfree(aslot->pfns);
> >>  		aslot->pfns = NULL;
> >> diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
> >> index ec06421cb532..463a90ed6f83 100644
> >> --- a/arch/x86/kvm/svm/svm.c
> >> +++ b/arch/x86/kvm/svm/svm.c
> >> @@ -4661,6 +4661,7 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
> >>  
> >>  	.alloc_memslot_metadata = sev_alloc_memslot_metadata,
> >>  	.free_memslot = sev_free_memslot,
> >> +	.pin_pfn = sev_pin_pfn,
> >>  };
> >>  
> >>  /*
> >> diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
> >> index f00364020d7e..2f38e793ead0 100644
> >> --- a/arch/x86/kvm/svm/svm.h
> >> +++ b/arch/x86/kvm/svm/svm.h
> >> @@ -75,8 +75,8 @@ struct kvm_sev_info {
> >>  	unsigned int asid;	/* ASID used for this guest */
> >>  	unsigned int handle;	/* SEV firmware handle */
> >>  	int fd;			/* SEV device fd */
> >> -	unsigned long pages_locked; /* Number of pages locked */
> >> -	struct list_head regions_list;  /* List of registered regions */
> >> +	unsigned long pages_to_lock; /* Number of pages that can be locked */
> >> +	struct list_head pinned_regions_list;  /* List of pinned regions */
> >>  	u64 ap_jump_table;	/* SEV-ES AP Jump Table address */
> >>  	struct kvm *enc_context_owner; /* Owner of copied encryption context */
> >>  	unsigned long num_mirrored_vms; /* Number of VMs sharing this ASID */
> >> @@ -621,5 +621,7 @@ int sev_alloc_memslot_metadata(struct kvm *kvm,
> >>  			       struct kvm_memory_slot *new);
> >>  void sev_free_memslot(struct kvm *kvm,
> >>  		      struct kvm_memory_slot *slot);
> >> +bool sev_pin_pfn(struct kvm_vcpu *vcpu, struct kvm_memory_slot *memslot,
> >> +		 kvm_pfn_t pfn, hva_t hva, bool write, enum pg_level level);
> >>  
> >>  #endif
> >> -- 
> >> 2.32.0
> >>
> 
> Thanks for the review.
> 
> Regards
> Nikunj


