Re: [RFC v2 1/5] vfio/type1: Introduce iova list and add iommu aperture validity check

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Hi Shameer,

On 18/01/18 01:04, Alex Williamson wrote:
> On Fri, 12 Jan 2018 16:45:27 +0000
> Shameer Kolothum <shameerali.kolothum.thodi@xxxxxxxxxx> wrote:
> 
>> This introduces an iova list that is valid for dma mappings. Make
>> sure the new iommu aperture window is valid and doesn't conflict
>> with any existing dma mappings during attach. Also update the iova
>> list with new aperture window during attach/detach.
>>
>> Signed-off-by: Shameer Kolothum <shameerali.kolothum.thodi@xxxxxxxxxx>
>> ---
>>  drivers/vfio/vfio_iommu_type1.c | 177 ++++++++++++++++++++++++++++++++++++++++
>>  1 file changed, 177 insertions(+)
>>
>> diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
>> index e30e29a..11cbd49 100644
>> --- a/drivers/vfio/vfio_iommu_type1.c
>> +++ b/drivers/vfio/vfio_iommu_type1.c
>> @@ -60,6 +60,7 @@
>>  
>>  struct vfio_iommu {
>>  	struct list_head	domain_list;
>> +	struct list_head	iova_list;
>>  	struct vfio_domain	*external_domain; /* domain for external user */
>>  	struct mutex		lock;
>>  	struct rb_root		dma_list;
>> @@ -92,6 +93,12 @@ struct vfio_group {
>>  	struct list_head	next;
>>  };
>>  
>> +struct vfio_iova {
>> +	struct list_head	list;
>> +	phys_addr_t		start;
>> +	phys_addr_t		end;
>> +};
> 
> dma_list uses dma_addr_t for the iova.  IOVAs are naturally DMA
> addresses, why are we using phys_addr_t?
> 
>> +
>>  /*
>>   * Guest RAM pinning working set or DMA target
>>   */
>> @@ -1192,6 +1199,123 @@ static bool vfio_iommu_has_sw_msi(struct iommu_group *group, phys_addr_t *base)
>>  	return ret;
>>  }
>>  
>> +static int vfio_insert_iova(phys_addr_t start, phys_addr_t end,
>> +				struct list_head *head)
>> +{
>> +	struct vfio_iova *region;
>> +
>> +	region = kmalloc(sizeof(*region), GFP_KERNEL);
>> +	if (!region)
>> +		return -ENOMEM;
>> +
>> +	INIT_LIST_HEAD(&region->list);
>> +	region->start = start;
>> +	region->end = end;
>> +
>> +	list_add_tail(&region->list, head);
>> +	return 0;
>> +}
> 
> As I'm reading through this series, I'm learning that there are a lot
> of assumptions and subtle details that should be documented.  For
> instance, the IOMMU API only provides a single geometry and we build
> upon that here as this patch creates a list, but there's only a single
> entry for now.  The following patches carve that single iova range into
> pieces and somewhat subtly use the list_head passed to keep the list
> sorted, allowing the first/last_entry tricks used throughout.  Subtle
> interfaces are prone to bugs.
> 
>> +
>> +/*
>> + * Find whether a mem region overlaps with existing dma mappings
>> + */
>> +static bool vfio_find_dma_overlap(struct vfio_iommu *iommu,
>> +				  phys_addr_t start, phys_addr_t end)
>> +{
>> +	struct rb_node *n = rb_first(&iommu->dma_list);
>> +
>> +	for (; n; n = rb_next(n)) {
>> +		struct vfio_dma *dma;
>> +
>> +		dma = rb_entry(n, struct vfio_dma, node);
>> +
>> +		if (end < dma->iova)
>> +			break;
>> +		if (start >= dma->iova + dma->size)
>> +			continue;
>> +		return true;
>> +	}
>> +
>> +	return false;
>> +}
> 
> Why do we need this in addition to the existing vfio_find_dma()?  Why
> doesn't this use the tree structure of the dma_list?
> 
>> +
>> +/*
>> + * Check the new iommu aperture is a valid one
>> + */
>> +static int vfio_iommu_valid_aperture(struct vfio_iommu *iommu,
>> +				     phys_addr_t start,
>> +				     phys_addr_t end)
>> +{
>> +	struct vfio_iova *first, *last;
>> +	struct list_head *iova = &iommu->iova_list;
>> +
>> +	if (list_empty(iova))
>> +		return 0;
>> +
>> +	/* Check if new one is outside the current aperture */
> 
> "Disjoint sets"
> 
>> +	first = list_first_entry(iova, struct vfio_iova, list);
>> +	last = list_last_entry(iova, struct vfio_iova, list);
>> +	if ((start > last->end) || (end < first->start))
>> +		return -EINVAL;
>> +
>> +	/* Check for any existing dma mappings outside the new start */
>> +	if (start > first->start) {
>> +		if (vfio_find_dma_overlap(iommu, first->start, start - 1))
>> +			return -EINVAL;
>> +	}
>> +
>> +	/* Check for any existing dma mappings outside the new end */
>> +	if (end < last->end) {
>> +		if (vfio_find_dma_overlap(iommu, end + 1, last->end))
>> +			return -EINVAL;
>> +	}
>> +
>> +	return 0;
>> +}
> 
> I think this returns an int because you want to use it for the return
> value below, but it really seems like a bool question, ie. does this
> aperture conflict with existing mappings.  Additionally, the aperture
> is valid, it was provided to us by the IOMMU API, the question is
> whether it conflicts.  Please also name consistently to the other
> functions in this patch, vfio_iommu_aper_xxxx().
> 
>> +
>> +/*
>> + * Adjust the iommu aperture window if new aperture is a valid one
>> + */
>> +static int vfio_iommu_iova_aper_adjust(struct vfio_iommu *iommu,
>> +				      phys_addr_t start,
>> +				      phys_addr_t end)
> 
> Perhaps "resize", "prune", or "shrink" to make it more clear what is
> being adjusted?
> 
>> +{
>> +	struct vfio_iova *node, *next;
>> +	struct list_head *iova = &iommu->iova_list;
>> +
>> +	if (list_empty(iova))
>> +		return vfio_insert_iova(start, end, iova);
>> +
>> +	/* Adjust iova list start */
>> +	list_for_each_entry_safe(node, next, iova, list) {
>> +		if (start < node->start)
>> +			break;
>> +		if ((start >= node->start) && (start <= node->end)) {
> 
> start == node->end results in a zero sized node.  s/<=/</
> 
>> +			node->start = start;
>> +			break;
>> +		}
>> +		/* Delete nodes before new start */
>> +		list_del(&node->list);
>> +		kfree(node);
>> +	}
>> +
>> +	/* Adjust iova list end */
>> +	list_for_each_entry_safe(node, next, iova, list) {
>> +		if (end > node->end)
>> +			continue;
>> +
>> +		if ((end >= node->start) && (end <= node->end)) {
> 
> end == node->start results in a zero sized node.  s/>=/>/
> 
>> +			node->end = end;
>> +			continue;
>> +		}
>> +		/* Delete nodes after new end */
>> +		list_del(&node->list);
>> +		kfree(node);
>> +	}
>> +
>> +	return 0;
>> +}
>> +
>>  static int vfio_iommu_type1_attach_group(void *iommu_data,
>>  					 struct iommu_group *iommu_group)
>>  {
>> @@ -1202,6 +1326,7 @@ static int vfio_iommu_type1_attach_group(void *iommu_data,
>>  	int ret;
>>  	bool resv_msi, msi_remap;
>>  	phys_addr_t resv_msi_base;
>> +	struct iommu_domain_geometry geo;
>>  
>>  	mutex_lock(&iommu->lock);
>>  
>> @@ -1271,6 +1396,14 @@ static int vfio_iommu_type1_attach_group(void *iommu_data,
>>  	if (ret)
>>  		goto out_domain;
>>  
>> +	/* Get aperture info */
>> +	iommu_domain_get_attr(domain->domain, DOMAIN_ATTR_GEOMETRY, &geo);
>> +
>> +	ret = vfio_iommu_valid_aperture(iommu, geo.aperture_start,
>> +					geo.aperture_end);
>> +	if (ret)
>> +		goto out_detach;
>> +
>>  	resv_msi = vfio_iommu_has_sw_msi(iommu_group, &resv_msi_base);
>>  
>>  	INIT_LIST_HEAD(&domain->group_list);
>> @@ -1327,6 +1460,11 @@ static int vfio_iommu_type1_attach_group(void *iommu_data,
>>  			goto out_detach;
>>  	}
>>  
>> +	ret = vfio_iommu_iova_aper_adjust(iommu, geo.aperture_start,
>> +					  geo.aperture_end);
>> +	if (ret)
>> +		goto out_detach;
>> +
>>  	list_add(&domain->next, &iommu->domain_list);
>>  
>>  	mutex_unlock(&iommu->lock);
>> @@ -1392,6 +1530,35 @@ static void vfio_sanity_check_pfn_list(struct vfio_iommu *iommu)
>>  	WARN_ON(iommu->notifier.head);
>>  }
>>  
>> +/*
>> + * Called when a domain is removed in detach. It is possible that
>> + * the removed domain decided the iova aperture window. Modify the
>> + * iova aperture with the smallest window among existing domains.
>> + */
>> +static void vfio_iommu_iova_aper_refresh(struct vfio_iommu *iommu)
>> +{
>> +	struct vfio_domain *domain;
>> +	struct iommu_domain_geometry geo;
>> +	struct vfio_iova *node;
>> +	phys_addr_t start = 0;
>> +	phys_addr_t end = (phys_addr_t)~0;
>> +
>> +	list_for_each_entry(domain, &iommu->domain_list, next) {
>> +		iommu_domain_get_attr(domain->domain, DOMAIN_ATTR_GEOMETRY,
>> +				      &geo);
>> +			if (geo.aperture_start > start)
>> +				start = geo.aperture_start;
>> +			if (geo.aperture_end < end)
>> +				end = geo.aperture_end;
>> +	}
>> +
>> +	/* modify iova aperture limits */
>> +	node = list_first_entry(&iommu->iova_list, struct vfio_iova, list);
>> +	node->start = start;
>> +	node = list_last_entry(&iommu->iova_list, struct vfio_iova, list);
>> +	node->end = end;
> 
> We can do this because the new aperture is the same or bigger than the
> current aperture, never smaller.  That's not fully obvious and should
> be noted in the comment.  Perhaps this function should be "expand"
> rather than "refresh".
This one is not obvious to me either:
assuming you have 2 domains, resp with aperture 1 and 2, resulting into
aperture 3. Holes are created by resv regions for instance. If you
remove domain 1, don't you get 4) instead of 2)?

1)   |------------|
 +
2) |---|    |--|       |-----|
=
3)   |-|    |--|


4) |---|    |----------------|

Thanks

Eric
> 
>> +}
>> +
>>  static void vfio_iommu_type1_detach_group(void *iommu_data,
>>  					  struct iommu_group *iommu_group)
>>  {
>> @@ -1445,6 +1612,7 @@ static void vfio_iommu_type1_detach_group(void *iommu_data,
>>  			iommu_domain_free(domain->domain);
>>  			list_del(&domain->next);
>>  			kfree(domain);
>> +			vfio_iommu_iova_aper_refresh(iommu);
>>  		}
>>  		break;
>>  	}
>> @@ -1475,6 +1643,7 @@ static void *vfio_iommu_type1_open(unsigned long arg)
>>  	}
>>  
>>  	INIT_LIST_HEAD(&iommu->domain_list);
>> +	INIT_LIST_HEAD(&iommu->iova_list);
>>  	iommu->dma_list = RB_ROOT;
>>  	mutex_init(&iommu->lock);
>>  	BLOCKING_INIT_NOTIFIER_HEAD(&iommu->notifier);
>> @@ -1502,6 +1671,7 @@ static void vfio_iommu_type1_release(void *iommu_data)
>>  {
>>  	struct vfio_iommu *iommu = iommu_data;
>>  	struct vfio_domain *domain, *domain_tmp;
>> +	struct vfio_iova *iova, *iova_tmp;
>>  
>>  	if (iommu->external_domain) {
>>  		vfio_release_domain(iommu->external_domain, true);
>> @@ -1517,6 +1687,13 @@ static void vfio_iommu_type1_release(void *iommu_data)
>>  		list_del(&domain->next);
>>  		kfree(domain);
>>  	}
>> +
>> +	list_for_each_entry_safe(iova, iova_tmp,
>> +				 &iommu->iova_list, list) {
>> +		list_del(&iova->list);
>> +		kfree(iova);
>> +	}
>> +
>>  	kfree(iommu);
>>  }
>>  
> 



[Index of Archives]     [KVM ARM]     [KVM ia64]     [KVM ppc]     [Virtualization Tools]     [Spice Development]     [Libvirt]     [Libvirt Users]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite Questions]     [Linux Kernel]     [Linux SCSI]     [XFree86]

  Powered by Linux