Re: [PATCH v1] iommu/tegra-smmu: Add missing locks around mapping operations

Thierry Reding <thierry.reding@xxxxxxxxx> · Mon, 25 May 2020 10:35:56 +0200

On Sun, May 24, 2020 at 09:37:55PM +0300, Dmitry Osipenko wrote:
> The mapping operations of the Tegra SMMU driver are subject to race
> conditions because the SMMU Address Space isn't allocated and freed
> atomically, while it should be. This patch makes the mapping operations
> atomic, which fixes an accidentally released Host1x Address Space problem
> that happens while running multiple graphics tests in parallel on
> Tegra30, i.e. by having multiple threads racing with each other in the
> Host1x's submission and completion code paths, performing IOVA mappings
> and unmappings in parallel.
> 
> Cc: <stable@xxxxxxxxxxxxxxx>
> Signed-off-by: Dmitry Osipenko <digetx@xxxxxxxxx>
> ---
>  drivers/iommu/tegra-smmu.c | 43 +++++++++++++++++++++++++++++++++-----
>  1 file changed, 38 insertions(+), 5 deletions(-)
> 
> diff --git a/drivers/iommu/tegra-smmu.c b/drivers/iommu/tegra-smmu.c
> index 7426b7666e2b..4f956a797838 100644
> --- a/drivers/iommu/tegra-smmu.c
> +++ b/drivers/iommu/tegra-smmu.c
> @@ -12,6 +12,7 @@
>  #include <linux/of_device.h>
>  #include <linux/platform_device.h>
>  #include <linux/slab.h>
> +#include <linux/spinlock.h>
>  #include <linux/dma-mapping.h>
>  
>  #include <soc/tegra/ahb.h>
> @@ -49,6 +50,7 @@ struct tegra_smmu_as {
>  	struct iommu_domain domain;
>  	struct tegra_smmu *smmu;
>  	unsigned int use_count;
> +	spinlock_t lock;
>  	u32 *count;
>  	struct page **pts;
>  	struct page *pd;
> @@ -308,6 +310,8 @@ static struct iommu_domain *tegra_smmu_domain_alloc(unsigned type)
>  		return NULL;
>  	}
>  
> +	spin_lock_init(&as->lock);
> +
>  	/* setup aperture */
>  	as->domain.geometry.aperture_start = 0;
>  	as->domain.geometry.aperture_end = 0xffffffff;
> @@ -578,7 +582,7 @@ static u32 *as_get_pte(struct tegra_smmu_as *as, dma_addr_t iova,
>  		struct page *page;
>  		dma_addr_t dma;
>  
> -		page = alloc_page(GFP_KERNEL | __GFP_DMA | __GFP_ZERO);
> +		page = alloc_page(GFP_ATOMIC | __GFP_DMA | __GFP_ZERO);

I'm not sure this is a good idea. My recollection is that GFP_ATOMIC
will allocate from a special reserved region of memory, which may be
easily exhausted.

Is there any reason why we need the spinlock? Can't we use a mutex
instead?

>  		if (!page)
>  			return NULL;
>  
> @@ -655,8 +659,9 @@ static void tegra_smmu_set_pte(struct tegra_smmu_as *as, unsigned long iova,
>  	smmu_flush(smmu);
>  }
>  
> -static int tegra_smmu_map(struct iommu_domain *domain, unsigned long iova,
> -			  phys_addr_t paddr, size_t size, int prot, gfp_t gfp)
> +static int
> +tegra_smmu_map_locked(struct iommu_domain *domain, unsigned long iova,
> +		      phys_addr_t paddr, size_t size, int prot, gfp_t gfp)

I think it's more typical to use the _unlocked suffix for functions that
don't take a lock themselves.

>  {
>  	struct tegra_smmu_as *as = to_smmu_as(domain);
>  	dma_addr_t pte_dma;
> @@ -685,8 +690,9 @@ static int tegra_smmu_map(struct iommu_domain *domain, unsigned long iova,
>  	return 0;
>  }
>  
> -static size_t tegra_smmu_unmap(struct iommu_domain *domain, unsigned long iova,
> -			       size_t size, struct iommu_iotlb_gather *gather)
> +static size_t
> +tegra_smmu_unmap_locked(struct iommu_domain *domain, unsigned long iova,
> +			size_t size, struct iommu_iotlb_gather *gather)
>  {
>  	struct tegra_smmu_as *as = to_smmu_as(domain);
>  	dma_addr_t pte_dma;
> @@ -702,6 +708,33 @@ static size_t tegra_smmu_unmap(struct iommu_domain *domain, unsigned long iova,
>  	return size;
>  }
>  
> +static int tegra_smmu_map(struct iommu_domain *domain, unsigned long iova,
> +			  phys_addr_t paddr, size_t size, int prot, gfp_t gfp)
> +{
> +	struct tegra_smmu_as *as = to_smmu_as(domain);
> +	unsigned long flags;
> +	int ret;
> +
> +	spin_lock_irqsave(&as->lock, flags);
> +	ret = tegra_smmu_map_locked(domain, iova, paddr, size, prot, gfp);
> +	spin_unlock_irqrestore(&as->lock, flags);
> +
> +	return ret;
> +}
> +
> +static size_t tegra_smmu_unmap(struct iommu_domain *domain, unsigned long iova,
> +			       size_t size, struct iommu_iotlb_gather *gather)
> +{
> +	struct tegra_smmu_as *as = to_smmu_as(domain);
> +	unsigned long flags;
> +
> +	spin_lock_irqsave(&as->lock, flags);
> +	size = tegra_smmu_unmap_locked(domain, iova, size, gather);
> +	spin_unlock_irqrestore(&as->lock, flags);
> +
> +	return size;
> +}

Why the extra functions here? Nothing else in the driver calls the inner
(lock-free) variants directly, and the IOMMU framework only has a single
callback for each operation, so I think the locking can just move into the
main implementation.

Thierry
Attachment:
signature.asc

Description: PGP signature