Re: [PATCH v6 17/25] iommu/arm-smmu-v3: Implement iommu_sva_bind/unbind()

Hi Jean,

A couple of questions on how the SMMU handles CD.V and translation disable.

On Thu, 30 Apr 2020 16:34:16 +0200
Jean-Philippe Brucker <jean-philippe@xxxxxxxxxx> wrote:

> The sva_bind() function allows devices to access process address
> spaces using a PASID (aka SSID).
> 
> (1) bind() allocates or gets an existing MMU notifier tied to the
>     (domain, mm) pair. Each mm gets one PASID.
> 
> (2) Any change to the address space calls invalidate_range() which
>     sends ATC invalidations (in a subsequent patch).
> 
> (3) When the process address space dies, the release() notifier
>     disables the CD to allow reclaiming the page tables. Since release()
>     has to be light we do not instruct device drivers to stop DMA here,
>     we just ignore incoming page faults.
> 
>     To avoid any event 0x0a print (C_BAD_CD) we disable translation
>     without clearing CD.V. PCIe Translation Requests and Page Requests
>     are silently denied. Don't clear the R bit because the S bit can't
>     be cleared when STALL_MODEL==0b10 (forced), and clearing R without
>     clearing S is useless. Faulting transactions will stall and will
>     be aborted by the IOPF handler.
> 
> (4) After stopping DMA, the device driver releases the bond by calling
>     unbind(). We release the MMU notifier, free the PASID and the
>     bond.
> 
> Three structures keep track of bonds:
> * arm_smmu_bond: one per (device, mm) pair, the handle returned to the
>   device driver for a bind() request.
> * arm_smmu_mmu_notifier: one per (domain, mm) pair, deals with ATS/TLB
>   invalidations and clearing the context descriptor on mm exit.
> * arm_smmu_ctx_desc: one per mm, holds the pinned ASID and pgd.
> 
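Not part of the patch -- just to check I follow the intended flow from the
driver side. Below is a rough sketch of how I picture a device driver
consuming this through the generic iommu_sva_* wrappers; the driver function
names (my_drv_attach_mm, my_drv_program_pasid) and the error handling are
made up, and it assumes the driver has already enabled IOMMU_DEV_FEAT_SVA:

	#include <linux/err.h>
	#include <linux/iommu.h>

	static int my_drv_attach_mm(struct device *dev, struct mm_struct *mm)
	{
		struct iommu_sva *handle;
		int pasid;

		/* (1) create or reuse the bond for this (dev, mm) pair */
		handle = iommu_sva_bind_device(dev, mm, NULL);
		if (IS_ERR(handle))
			return PTR_ERR(handle);

		/* program the PASID into the device and kick off DMA */
		pasid = iommu_sva_get_pasid(handle);
		my_drv_program_pasid(dev, pasid);

		/* ... once DMA is stopped: (4) release the bond */
		iommu_sva_unbind_device(handle);
		return 0;
	}
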
> Signed-off-by: Jean-Philippe Brucker <jean-philippe@xxxxxxxxxx>
> ---
> v5->v6:
> * Implement bind() directly instead of going through io_mm_ops
> * Don't clear S and R bits in step (3), it doesn't work with
>   STALL_FORCE.
> ---
>  drivers/iommu/Kconfig       |   1 +
>  drivers/iommu/arm-smmu-v3.c | 256 +++++++++++++++++++++++++++++++++++-
>  2 files changed, 253 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig
> index 1e64ee6592e16..f863c4562feeb 100644
> --- a/drivers/iommu/Kconfig
> +++ b/drivers/iommu/Kconfig
> @@ -432,6 +432,7 @@ config ARM_SMMU_V3
>  	tristate "ARM Ltd. System MMU Version 3 (SMMUv3) Support"
>  	depends on ARM64
>  	select IOMMU_API
> +	select IOMMU_SVA
>  	select IOMMU_IO_PGTABLE_LPAE
>  	select GENERIC_MSI_IRQ_DOMAIN
>  	help
> diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
> index c7942d0540599..00e5b69bb81a5 100644
> --- a/drivers/iommu/arm-smmu-v3.c
> +++ b/drivers/iommu/arm-smmu-v3.c
> @@ -24,6 +24,7 @@
>  #include <linux/iopoll.h>
>  #include <linux/module.h>
>  #include <linux/mmu_context.h>
> +#include <linux/mmu_notifier.h>
>  #include <linux/msi.h>
>  #include <linux/of.h>
>  #include <linux/of_address.h>
> @@ -36,6 +37,7 @@
>  #include <linux/amba/bus.h>
>  
>  #include "io-pgtable-arm.h"
> +#include "iommu-sva.h"
>  
>  /* MMIO registers */
>  #define ARM_SMMU_IDR0			0x0
> @@ -731,8 +733,31 @@ struct arm_smmu_domain {
>  
>  	struct list_head		devices;
>  	spinlock_t			devices_lock;
> +
> +	struct mmu_notifier_ops		mn_ops;
>  };
>  
> +struct arm_smmu_mmu_notifier {
> +	struct mmu_notifier		mn;
> +	struct arm_smmu_ctx_desc	*cd;
> +	bool				cleared;
> +	refcount_t			refs;
> +	struct arm_smmu_domain		*domain;
> +};
> +
> +#define mn_to_smmu(mn) container_of(mn, struct arm_smmu_mmu_notifier, mn)
> +
> +struct arm_smmu_bond {
> +	struct iommu_sva		sva;
> +	struct mm_struct		*mm;
> +	struct arm_smmu_mmu_notifier	*smmu_mn;
> +	struct list_head		list;
> +	refcount_t			refs;
> +};
> +
> +#define sva_to_bond(handle) \
> +	container_of(handle, struct arm_smmu_bond, sva)
> +
>  struct arm_smmu_option_prop {
>  	u32 opt;
>  	const char *prop;
> @@ -742,6 +767,13 @@ static DEFINE_XARRAY_ALLOC1(asid_xa);
>  static DEFINE_SPINLOCK(contexts_lock);
>  static DEFINE_MUTEX(arm_smmu_sva_lock);
>  
> +/*
> + * When a process dies, DMA is still running but we need to clear the
> + * pgd. If we simply cleared the valid bit from the context descriptor,
> + * we'd get event 0x0a which are not recoverable.
> + */
> +static struct arm_smmu_ctx_desc invalid_cd = { 0 };
> +
>  static struct arm_smmu_option_prop arm_smmu_options[] = {
>  	{ ARM_SMMU_OPT_SKIP_PREFETCH, "hisilicon,broken-prefetch-cmd" },
>  	{ ARM_SMMU_OPT_PAGE0_REGS_ONLY, "cavium,cn9900-broken-page1-regspace"},
> @@ -1652,7 +1684,9 @@ static int __arm_smmu_write_ctx_desc(struct arm_smmu_domain *smmu_domain,
>  	 * (2) Install a secondary CD, for SID+SSID traffic.
>  	 * (3) Update ASID of a CD. Atomically write the first 64 bits of the
>  	 *     CD, then invalidate the old entry and mappings.
> -	 * (4) Remove a secondary CD.
> +	 * (4) Quiesce the context without clearing the valid bit. Disable
> +	 *     translation, and ignore any translation fault.
> +	 * (5) Remove a secondary CD.
>  	 */
>  	u64 val;
>  	bool cd_live;
> @@ -1669,8 +1703,10 @@ static int __arm_smmu_write_ctx_desc(struct arm_smmu_domain *smmu_domain,
>  	val = le64_to_cpu(cdptr[0]);
>  	cd_live = !!(val & CTXDESC_CD_0_V);
>  
> -	if (!cd) { /* (4) */
> +	if (!cd) { /* (5) */
>  		val = 0;
> +	} else if (cd == &invalid_cd) { /* (4) */
> +		val |= CTXDESC_CD_0_TCR_EPD0;
>  	} else if (cd_live) { /* (3) */
>  		val &= ~CTXDESC_CD_0_ASID;
>  		val |= FIELD_PREP(CTXDESC_CD_0_ASID, cd->asid);
> @@ -1883,7 +1919,6 @@ static struct arm_smmu_ctx_desc *arm_smmu_share_asid(u16 asid)
>  	return NULL;
>  }
>  
> -__maybe_unused
>  static struct arm_smmu_ctx_desc *arm_smmu_alloc_shared_cd(struct mm_struct *mm)
>  {
>  	u16 asid;
> @@ -1976,7 +2011,6 @@ static struct arm_smmu_ctx_desc *arm_smmu_alloc_shared_cd(struct mm_struct *mm)
>  	return ERR_PTR(ret);
>  }
>  
> -__maybe_unused
>  static void arm_smmu_free_shared_cd(struct arm_smmu_ctx_desc *cd)
>  {
>  	if (arm_smmu_free_asid(cd)) {
> @@ -2611,6 +2645,8 @@ static bool arm_smmu_capable(enum iommu_cap cap)
>  	}
>  }
>  
> +static struct mmu_notifier_ops arm_smmu_mmu_notifier_ops;
> +
>  static struct iommu_domain *arm_smmu_domain_alloc(unsigned type)
>  {
>  	struct arm_smmu_domain *smmu_domain;
> @@ -2638,6 +2674,7 @@ static struct iommu_domain *arm_smmu_domain_alloc(unsigned type)
>  	mutex_init(&smmu_domain->init_mutex);
>  	INIT_LIST_HEAD(&smmu_domain->devices);
>  	spin_lock_init(&smmu_domain->devices_lock);
> +	smmu_domain->mn_ops = arm_smmu_mmu_notifier_ops;
>  
>  	return &smmu_domain->domain;
>  }
> @@ -3118,6 +3155,208 @@ arm_smmu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova)
>  	return ops->iova_to_phys(ops, iova);
>  }
>  
> +static struct mmu_notifier *arm_smmu_mmu_notifier_alloc(struct mm_struct *mm)
> +{
> +	struct arm_smmu_mmu_notifier *smmu_mn;
> +
> +	smmu_mn = kzalloc(sizeof(*smmu_mn), GFP_KERNEL);
> +	if (!smmu_mn)
> +		return ERR_PTR(-ENOMEM);
> +
> +	smmu_mn->cd = arm_smmu_alloc_shared_cd(mm);
> +	if (IS_ERR(smmu_mn->cd)) {
> +		void *ptr = ERR_CAST(smmu_mn->cd);
> +
> +		kfree(smmu_mn);
> +		return ptr;
> +	}
> +	refcount_set(&smmu_mn->refs, 1);
> +
> +	return &smmu_mn->mn;
> +}
> +
> +static void arm_smmu_mmu_notifier_free(struct mmu_notifier *mn)
> +{
> +	struct arm_smmu_mmu_notifier *smmu_mn = mn_to_smmu(mn);
> +
> +	arm_smmu_free_shared_cd(smmu_mn->cd);
> +	kfree(smmu_mn);
> +}
> +
> +static void arm_smmu_mm_invalidate_range(struct mmu_notifier *mn,
> +					 struct mm_struct *mm,
> +					 unsigned long start, unsigned long end)
> +{
> +	/* TODO: invalidate ATS */
> +}
> +
> +static void arm_smmu_mm_release(struct mmu_notifier *mn, struct mm_struct *mm)
> +{
> +	struct arm_smmu_mmu_notifier *smmu_mn = mn_to_smmu(mn);
> +	struct arm_smmu_domain *smmu_domain;
> +
> +	mutex_lock(&arm_smmu_sva_lock);
> +	if (smmu_mn->cleared) {
> +		mutex_unlock(&arm_smmu_sva_lock);
> +		return;
> +	}
> +
> +	smmu_domain = smmu_mn->domain;
> +
> +	/*
> +	 * DMA may still be running. Keep the cd valid but disable
> +	 * translation, so that new events will still result in stall.
> +	 */
Does "disable translation" also block translated requests? I guess
release() is called after invalidate_range(), so I'm assuming there are no
devTLB entries left to generate translated requests?

> +	arm_smmu_write_ctx_desc(smmu_domain, mm->pasid, &invalid_cd);
> +
> +	arm_smmu_tlb_inv_asid(smmu_domain->smmu, smmu_mn->cd->asid);
> +	/* TODO: invalidate ATS */
> +
If mm release() is called after the invalidate_range() notifications, is it
still necessary to invalidate again here?

> +	smmu_mn->cleared = true;
> +	mutex_unlock(&arm_smmu_sva_lock);
> +}
> +
> +static struct mmu_notifier_ops arm_smmu_mmu_notifier_ops = {
> +	.alloc_notifier		= arm_smmu_mmu_notifier_alloc,
> +	.free_notifier		= arm_smmu_mmu_notifier_free,
> +	.invalidate_range	= arm_smmu_mm_invalidate_range,
> +	.release		= arm_smmu_mm_release,
> +};
> +
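Side note, mostly for my own understanding (this is my reading of the
mmu_notifier_get() contract, not something spelled out in the patch): the
returned notifier is unique per (ops pointer, mm), which is why each domain
embeds its own copy of the ops -- that is what makes it one notifier per
(domain, mm) pair rather than one per (driver, mm). Roughly:

	/* Finds the notifier already registered on @mm with this exact ops
	 * pointer and takes a reference, or calls ops->alloc_notifier() to
	 * create and register a new one. */
	mn = mmu_notifier_get(&smmu_domain->mn_ops, mm);

	/* Drops the reference; ops->free_notifier() runs later from an RCU
	 * callback, which I guess is why the bond keeps its own refcount. */
	mmu_notifier_put(mn);
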
> +static struct iommu_sva *
> +__arm_smmu_sva_bind(struct device *dev, struct mm_struct *mm)
> +{
> +	int ret;
> +	ioasid_t pasid;
> +	struct mmu_notifier *mn;
> +	struct arm_smmu_bond *bond;
> +	struct arm_smmu_mmu_notifier *smmu_mn;
> +	struct arm_smmu_master *master = dev_iommu_priv_get(dev);
> +	struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
> +	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
> +
> +	if (!master || !master->sva_enabled)
> +		return ERR_PTR(-ENODEV);
> +
> +	/* If bind() was already called for this (dev, mm) pair, reuse it. */
> +	list_for_each_entry(bond, &master->bonds, list) {
> +		if (bond->mm == mm) {
> +			refcount_inc(&bond->refs);
> +			return &bond->sva;
> +		}
> +	}
> +
> +	mn = mmu_notifier_get(&smmu_domain->mn_ops, mm);
> +	if (IS_ERR(mn))
> +		return ERR_CAST(mn);
> +
> +	smmu_mn = mn_to_smmu(mn);
> +	if (smmu_mn->domain)
> +		refcount_inc(&smmu_mn->refs);
> +
> +	bond = kzalloc(sizeof(*bond), GFP_KERNEL);
> +	if (!bond) {
> +		ret = -ENOMEM;
> +		goto err_put_mn;
> +	}
> +
> +	/* Allocate a PASID for this mm if necessary */
> +	pasid = iommu_sva_alloc_pasid(mm, 1, (1U << master->ssid_bits) - 1);
> +	if (pasid == INVALID_IOASID) {
> +		ret = -ENOSPC;
> +		goto err_free_bond;
> +	}
> +	bond->mm = mm;
> +	bond->sva.dev = dev;
> +	bond->smmu_mn = smmu_mn;
> +	refcount_set(&bond->refs, 1);
> +
> +	ret = arm_smmu_write_ctx_desc(smmu_domain, mm->pasid, smmu_mn->cd);
> +	if (ret)
> +		goto err_free_pasid;
> +
> +	bond->sva.dev = dev;
> +	list_add(&bond->list, &master->bonds);
> +	smmu_mn->domain = smmu_domain;
> +	return &bond->sva;
> +
> +err_free_pasid:
> +	iommu_sva_free_pasid(mm);
> +err_free_bond:
> +	kfree(bond);
> +err_put_mn:
> +	refcount_dec(&smmu_mn->refs);
> +	mmu_notifier_put(mn);
> +	return ERR_PTR(ret);
> +}
> +
> +static void __arm_smmu_sva_unbind(struct iommu_sva *handle)
> +{
> +	struct arm_smmu_mmu_notifier *smmu_mn;
> +	struct arm_smmu_bond *bond = sva_to_bond(handle);
> +	struct iommu_domain *domain = iommu_get_domain_for_dev(handle->dev);
> +	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
> +
> +	if (!refcount_dec_and_test(&bond->refs))
> +		return;
> +
> +	list_del(&bond->list);
> +
> +	smmu_mn = bond->smmu_mn;
> +	/*
> +	 * This is redundant as the MMU notifier already counts refs, but
> +	 * frees the bond in a RCU callback which cannot sleep. We have much
> +	 * cleaning to do and we hold all the right locks, so duplicate the
> +	 * refcounting.
> +	 */
> +	if (refcount_dec_and_test(&smmu_mn->refs)) {
> +		arm_smmu_write_ctx_desc(smmu_domain, bond->mm->pasid, NULL);
> +
> +		/*
> +		 * If we went through clear(), we've already invalidated, and
> +		 * no new TLB entry can have been formed.
> +		 */
> +		if (!smmu_mn->cleared) {
> +			arm_smmu_tlb_inv_asid(smmu_domain->smmu,
> +					      smmu_mn->cd->asid);
> +			/* TODO: invalidate ATS */
> +		}
> +	}
> +
> +	iommu_sva_free_pasid(bond->mm);
> +	kfree(bond);
> +	mmu_notifier_put(&smmu_mn->mn);
> +}
> +
> +static struct iommu_sva *
> +arm_smmu_sva_bind(struct device *dev, struct mm_struct *mm, void *drvdata)
> +{
> +	struct iommu_sva *handle;
> +	struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
> +	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
> +
> +	if (smmu_domain->stage != ARM_SMMU_DOMAIN_S1)
> +		return ERR_PTR(-EINVAL);
> +
> +	mutex_lock(&arm_smmu_sva_lock);
> +	handle = __arm_smmu_sva_bind(dev, mm);
> +	mutex_unlock(&arm_smmu_sva_lock);
> +	return handle;
> +}
> +
> +static void arm_smmu_sva_unbind(struct iommu_sva *handle)
> +{
> +	mutex_lock(&arm_smmu_sva_lock);
> +	__arm_smmu_sva_unbind(handle);
> +	mutex_unlock(&arm_smmu_sva_lock);
> +}
> +
> +static int arm_smmu_sva_get_pasid(struct iommu_sva *handle)
> +{
> +	struct arm_smmu_bond *bond = sva_to_bond(handle);
> +
> +	return bond->mm->pasid;
> +}
> +
>  static struct platform_driver arm_smmu_driver;
>  
>  static
> @@ -3426,6 +3665,12 @@ static int arm_smmu_dev_disable_sva(struct device *dev)
>  	master->sva_enabled = false;
>  	mutex_unlock(&arm_smmu_sva_lock);
>  
> +	/*
> +	 * Since the MMU notifier ops are held in the domain, it is not safe
> +	 * to free the domain until all MMU notifiers are freed.
> +	 */
> +	mmu_notifier_synchronize();
> +
>  	return 0;
>  }
>  
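About mmu_notifier_synchronize(): the ordering I assume this is meant to
guarantee is roughly the below (my sketch, not from the patch; the helpers
are the generic IOMMU API calls as I understand them, and whoever ends up
freeing the domain -- driver or core -- only does so last):

	iommu_sva_unbind_device(handle);	/* mmu_notifier_put() may defer
						 * free_notifier() to an RCU callback */
	iommu_dev_disable_feature(dev, IOMMU_DEV_FEAT_SVA);
						/* arm_smmu_dev_disable_sva() waits in
						 * mmu_notifier_synchronize() */
	iommu_domain_free(domain);		/* safe: no notifier still references
						 * domain->mn_ops */
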
> @@ -3482,6 +3727,9 @@ static struct iommu_ops arm_smmu_ops = {
>  	.dev_feat_enabled	= arm_smmu_dev_feature_enabled,
>  	.dev_enable_feat	= arm_smmu_dev_enable_feature,
>  	.dev_disable_feat	= arm_smmu_dev_disable_feature,
> +	.sva_bind		= arm_smmu_sva_bind,
> +	.sva_unbind		= arm_smmu_sva_unbind,
> +	.sva_get_pasid		= arm_smmu_sva_get_pasid,
>  	.pgsize_bitmap		= -1UL, /* Restricted during device attach */
>  };
>  

[Jacob Pan]


