RE: [PATCH RFC 14/19] iommu/arm-smmu-v3: Add read_and_clear_dirty() support

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 




> -----Original Message-----
> From: Joao Martins [mailto:joao.m.martins@xxxxxxxxxx]
> Sent: 28 April 2022 22:09
> To: iommu@xxxxxxxxxxxxxxxxxxxxxxxxxx
> Cc: Joao Martins <joao.m.martins@xxxxxxxxxx>; Joerg Roedel
> <joro@xxxxxxxxxx>; Suravee Suthikulpanit
> <suravee.suthikulpanit@xxxxxxx>; Will Deacon <will@xxxxxxxxxx>; Robin
> Murphy <robin.murphy@xxxxxxx>; Jean-Philippe Brucker
> <jean-philippe@xxxxxxxxxx>; zhukeqian <zhukeqian1@xxxxxxxxxx>;
> Shameerali Kolothum Thodi <shameerali.kolothum.thodi@xxxxxxxxxx>;
> David Woodhouse <dwmw2@xxxxxxxxxxxxx>; Lu Baolu
> <baolu.lu@xxxxxxxxxxxxxxx>; Jason Gunthorpe <jgg@xxxxxxxxxx>; Nicolin
> Chen <nicolinc@xxxxxxxxxx>; Yishai Hadas <yishaih@xxxxxxxxxx>; Kevin Tian
> <kevin.tian@xxxxxxxxx>; Eric Auger <eric.auger@xxxxxxxxxx>; Yi Liu
> <yi.l.liu@xxxxxxxxx>; Alex Williamson <alex.williamson@xxxxxxxxxx>;
> Cornelia Huck <cohuck@xxxxxxxxxx>; kvm@xxxxxxxxxxxxxxx; jiangkunkun
> <jiangkunkun@xxxxxxxxxx>
> Subject: [PATCH RFC 14/19] iommu/arm-smmu-v3: Add
> read_and_clear_dirty() support
> 
> .read_and_clear_dirty() IOMMU domain op takes care of
> reading the dirty bits (i.e. PTE has both DBM and AP[2] set)
> and marshalling into a bitmap of a given page size.
> 
> While reading the dirty bits we also clear the PTE AP[2]
> bit to mark it as writable-clean.
> 
> Structure it in a way that the IOPTE walker is generic,
> and so we pass a function pointer over what to do on a per-PTE
> basis. This is useful for a followup patch where we supply an
> io-pgtable op to enable DBM when starting/stopping dirty tracking.
> 
> Co-developed-by: Keqian Zhu <zhukeqian1@xxxxxxxxxx>
> Signed-off-by: Keqian Zhu <zhukeqian1@xxxxxxxxxx>
> Co-developed-by: Kunkun Jiang <jiangkunkun@xxxxxxxxxx>
> Signed-off-by: Kunkun Jiang <jiangkunkun@xxxxxxxxxx>
> Signed-off-by: Joao Martins <joao.m.martins@xxxxxxxxxx>
> ---
>  drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c |  27 ++++++
>  drivers/iommu/io-pgtable-arm.c              | 102
> +++++++++++++++++++-
>  2 files changed, 128 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
> b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
> index 4dba53bde2e3..232057d20197 100644
> --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
> +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
> @@ -2743,6 +2743,32 @@ static int arm_smmu_enable_nesting(struct
> iommu_domain *domain)
>  	return ret;
>  }
> 
> +static int arm_smmu_read_and_clear_dirty(struct iommu_domain
> *domain,
> +					 unsigned long iova, size_t size,
> +					 struct iommu_dirty_bitmap *dirty)
> +{
> +	struct arm_smmu_domain *smmu_domain =
> to_smmu_domain(domain);
> +	struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
> +	struct arm_smmu_device *smmu = smmu_domain->smmu;
> +	int ret;
> +
> +	if (!(smmu->features & ARM_SMMU_FEAT_HD) ||
> +	    !(smmu->features & ARM_SMMU_FEAT_BBML2))
> +		return -ENODEV;
> +
> +	if (smmu_domain->stage != ARM_SMMU_DOMAIN_S1)
> +		return -EINVAL;
> +
> +	if (!ops || !ops->read_and_clear_dirty) {
> +		pr_err_once("io-pgtable don't support dirty tracking\n");
> +		return -ENODEV;
> +	}
> +
> +	ret = ops->read_and_clear_dirty(ops, iova, size, dirty);
> +
> +	return ret;
> +}
> +
>  static int arm_smmu_of_xlate(struct device *dev, struct of_phandle_args
> *args)
>  {
>  	return iommu_fwspec_add_ids(dev, args->args, 1);
> @@ -2871,6 +2897,7 @@ static struct iommu_ops arm_smmu_ops = {
>  		.iova_to_phys		= arm_smmu_iova_to_phys,
>  		.enable_nesting		= arm_smmu_enable_nesting,
>  		.free			= arm_smmu_domain_free,
> +		.read_and_clear_dirty	= arm_smmu_read_and_clear_dirty,
>  	}
>  };
> 
> diff --git a/drivers/iommu/io-pgtable-arm.c
> b/drivers/iommu/io-pgtable-arm.c
> index 94ff319ae8ac..3c99028d315a 100644
> --- a/drivers/iommu/io-pgtable-arm.c
> +++ b/drivers/iommu/io-pgtable-arm.c
> @@ -75,6 +75,7 @@
> 
>  #define ARM_LPAE_PTE_NSTABLE		(((arm_lpae_iopte)1) << 63)
>  #define ARM_LPAE_PTE_XN			(((arm_lpae_iopte)3) << 53)
> +#define ARM_LPAE_PTE_DBM		(((arm_lpae_iopte)1) << 51)
>  #define ARM_LPAE_PTE_AF			(((arm_lpae_iopte)1) << 10)
>  #define ARM_LPAE_PTE_SH_NS		(((arm_lpae_iopte)0) << 8)
>  #define ARM_LPAE_PTE_SH_OS		(((arm_lpae_iopte)2) << 8)
> @@ -84,7 +85,7 @@
> 
>  #define ARM_LPAE_PTE_ATTR_LO_MASK	(((arm_lpae_iopte)0x3ff) << 2)
>  /* Ignore the contiguous bit for block splitting */
> -#define ARM_LPAE_PTE_ATTR_HI_MASK	(((arm_lpae_iopte)6) << 52)
> +#define ARM_LPAE_PTE_ATTR_HI_MASK	(((arm_lpae_iopte)13) << 51)
>  #define ARM_LPAE_PTE_ATTR_MASK		(ARM_LPAE_PTE_ATTR_LO_MASK
> |	\
>  					 ARM_LPAE_PTE_ATTR_HI_MASK)
>  /* Software bit for solving coherency races */
> @@ -93,6 +94,9 @@
>  /* Stage-1 PTE */
>  #define ARM_LPAE_PTE_AP_UNPRIV		(((arm_lpae_iopte)1) << 6)
>  #define ARM_LPAE_PTE_AP_RDONLY		(((arm_lpae_iopte)2) << 6)
> +#define ARM_LPAE_PTE_AP_RDONLY_BIT	7
> +#define ARM_LPAE_PTE_AP_WRITABLE	(ARM_LPAE_PTE_AP_RDONLY | \
> +					 ARM_LPAE_PTE_DBM)
>  #define ARM_LPAE_PTE_ATTRINDX_SHIFT	2
>  #define ARM_LPAE_PTE_nG			(((arm_lpae_iopte)1) << 11)
> 
> @@ -737,6 +741,101 @@ static phys_addr_t arm_lpae_iova_to_phys(struct
> io_pgtable_ops *ops,
>  	return iopte_to_paddr(pte, data) | iova;
>  }
> 
> +static int __arm_lpae_read_and_clear_dirty(unsigned long iova, size_t size,
> +					   arm_lpae_iopte *ptep, void *opaque)
> +{
> +	struct iommu_dirty_bitmap *dirty = opaque;
> +	arm_lpae_iopte pte;
> +
> +	pte = READ_ONCE(*ptep);
> +	if (WARN_ON(!pte))
> +		return -EINVAL;
> +
> +	if (pte & ARM_LPAE_PTE_AP_WRITABLE)
> +		return 0;

We might have set ARM_LPAE_PTE_DBM already. So does the above needs to be,
if ((pte & ARM_LPAE_PTE_AP_WRITABLE) == ARM_LPAE_PTE_AP_WRITABLE) ?

Thanks,
Shameer

> +
> +	if (!(pte & ARM_LPAE_PTE_DBM))
> +		return 0;
> +
> +	iommu_dirty_bitmap_record(dirty, iova, size);
> +	set_bit(ARM_LPAE_PTE_AP_RDONLY_BIT, (unsigned long *)ptep);
> +	return 0;
> +}
> +
> +static int __arm_lpae_iopte_walk(struct arm_lpae_io_pgtable *data,
> +				 unsigned long iova, size_t size,
> +				 int lvl, arm_lpae_iopte *ptep,
> +				 int (*fn)(unsigned long iova, size_t size,
> +					   arm_lpae_iopte *pte, void *opaque),
> +				 void *opaque)
> +{
> +	arm_lpae_iopte pte;
> +	struct io_pgtable *iop = &data->iop;
> +	size_t base, next_size;
> +	int ret;
> +
> +	if (WARN_ON_ONCE(!fn))
> +		return -EINVAL;
> +
> +	if (WARN_ON(lvl == ARM_LPAE_MAX_LEVELS))
> +		return -EINVAL;
> +
> +	ptep += ARM_LPAE_LVL_IDX(iova, lvl, data);
> +	pte = READ_ONCE(*ptep);
> +	if (WARN_ON(!pte))
> +		return -EINVAL;
> +
> +	if (size == ARM_LPAE_BLOCK_SIZE(lvl, data)) {
> +		if (iopte_leaf(pte, lvl, iop->fmt))
> +			return fn(iova, size, ptep, opaque);
> +
> +		/* Current level is table, traverse next level */
> +		next_size = ARM_LPAE_BLOCK_SIZE(lvl + 1, data);
> +		ptep = iopte_deref(pte, data);
> +		for (base = 0; base < size; base += next_size) {
> +			ret = __arm_lpae_iopte_walk(data, iova + base,
> +						    next_size, lvl + 1, ptep,
> +						    fn, opaque);
> +			if (ret)
> +				return ret;
> +		}
> +		return 0;
> +	} else if (iopte_leaf(pte, lvl, iop->fmt)) {
> +		return fn(iova, size, ptep, opaque);
> +	}
> +
> +	/* Keep on walkin */
> +	ptep = iopte_deref(pte, data);
> +	return __arm_lpae_iopte_walk(data, iova, size, lvl + 1, ptep,
> +				     fn, opaque);
> +}
> +
> +static int arm_lpae_read_and_clear_dirty(struct io_pgtable_ops *ops,
> +					 unsigned long iova, size_t size,
> +					 struct iommu_dirty_bitmap *dirty)
> +{
> +	struct arm_lpae_io_pgtable *data = io_pgtable_ops_to_data(ops);
> +	struct io_pgtable_cfg *cfg = &data->iop.cfg;
> +	arm_lpae_iopte *ptep = data->pgd;
> +	int lvl = data->start_level;
> +	long iaext = (s64)iova >> cfg->ias;
> +
> +	if (WARN_ON(!size || (size & cfg->pgsize_bitmap) != size))
> +		return -EINVAL;
> +
> +	if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_TTBR1)
> +		iaext = ~iaext;
> +	if (WARN_ON(iaext))
> +		return -EINVAL;
> +
> +	if (data->iop.fmt != ARM_64_LPAE_S1 &&
> +	    data->iop.fmt != ARM_32_LPAE_S1)
> +		return -EINVAL;
> +
> +	return __arm_lpae_iopte_walk(data, iova, size, lvl, ptep,
> +				     __arm_lpae_read_and_clear_dirty, dirty);
> +}
> +
>  static void arm_lpae_restrict_pgsizes(struct io_pgtable_cfg *cfg)
>  {
>  	unsigned long granule, page_sizes;
> @@ -817,6 +916,7 @@ arm_lpae_alloc_pgtable(struct io_pgtable_cfg *cfg)
>  		.unmap		= arm_lpae_unmap,
>  		.unmap_pages	= arm_lpae_unmap_pages,
>  		.iova_to_phys	= arm_lpae_iova_to_phys,
> +		.read_and_clear_dirty = arm_lpae_read_and_clear_dirty,
>  	};
> 
>  	return data;
> --
> 2.17.2




[Index of Archives]     [KVM ARM]     [KVM ia64]     [KVM ppc]     [Virtualization Tools]     [Spice Development]     [Libvirt]     [Libvirt Users]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite Questions]     [Linux Kernel]     [Linux SCSI]     [XFree86]

  Powered by Linux