Re: [PATCH RFCv2 22/24] iommu/arm-smmu-v3: Add read_and_clear_dirty() support

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On 16/06/2023 17:46, Shameerali Kolothum Thodi wrote:
> Hi Joao,
> 
>> -----Original Message-----
>> From: Joao Martins [mailto:joao.m.martins@xxxxxxxxxx]
>> Sent: 18 May 2023 21:47
>> To: iommu@xxxxxxxxxxxxxxx
>> Cc: Jason Gunthorpe <jgg@xxxxxxxxxx>; Kevin Tian <kevin.tian@xxxxxxxxx>;
>> Shameerali Kolothum Thodi <shameerali.kolothum.thodi@xxxxxxxxxx>; Lu
>> Baolu <baolu.lu@xxxxxxxxxxxxxxx>; Yi Liu <yi.l.liu@xxxxxxxxx>; Yi Y Sun
>> <yi.y.sun@xxxxxxxxx>; Eric Auger <eric.auger@xxxxxxxxxx>; Nicolin Chen
>> <nicolinc@xxxxxxxxxx>; Joerg Roedel <joro@xxxxxxxxxx>; Jean-Philippe
>> Brucker <jean-philippe@xxxxxxxxxx>; Suravee Suthikulpanit
>> <suravee.suthikulpanit@xxxxxxx>; Will Deacon <will@xxxxxxxxxx>; Robin
>> Murphy <robin.murphy@xxxxxxx>; Alex Williamson
>> <alex.williamson@xxxxxxxxxx>; kvm@xxxxxxxxxxxxxxx; Joao Martins
>> <joao.m.martins@xxxxxxxxxx>
>> Subject: [PATCH RFCv2 22/24] iommu/arm-smmu-v3: Add
>> read_and_clear_dirty() support
>>
>> From: Keqian Zhu <zhukeqian1@xxxxxxxxxx>
>>
>> .read_and_clear_dirty() IOMMU domain op takes care of reading the dirty
>> bits (i.e. PTE has both DBM and AP[2] set) and marshalling into a bitmap of
>> a given page size.
>>
>> While reading the dirty bits we also clear the PTE AP[2] bit to mark it as
>> writable-clean depending on read_and_clear_dirty() flags.
>>
>> Structure it in a way that the IOPTE walker is generic, and so we pass a
>> function pointer over what to do on a per-PTE basis.
>>
>> [Link below points to the original version that was based on]
>>
>> Link:
>> https://lore.kernel.org/lkml/20210413085457.25400-11-zhukeqian1@huaw
>> ei.com/
>> Co-developed-by: Keqian Zhu <zhukeqian1@xxxxxxxxxx>
>> Co-developed-by: Kunkun Jiang <jiangkunkun@xxxxxxxxxx>
>> Signed-off-by: Kunkun Jiang <jiangkunkun@xxxxxxxxxx>
>> [joaomart: Massage commit message]
>> Co-developed-by: Joao Martins <joao.m.martins@xxxxxxxxxx>
>> Signed-off-by: Joao Martins <joao.m.martins@xxxxxxxxxx>
>> ---
>>  drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c |  23 +++++
>>  drivers/iommu/io-pgtable-arm.c              | 104
>> ++++++++++++++++++++
>>  2 files changed, 127 insertions(+)
>>
>> diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
>> b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
>> index e2b98a6a6b74..2cde14003469 100644
>> --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
>> +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
>> @@ -2765,6 +2765,28 @@ static int arm_smmu_enable_nesting(struct
>> iommu_domain *domain)
>>  	return ret;
>>  }
>>
>> +static int arm_smmu_read_and_clear_dirty(struct iommu_domain
>> *domain,
>> +					 unsigned long iova, size_t size,
>> +					 unsigned long flags,
>> +					 struct iommu_dirty_bitmap *dirty)
>> +{
>> +	struct arm_smmu_domain *smmu_domain =
>> to_smmu_domain(domain);
>> +	struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
>> +	int ret;
>> +
>> +	if (smmu_domain->stage != ARM_SMMU_DOMAIN_S1)
>> +		return -EINVAL;
>> +
>> +	if (!ops || !ops->read_and_clear_dirty) {
>> +		pr_err_once("io-pgtable don't support dirty tracking\n");
>> +		return -ENODEV;
>> +	}
>> +
>> +	ret = ops->read_and_clear_dirty(ops, iova, size, flags, dirty);
>> +
>> +	return ret;
>> +}
>> +
>>  static int arm_smmu_of_xlate(struct device *dev, struct of_phandle_args
>> *args)
>>  {
>>  	return iommu_fwspec_add_ids(dev, args->args, 1);
>> @@ -2893,6 +2915,7 @@ static struct iommu_ops arm_smmu_ops = {
>>  		.iova_to_phys		= arm_smmu_iova_to_phys,
>>  		.enable_nesting		= arm_smmu_enable_nesting,
>>  		.free			= arm_smmu_domain_free,
>> +		.read_and_clear_dirty	= arm_smmu_read_and_clear_dirty,
>>  	}
>>  };
>>
>> diff --git a/drivers/iommu/io-pgtable-arm.c
>> b/drivers/iommu/io-pgtable-arm.c
>> index b2f470529459..de9e61f8452d 100644
>> --- a/drivers/iommu/io-pgtable-arm.c
>> +++ b/drivers/iommu/io-pgtable-arm.c
>> @@ -717,6 +717,109 @@ static phys_addr_t arm_lpae_iova_to_phys(struct
>> io_pgtable_ops *ops,
>>  	return iopte_to_paddr(pte, data) | iova;
>>  }
>>
>> +struct arm_lpae_iopte_read_dirty {
>> +	unsigned long flags;
>> +	struct iommu_dirty_bitmap *dirty;
>> +};
>> +
>> +static int __arm_lpae_read_and_clear_dirty(unsigned long iova, size_t size,
>> +					   arm_lpae_iopte *ptep, void *opaque)
>> +{
>> +	struct arm_lpae_iopte_read_dirty *arg = opaque;
>> +	struct iommu_dirty_bitmap *dirty = arg->dirty;
>> +	arm_lpae_iopte pte;
>> +
>> +	pte = READ_ONCE(*ptep);
>> +	if (WARN_ON(!pte))
>> +		return -EINVAL;
>> +
>> +	if ((pte & ARM_LPAE_PTE_AP_WRITABLE) ==
>> ARM_LPAE_PTE_AP_WRITABLE)
>> +		return 0;
>> +
>> +	iommu_dirty_bitmap_record(dirty, iova, size);
>> +	if (!(arg->flags & IOMMU_DIRTY_NO_CLEAR))
>> +		set_bit(ARM_LPAE_PTE_AP_RDONLY_BIT, (unsigned long *)ptep);
>> +	return 0;
>> +}
>> +
>> +static int __arm_lpae_iopte_walk(struct arm_lpae_io_pgtable *data,
>> +				 unsigned long iova, size_t size,
>> +				 int lvl, arm_lpae_iopte *ptep,
>> +				 int (*fn)(unsigned long iova, size_t size,
>> +					   arm_lpae_iopte *pte, void *opaque),
>> +				 void *opaque)
>> +{
>> +	arm_lpae_iopte pte;
>> +	struct io_pgtable *iop = &data->iop;
>> +	size_t base, next_size;
>> +	int ret;
>> +
>> +	if (WARN_ON_ONCE(!fn))
>> +		return -EINVAL;
>> +
>> +	if (WARN_ON(lvl == ARM_LPAE_MAX_LEVELS))
>> +		return -EINVAL;
>> +
>> +	ptep += ARM_LPAE_LVL_IDX(iova, lvl, data);
>> +	pte = READ_ONCE(*ptep);
>> +	if (WARN_ON(!pte))
>> +		return -EINVAL;
>> +
>> +	if (size == ARM_LPAE_BLOCK_SIZE(lvl, data)) {
>> +		if (iopte_leaf(pte, lvl, iop->fmt))
>> +			return fn(iova, size, ptep, opaque);
>> +
>> +		/* Current level is table, traverse next level */
>> +		next_size = ARM_LPAE_BLOCK_SIZE(lvl + 1, data);
>> +		ptep = iopte_deref(pte, data);
>> +		for (base = 0; base < size; base += next_size) {
>> +			ret = __arm_lpae_iopte_walk(data, iova + base,
>> +						    next_size, lvl + 1, ptep,
>> +						    fn, opaque);
>> +			if (ret)
>> +				return ret;
>> +		}
>> +		return 0;
>> +	} else if (iopte_leaf(pte, lvl, iop->fmt)) {
>> +		return fn(iova, size, ptep, opaque);
>> +	}
>> +
>> +	/* Keep on walkin */
>> +	ptep = iopte_deref(pte, data);
>> +	return __arm_lpae_iopte_walk(data, iova, size, lvl + 1, ptep,
>> +				     fn, opaque);
>> +}
>> +
>> +static int arm_lpae_read_and_clear_dirty(struct io_pgtable_ops *ops,
>> +					 unsigned long iova, size_t size,
>> +					 unsigned long flags,
>> +					 struct iommu_dirty_bitmap *dirty)
>> +{
>> +	struct arm_lpae_io_pgtable *data = io_pgtable_ops_to_data(ops);
>> +	struct io_pgtable_cfg *cfg = &data->iop.cfg;
>> +	struct arm_lpae_iopte_read_dirty arg = {
>> +		.flags = flags, .dirty = dirty,
>> +	};
>> +	arm_lpae_iopte *ptep = data->pgd;
>> +	int lvl = data->start_level;
>> +	long iaext = (s64)iova >> cfg->ias;
>> +
>> +	if (WARN_ON(!size || (size & cfg->pgsize_bitmap) != size))
>> +		return -EINVAL;
> 
> I guess the size here is supposed to be one of the pgsize that iommu supports.
> But looking at the code, it looks like we are passing the iova mapped length and
> it fails here in my test setup. Could you please check and confirm.
> 
I think this might be from the original patch, and it's meant to test that
length is aligned to the page size, but I failed to removed it this snip. We
should remove this.

> Thanks,
> Shameer
> 
> 
>> +
>> +	if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_TTBR1)
>> +		iaext = ~iaext;
>> +	if (WARN_ON(iaext))
>> +		return -EINVAL;
>> +
>> +	if (data->iop.fmt != ARM_64_LPAE_S1 &&
>> +	    data->iop.fmt != ARM_32_LPAE_S1)
>> +		return -EINVAL;
>> +
>> +	return __arm_lpae_iopte_walk(data, iova, size, lvl, ptep,
>> +				     __arm_lpae_read_and_clear_dirty, &arg);
>> +}
>> +
>>  static void arm_lpae_restrict_pgsizes(struct io_pgtable_cfg *cfg)
>>  {
>>  	unsigned long granule, page_sizes;
>> @@ -795,6 +898,7 @@ arm_lpae_alloc_pgtable(struct io_pgtable_cfg *cfg)
>>  		.map_pages	= arm_lpae_map_pages,
>>  		.unmap_pages	= arm_lpae_unmap_pages,
>>  		.iova_to_phys	= arm_lpae_iova_to_phys,
>> +		.read_and_clear_dirty = arm_lpae_read_and_clear_dirty,
>>  	};
>>
>>  	return data;
>> --
>> 2.17.2
> 



[Index of Archives]     [KVM ARM]     [KVM ia64]     [KVM ppc]     [Virtualization Tools]     [Spice Development]     [Libvirt]     [Libvirt Users]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite Questions]     [Linux Kernel]     [Linux SCSI]     [XFree86]

  Powered by Linux