Re: [PATCH RFCv2 21/24] iommu/arm-smmu-v3: Enable HTTU for stage1 with io-pgtable mapping

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On 22/05/2023 11:34, Shameerali Kolothum Thodi wrote:
>> -----Original Message-----
>> From: Joao Martins [mailto:joao.m.martins@xxxxxxxxxx]
>> Sent: 18 May 2023 21:47
>> To: iommu@xxxxxxxxxxxxxxx
>> Cc: Jason Gunthorpe <jgg@xxxxxxxxxx>; Kevin Tian <kevin.tian@xxxxxxxxx>;
>> Shameerali Kolothum Thodi <shameerali.kolothum.thodi@xxxxxxxxxx>; Lu
>> Baolu <baolu.lu@xxxxxxxxxxxxxxx>; Yi Liu <yi.l.liu@xxxxxxxxx>; Yi Y Sun
>> <yi.y.sun@xxxxxxxxx>; Eric Auger <eric.auger@xxxxxxxxxx>; Nicolin Chen
>> <nicolinc@xxxxxxxxxx>; Joerg Roedel <joro@xxxxxxxxxx>; Jean-Philippe
>> Brucker <jean-philippe@xxxxxxxxxx>; Suravee Suthikulpanit
>> <suravee.suthikulpanit@xxxxxxx>; Will Deacon <will@xxxxxxxxxx>; Robin
>> Murphy <robin.murphy@xxxxxxx>; Alex Williamson
>> <alex.williamson@xxxxxxxxxx>; kvm@xxxxxxxxxxxxxxx; Joao Martins
>> <joao.m.martins@xxxxxxxxxx>
>> Subject: [PATCH RFCv2 21/24] iommu/arm-smmu-v3: Enable HTTU for
>> stage1 with io-pgtable mapping
>>
>> From: Kunkun Jiang <jiangkunkun@xxxxxxxxxx>
>>
>> As nested mode is not upstreamed yet, we only aim to support dirty
>> log tracking for stage1 with io-pgtable mapping (i.e. SVA mapping is
>> not supported). If HTTU is supported, we enable the HA/HD bits in the
>> SMMU CD and pass the ARM_HD quirk to io-pgtable.
>>
>> We additionally filter out HD|HA if not supported. The CD.HD bit
>> is not particularly useful unless we toggle the DBM bit in the PTE
>> entries.
>>
>> Link:
>> https://lore.kernel.org/lkml/20210413085457.25400-6-zhukeqian1@huawei
>> .com/
>> Co-developed-by: Keqian Zhu <zhukeqian1@xxxxxxxxxx>
>> Signed-off-by: Keqian Zhu <zhukeqian1@xxxxxxxxxx>
>> Signed-off-by: Kunkun Jiang <jiangkunkun@xxxxxxxxxx>
>> [joaomart:Convey HD|HA bits over to the context descriptor
>>  and update commit message; original in Link, where this is based on]
>> Signed-off-by: Joao Martins <joao.m.martins@xxxxxxxxxx>
>> ---
>>  drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 10 ++++++++++
>>  drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h |  3 +++
>>  drivers/iommu/io-pgtable-arm.c              | 11 +++++++++--
>>  include/linux/io-pgtable.h                  |  4 ++++
>>  4 files changed, 26 insertions(+), 2 deletions(-)
>>
>> diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
>> b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
>> index e110ff4710bf..e2b98a6a6b74 100644
>> --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
>> +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
>> @@ -1998,6 +1998,11 @@ static const struct iommu_flush_ops
>> arm_smmu_flush_ops = {
>>  	.tlb_add_page	= arm_smmu_tlb_inv_page_nosync,
>>  };
>>
>> +static bool arm_smmu_dbm_capable(struct arm_smmu_device *smmu)
>> +{
>> +	return smmu->features & (ARM_SMMU_FEAT_HD |
>> ARM_SMMU_FEAT_COHERENCY);
>> +}
>> +
> 
> This will claim DBM capability for systems with just ARM_SMMU_FEAT_COHERENCY.

Gah, yes. It should be:

	(smmu->features & (ARM_SMMU_FEAT_HD | ARM_SMMU_FEAT_COHERENCY)) ==
		(ARM_SMMU_FEAT_HD | ARM_SMMU_FEAT_COHERENCY)

or we could make these two flags a macro of their own.

> 
>>  /* IOMMU API */
>>  static bool arm_smmu_capable(struct device *dev, enum iommu_cap cap)
>>  {
>> @@ -2124,6 +2129,8 @@ static int arm_smmu_domain_finalise_s1(struct
>> arm_smmu_domain *smmu_domain,
>>  			  FIELD_PREP(CTXDESC_CD_0_TCR_SH0, tcr->sh) |
>>  			  FIELD_PREP(CTXDESC_CD_0_TCR_IPS, tcr->ips) |
>>  			  CTXDESC_CD_0_TCR_EPD1 | CTXDESC_CD_0_AA64;
>> +	if (pgtbl_cfg->quirks & IO_PGTABLE_QUIRK_ARM_HD)
>> +		cfg->cd.tcr |= CTXDESC_CD_0_TCR_HA | CTXDESC_CD_0_TCR_HD;
>>  	cfg->cd.mair	= pgtbl_cfg->arm_lpae_s1_cfg.mair;
>>
>>  	/*
>> @@ -2226,6 +2233,9 @@ static int arm_smmu_domain_finalise(struct
>> iommu_domain *domain,
>>  		.iommu_dev	= smmu->dev,
>>  	};
>>
>> +	if (smmu->features & arm_smmu_dbm_capable(smmu))
>> +		pgtbl_cfg.quirks |= IO_PGTABLE_QUIRK_ARM_HD;
>> +
>>  	pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain);
>>  	if (!pgtbl_ops)
>>  		return -ENOMEM;
>> diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
>> b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
>> index d82dd125446c..83d6f3a2554f 100644
>> --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
>> +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
>> @@ -288,6 +288,9 @@
>>  #define CTXDESC_CD_0_TCR_IPS		GENMASK_ULL(34, 32)
>>  #define CTXDESC_CD_0_TCR_TBI0		(1ULL << 38)
>>
>> +#define CTXDESC_CD_0_TCR_HA            (1UL << 43)
>> +#define CTXDESC_CD_0_TCR_HD            (1UL << 42)
>> +
>>  #define CTXDESC_CD_0_AA64		(1UL << 41)
>>  #define CTXDESC_CD_0_S			(1UL << 44)
>>  #define CTXDESC_CD_0_R			(1UL << 45)
>> diff --git a/drivers/iommu/io-pgtable-arm.c
>> b/drivers/iommu/io-pgtable-arm.c
>> index 72dcdd468cf3..b2f470529459 100644
>> --- a/drivers/iommu/io-pgtable-arm.c
>> +++ b/drivers/iommu/io-pgtable-arm.c
>> @@ -75,6 +75,7 @@
>>
>>  #define ARM_LPAE_PTE_NSTABLE		(((arm_lpae_iopte)1) << 63)
>>  #define ARM_LPAE_PTE_XN			(((arm_lpae_iopte)3) << 53)
>> +#define ARM_LPAE_PTE_DBM		(((arm_lpae_iopte)1) << 51)
>>  #define ARM_LPAE_PTE_AF			(((arm_lpae_iopte)1) << 10)
>>  #define ARM_LPAE_PTE_SH_NS		(((arm_lpae_iopte)0) << 8)
>>  #define ARM_LPAE_PTE_SH_OS		(((arm_lpae_iopte)2) << 8)
>> @@ -84,7 +85,7 @@
>>
>>  #define ARM_LPAE_PTE_ATTR_LO_MASK	(((arm_lpae_iopte)0x3ff) << 2)
>>  /* Ignore the contiguous bit for block splitting */
>> -#define ARM_LPAE_PTE_ATTR_HI_MASK	(((arm_lpae_iopte)6) << 52)
>> +#define ARM_LPAE_PTE_ATTR_HI_MASK	(((arm_lpae_iopte)13) << 51)
>>  #define ARM_LPAE_PTE_ATTR_MASK		(ARM_LPAE_PTE_ATTR_LO_MASK
>> |	\
>>  					 ARM_LPAE_PTE_ATTR_HI_MASK)
>>  /* Software bit for solving coherency races */
>> @@ -93,6 +94,9 @@
>>  /* Stage-1 PTE */
>>  #define ARM_LPAE_PTE_AP_UNPRIV		(((arm_lpae_iopte)1) << 6)
>>  #define ARM_LPAE_PTE_AP_RDONLY		(((arm_lpae_iopte)2) << 6)
>> +#define ARM_LPAE_PTE_AP_RDONLY_BIT	7
>> +#define ARM_LPAE_PTE_AP_WRITABLE	(ARM_LPAE_PTE_AP_RDONLY | \
>> +					 ARM_LPAE_PTE_DBM)
>>  #define ARM_LPAE_PTE_ATTRINDX_SHIFT	2
>>  #define ARM_LPAE_PTE_nG			(((arm_lpae_iopte)1) << 11)
>>
>> @@ -407,6 +411,8 @@ static arm_lpae_iopte arm_lpae_prot_to_pte(struct
>> arm_lpae_io_pgtable *data,
>>  		pte = ARM_LPAE_PTE_nG;
>>  		if (!(prot & IOMMU_WRITE) && (prot & IOMMU_READ))
>>  			pte |= ARM_LPAE_PTE_AP_RDONLY;
>> +		else if (data->iop.cfg.quirks & IO_PGTABLE_QUIRK_ARM_HD)
>> +			pte |= ARM_LPAE_PTE_AP_WRITABLE;
>>  		if (!(prot & IOMMU_PRIV))
>>  			pte |= ARM_LPAE_PTE_AP_UNPRIV;
>>  	} else {
>> @@ -804,7 +810,8 @@ arm_64_lpae_alloc_pgtable_s1(struct
>> io_pgtable_cfg *cfg, void *cookie)
>>
>>  	if (cfg->quirks & ~(IO_PGTABLE_QUIRK_ARM_NS |
>>  			    IO_PGTABLE_QUIRK_ARM_TTBR1 |
>> -			    IO_PGTABLE_QUIRK_ARM_OUTER_WBWA))
>> +			    IO_PGTABLE_QUIRK_ARM_OUTER_WBWA |
>> +			    IO_PGTABLE_QUIRK_ARM_HD))
>>  		return NULL;
>>
>>  	data = arm_lpae_alloc_pgtable(cfg);
>> diff --git a/include/linux/io-pgtable.h b/include/linux/io-pgtable.h
>> index 25142a0e2fc2..9a996ba7856d 100644
>> --- a/include/linux/io-pgtable.h
>> +++ b/include/linux/io-pgtable.h
>> @@ -85,6 +85,8 @@ struct io_pgtable_cfg {
>>  	 *
>>  	 * IO_PGTABLE_QUIRK_ARM_OUTER_WBWA: Override the
>> outer-cacheability
>>  	 *	attributes set in the TCR for a non-coherent page-table walker.
>> +	 *
>> +	 * IO_PGTABLE_QUIRK_ARM_HD: Enables dirty tracking.
>>  	 */
>>  	#define IO_PGTABLE_QUIRK_ARM_NS			BIT(0)
>>  	#define IO_PGTABLE_QUIRK_NO_PERMS		BIT(1)
>> @@ -92,6 +94,8 @@ struct io_pgtable_cfg {
>>  	#define IO_PGTABLE_QUIRK_ARM_MTK_TTBR_EXT	BIT(4)
>>  	#define IO_PGTABLE_QUIRK_ARM_TTBR1		BIT(5)
>>  	#define IO_PGTABLE_QUIRK_ARM_OUTER_WBWA		BIT(6)
>> +	#define IO_PGTABLE_QUIRK_ARM_HD			BIT(7)
>> +
>>  	unsigned long			quirks;
>>  	unsigned long			pgsize_bitmap;
>>  	unsigned int			ias;
>> --
>> 2.17.2
> 



[Index of Archives]     [KVM ARM]     [KVM ia64]     [KVM ppc]     [Virtualization Tools]     [Spice Development]     [Libvirt]     [Libvirt Users]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite Questions]     [Linux Kernel]     [Linux SCSI]     [XFree86]

  Powered by Linux