Hi Jason, Sorry, I haven’t followed up on that, I was out for a while. On Tue, Aug 27, 2024 at 12:51:32PM -0300, Jason Gunthorpe wrote: > Force Write Back (FWB) changes how the S2 IOPTE's MemAttr field > works. When S2FWB is supported and enabled the IOPTE will force cachable > access to IOMMU_CACHE memory when nesting with a S1 and deny cachable > access otherwise. > > When using a single stage of translation, a simple S2 domain, it doesn't > change anything as it is just a different encoding for the exsting mapping > of the IOMMU protection flags to cachability attributes. > > However, when used with a nested S1, FWB has the effect of preventing the > guest from choosing a MemAttr in it's S1 that would cause ordinary DMA to > bypass the cache. Consistent with KVM we wish to deny the guest the > ability to become incoherent with cached memory the hypervisor believes is > cachable so we don't have to flush it. > > Turn on S2FWB whenever the SMMU supports it and use it for all S2 > mappings. > > Signed-off-by: Jason Gunthorpe <jgg@xxxxxxxxxx> > --- > drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 11 +++++++++ > drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h | 3 +++ > drivers/iommu/io-pgtable-arm.c | 27 +++++++++++++++++---- > include/linux/io-pgtable.h | 2 ++ > 4 files changed, 38 insertions(+), 5 deletions(-) > > diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c > index 531125f231b662..e2b97ad6d74b03 100644 > --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c > +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c > @@ -1612,6 +1612,8 @@ void arm_smmu_make_s2_domain_ste(struct arm_smmu_ste *target, > FIELD_PREP(STRTAB_STE_1_EATS, > ats_enabled ? STRTAB_STE_1_EATS_TRANS : 0)); > > + if (smmu->features & ARM_SMMU_FEAT_S2FWB) > + target->data[1] |= cpu_to_le64(STRTAB_STE_1_S2FWB); > if (smmu->features & ARM_SMMU_FEAT_ATTR_TYPES_OVR) > target->data[1] |= cpu_to_le64(FIELD_PREP(STRTAB_STE_1_SHCFG, > STRTAB_STE_1_SHCFG_INCOMING)); > @@ -2400,6 +2402,8 @@ static int arm_smmu_domain_finalise(struct arm_smmu_domain *smmu_domain, > pgtbl_cfg.oas = smmu->oas; > fmt = ARM_64_LPAE_S2; > finalise_stage_fn = arm_smmu_domain_finalise_s2; > + if (smmu->features & ARM_SMMU_FEAT_S2FWB) > + pgtbl_cfg.quirks |= IO_PGTABLE_QUIRK_ARM_S2FWB; > break; > default: > return -EINVAL; > @@ -4189,6 +4193,13 @@ static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu) > > /* IDR3 */ > reg = readl_relaxed(smmu->base + ARM_SMMU_IDR3); > + /* > + * If for some reason the HW does not support DMA coherency then using > + * S2FWB won't work. This will also disable nesting support. > + */ > + if (FIELD_GET(IDR3_FWB, reg) && > + (smmu->features & ARM_SMMU_FEAT_COHERENCY)) > + smmu->features |= ARM_SMMU_FEAT_S2FWB; I think that’s for the SMMU coherency which in theory is not related to the master which FWB overrides, so this check is not correct. What I meant in the previous thread that we should set FWB only for coherent masters as (in attach s2): if (smmu->features & ARM_SMMU_FEAT_S2FWB && dev_is_dma_coherent(master->dev) // set S2FWB in STE Thanks, Mostafa > if (FIELD_GET(IDR3_RIL, reg)) > smmu->features |= ARM_SMMU_FEAT_RANGE_INV; > > diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h > index 8851a7abb5f0f3..7e8d2f36faebf3 100644 > --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h > +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h > @@ -55,6 +55,7 @@ > #define IDR1_SIDSIZE GENMASK(5, 0) > > #define ARM_SMMU_IDR3 0xc > +#define IDR3_FWB (1 << 8) > #define IDR3_RIL (1 << 10) > > #define ARM_SMMU_IDR5 0x14 > @@ -258,6 +259,7 @@ static inline u32 arm_smmu_strtab_l2_idx(u32 sid) > #define STRTAB_STE_1_S1CSH GENMASK_ULL(7, 6) > > #define STRTAB_STE_1_S1STALLD (1UL << 27) > +#define STRTAB_STE_1_S2FWB (1UL << 25) > > #define STRTAB_STE_1_EATS GENMASK_ULL(29, 28) > #define STRTAB_STE_1_EATS_ABT 0UL > @@ -700,6 +702,7 @@ struct arm_smmu_device { > #define ARM_SMMU_FEAT_ATTR_TYPES_OVR (1 << 20) > #define ARM_SMMU_FEAT_HA (1 << 21) > #define ARM_SMMU_FEAT_HD (1 << 22) > +#define ARM_SMMU_FEAT_S2FWB (1 << 23) > u32 features; > > #define ARM_SMMU_OPT_SKIP_PREFETCH (1 << 0) > diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c > index f5d9fd1f45bf49..9b3658aae21005 100644 > --- a/drivers/iommu/io-pgtable-arm.c > +++ b/drivers/iommu/io-pgtable-arm.c > @@ -106,6 +106,18 @@ > #define ARM_LPAE_PTE_HAP_FAULT (((arm_lpae_iopte)0) << 6) > #define ARM_LPAE_PTE_HAP_READ (((arm_lpae_iopte)1) << 6) > #define ARM_LPAE_PTE_HAP_WRITE (((arm_lpae_iopte)2) << 6) > +/* > + * For !FWB these code to: > + * 1111 = Normal outer write back cachable / Inner Write Back Cachable > + * Permit S1 to override > + * 0101 = Normal Non-cachable / Inner Non-cachable > + * 0001 = Device / Device-nGnRE > + * For S2FWB these code: > + * 0110 Force Normal Write Back > + * 0101 Normal* is forced Normal-NC, Device unchanged > + * 0001 Force Device-nGnRE > + */ > +#define ARM_LPAE_PTE_MEMATTR_FWB_WB (((arm_lpae_iopte)0x6) << 2) > #define ARM_LPAE_PTE_MEMATTR_OIWB (((arm_lpae_iopte)0xf) << 2) > #define ARM_LPAE_PTE_MEMATTR_NC (((arm_lpae_iopte)0x5) << 2) > #define ARM_LPAE_PTE_MEMATTR_DEV (((arm_lpae_iopte)0x1) << 2) > @@ -458,12 +470,16 @@ static arm_lpae_iopte arm_lpae_prot_to_pte(struct arm_lpae_io_pgtable *data, > */ > if (data->iop.fmt == ARM_64_LPAE_S2 || > data->iop.fmt == ARM_32_LPAE_S2) { > - if (prot & IOMMU_MMIO) > + if (prot & IOMMU_MMIO) { > pte |= ARM_LPAE_PTE_MEMATTR_DEV; > - else if (prot & IOMMU_CACHE) > - pte |= ARM_LPAE_PTE_MEMATTR_OIWB; > - else > + } else if (prot & IOMMU_CACHE) { > + if (data->iop.cfg.quirks & IO_PGTABLE_QUIRK_ARM_S2FWB) > + pte |= ARM_LPAE_PTE_MEMATTR_FWB_WB; > + else > + pte |= ARM_LPAE_PTE_MEMATTR_OIWB; > + } else { > pte |= ARM_LPAE_PTE_MEMATTR_NC; > + } > } else { > if (prot & IOMMU_MMIO) > pte |= (ARM_LPAE_MAIR_ATTR_IDX_DEV > @@ -932,7 +948,8 @@ arm_64_lpae_alloc_pgtable_s1(struct io_pgtable_cfg *cfg, void *cookie) > if (cfg->quirks & ~(IO_PGTABLE_QUIRK_ARM_NS | > IO_PGTABLE_QUIRK_ARM_TTBR1 | > IO_PGTABLE_QUIRK_ARM_OUTER_WBWA | > - IO_PGTABLE_QUIRK_ARM_HD)) > + IO_PGTABLE_QUIRK_ARM_HD | > + IO_PGTABLE_QUIRK_ARM_S2FWB)) > return NULL; > > data = arm_lpae_alloc_pgtable(cfg); > diff --git a/include/linux/io-pgtable.h b/include/linux/io-pgtable.h > index f9a81761bfceda..aff9b020b6dcc7 100644 > --- a/include/linux/io-pgtable.h > +++ b/include/linux/io-pgtable.h > @@ -87,6 +87,7 @@ struct io_pgtable_cfg { > * attributes set in the TCR for a non-coherent page-table walker. > * > * IO_PGTABLE_QUIRK_ARM_HD: Enables dirty tracking in stage 1 pagetable. > + * IO_PGTABLE_QUIRK_ARM_S2FWB: Use the FWB format for the MemAttrs bits > */ > #define IO_PGTABLE_QUIRK_ARM_NS BIT(0) > #define IO_PGTABLE_QUIRK_NO_PERMS BIT(1) > @@ -95,6 +96,7 @@ struct io_pgtable_cfg { > #define IO_PGTABLE_QUIRK_ARM_TTBR1 BIT(5) > #define IO_PGTABLE_QUIRK_ARM_OUTER_WBWA BIT(6) > #define IO_PGTABLE_QUIRK_ARM_HD BIT(7) > + #define IO_PGTABLE_QUIRK_ARM_S2FWB BIT(8) > unsigned long quirks; > unsigned long pgsize_bitmap; > unsigned int ias; > -- > 2.46.0 >