On Fri, Jun 28, 2024 at 9:18 PM Rob Clark <robdclark@xxxxxxxxx> wrote: > > On Fri, Jun 28, 2024 at 8:20 AM Pranjal Shrivastava <praan@xxxxxxxxxx> wrote: > > > > On Fri, Jun 28, 2024 at 3:46 AM Rob Clark <robdclark@xxxxxxxxx> wrote: > > > > > > From: Rob Clark <robdclark@xxxxxxxxxxxx> > > > > > > Parse out the bitfields for easier-to-read fault messages. > > > > > > Signed-off-by: Rob Clark <robdclark@xxxxxxxxxxxx> > > > --- > > > .../iommu/arm/arm-smmu/arm-smmu-qcom-debug.c | 62 ++++++---------- > > > drivers/iommu/arm/arm-smmu/arm-smmu.c | 70 +++++++++++++++---- > > > drivers/iommu/arm/arm-smmu/arm-smmu.h | 67 ++++++++++++------ > > > 3 files changed, 119 insertions(+), 80 deletions(-) > > > > > > diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom-debug.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom-debug.c > > > index 552199cbd9e2..da2e605014a5 100644 > > > --- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom-debug.c > > > +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom-debug.c > > > @@ -141,7 +141,7 @@ static int qcom_tbu_halt(struct qcom_tbu *tbu, struct arm_smmu_domain *smmu_doma > > > writel_relaxed(val, tbu->base + DEBUG_SID_HALT_REG); > > > > > > fsr = arm_smmu_cb_read(smmu, idx, ARM_SMMU_CB_FSR); > > > - if ((fsr & ARM_SMMU_FSR_FAULT) && (fsr & ARM_SMMU_FSR_SS)) { > > > + if ((fsr & ARM_SMMU_FSR_FAULT) && (fsr & ARM_SMMU_CB_FSR_SS)) { > > > u32 sctlr_orig, sctlr; > > > > > > /* > > > @@ -306,7 +306,7 @@ static phys_addr_t qcom_iova_to_phys(struct arm_smmu_domain *smmu_domain, > > > * TBU halt takes care of resuming any stalled transcation. > > > * Kept it here for completeness sake. 
> > > */ > > > - if (fsr & ARM_SMMU_FSR_SS) > > > + if (fsr & ARM_SMMU_CB_FSR_SS) > > > arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_RESUME, > > > ARM_SMMU_RESUME_TERMINATE); > > > } > > > @@ -324,7 +324,7 @@ static phys_addr_t qcom_iova_to_phys(struct arm_smmu_domain *smmu_domain, > > > /* Clear pending interrupts */ > > > arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_FSR, fsr); > > > > > > - if (fsr & ARM_SMMU_FSR_SS) > > > + if (fsr & ARM_SMMU_CB_FSR_SS) > > > arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_RESUME, > > > ARM_SMMU_RESUME_TERMINATE); > > > } > > > @@ -383,68 +383,46 @@ irqreturn_t qcom_smmu_context_fault(int irq, void *dev) > > > struct arm_smmu_domain *smmu_domain = dev; > > > struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops; > > > struct arm_smmu_device *smmu = smmu_domain->smmu; > > > - u32 fsr, fsynr, cbfrsynra, resume = 0; > > > + struct arm_smmu_context_fault_info cfi; > > > + u32 resume = 0; > > > int idx = smmu_domain->cfg.cbndx; > > > phys_addr_t phys_soft; > > > - unsigned long iova; > > > int ret, tmp; > > > > > > static DEFINE_RATELIMIT_STATE(_rs, > > > DEFAULT_RATELIMIT_INTERVAL, > > > DEFAULT_RATELIMIT_BURST); > > > > > > - fsr = arm_smmu_cb_read(smmu, idx, ARM_SMMU_CB_FSR); > > > - if (!(fsr & ARM_SMMU_FSR_FAULT)) > > > - return IRQ_NONE; > > > + arm_smmu_read_context_fault_info(smmu, idx, &cfi); > > > > > > - fsynr = arm_smmu_cb_read(smmu, idx, ARM_SMMU_CB_FSYNR0); > > > - iova = arm_smmu_cb_readq(smmu, idx, ARM_SMMU_CB_FAR); > > > - cbfrsynra = arm_smmu_gr1_read(smmu, ARM_SMMU_GR1_CBFRSYNRA(idx)); > > > + if (!(cfi.fsr & ARM_SMMU_FSR_FAULT)) > > > + return IRQ_NONE; > > > > > > if (list_empty(&tbu_list)) { > > > - ret = report_iommu_fault(&smmu_domain->domain, NULL, iova, > > > - fsynr & ARM_SMMU_FSYNR0_WNR ? IOMMU_FAULT_WRITE : IOMMU_FAULT_READ); > > > + ret = report_iommu_fault(&smmu_domain->domain, NULL, cfi.iova, > > > + cfi.fsynr & ARM_SMMU_CB_FSYNR0_WNR ? 
IOMMU_FAULT_WRITE : IOMMU_FAULT_READ); > > > > > > if (ret == -ENOSYS) > > > - dev_err_ratelimited(smmu->dev, > > > - "Unhandled context fault: fsr=0x%x, iova=0x%08lx, fsynr=0x%x, cbfrsynra=0x%x, cb=%d\n", > > > - fsr, iova, fsynr, cbfrsynra, idx); > > > + arm_smmu_print_context_fault_info(smmu, idx, &cfi); > > > > > > - arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_FSR, fsr); > > > + arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_FSR, cfi.fsr); > > > return IRQ_HANDLED; > > > } > > > > > > - phys_soft = ops->iova_to_phys(ops, iova); > > > + phys_soft = ops->iova_to_phys(ops, cfi.iova); > > > > > > - tmp = report_iommu_fault(&smmu_domain->domain, NULL, iova, > > > - fsynr & ARM_SMMU_FSYNR0_WNR ? IOMMU_FAULT_WRITE : IOMMU_FAULT_READ); > > > + tmp = report_iommu_fault(&smmu_domain->domain, NULL, cfi.iova, > > > + cfi.fsynr & ARM_SMMU_CB_FSYNR0_WNR ? IOMMU_FAULT_WRITE : IOMMU_FAULT_READ); > > > if (!tmp || tmp == -EBUSY) { > > > - dev_dbg(smmu->dev, > > > - "Context fault handled by client: iova=0x%08lx, fsr=0x%x, fsynr=0x%x, cb=%d\n", > > > - iova, fsr, fsynr, idx); > > > + arm_smmu_print_context_fault_info(smmu, idx, &cfi); > > > > Not sure if we'd want to remove the original log here. > > I don't think adding the log "Context fault handled by client" in the > > `arm_smmu_print_context_fault_info` function would be correct. > > Since the same message would be logged even when the context fault > > is unhandled whereas it seems we log this only when report_iommu_fault > > returns NULL or -EBUSY. I like the approach to break out a print helper, > > but let's not log "Context fault handled by client" as a part of it. > > Ahh, yeah, I copied the wrong msg > > But IMO nothing should be printed for faults that are handled, similar > to cpu page faults, so I'll just remove the print for the handled > case. If you're removing the "Context fault handled by client" message then I guess it's okay to log the rest in both places. 
I too believe that we don't need to log anything for the handled case, but I'm unsure as to why the original print was there in the first place. Maybe Will/Robin could provide some context about the original message? > > > > dev_dbg(smmu->dev, "soft iova-to-phys=%pa\n", &phys_soft); > > > ret = IRQ_HANDLED; > > > resume = ARM_SMMU_RESUME_TERMINATE; > > > } else { > > > - phys_addr_t phys_atos = qcom_smmu_verify_fault(smmu_domain, iova, fsr); > > > + phys_addr_t phys_atos = qcom_smmu_verify_fault(smmu_domain, cfi.iova, cfi.fsr); > > > > > > if (__ratelimit(&_rs)) { > > > - dev_err(smmu->dev, > > > - "Unhandled context fault: fsr=0x%x, iova=0x%08lx, fsynr=0x%x, cbfrsynra=0x%x, cb=%d\n", > > > - fsr, iova, fsynr, cbfrsynra, idx); > > > - dev_err(smmu->dev, > > > - "FSR = %08x [%s%s%s%s%s%s%s%s%s], SID=0x%x\n", > > > - fsr, > > > - (fsr & 0x02) ? "TF " : "", > > > - (fsr & 0x04) ? "AFF " : "", > > > - (fsr & 0x08) ? "PF " : "", > > > - (fsr & 0x10) ? "EF " : "", > > > - (fsr & 0x20) ? "TLBMCF " : "", > > > - (fsr & 0x40) ? "TLBLKF " : "", > > > - (fsr & 0x80) ? "MHF " : "", > > > - (fsr & 0x40000000) ? "SS " : "", > > > - (fsr & 0x80000000) ? 
"MULTI " : "", > > > - cbfrsynra); > > > + arm_smmu_print_context_fault_info(smmu, idx, &cfi); > > > > > > dev_err(smmu->dev, > > > "soft iova-to-phys=%pa\n", &phys_soft); > > > @@ -478,10 +456,10 @@ irqreturn_t qcom_smmu_context_fault(int irq, void *dev) > > > */ > > > if (tmp != -EBUSY) { > > > /* Clear the faulting FSR */ > > > - arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_FSR, fsr); > > > + arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_FSR, cfi.fsr); > > > > > > /* Retry or terminate any stalled transactions */ > > > - if (fsr & ARM_SMMU_FSR_SS) > > > + if (cfi.fsr & ARM_SMMU_CB_FSR_SS) > > > arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_RESUME, resume); > > > } > > > > > > diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.c b/drivers/iommu/arm/arm-smmu/arm-smmu.c > > > index 87c81f75cf84..246a39081879 100644 > > > --- a/drivers/iommu/arm/arm-smmu/arm-smmu.c > > > +++ b/drivers/iommu/arm/arm-smmu/arm-smmu.c > > > @@ -405,32 +405,72 @@ static const struct iommu_flush_ops arm_smmu_s2_tlb_ops_v1 = { > > > .tlb_add_page = arm_smmu_tlb_add_page_s2_v1, > > > }; > > > > > > + > > > +void arm_smmu_read_context_fault_info(struct arm_smmu_device *smmu, int idx, > > > + struct arm_smmu_context_fault_info *cfi) > > > +{ > > > + cfi->iova = arm_smmu_cb_readq(smmu, idx, ARM_SMMU_CB_FAR); > > > + cfi->fsr = arm_smmu_cb_read(smmu, idx, ARM_SMMU_CB_FSR); > > > + cfi->fsynr = arm_smmu_cb_read(smmu, idx, ARM_SMMU_CB_FSYNR0); > > > + cfi->cbfrsynra = arm_smmu_gr1_read(smmu, ARM_SMMU_GR1_CBFRSYNRA(idx)); > > > +} > > > + > > > +void arm_smmu_print_context_fault_info(struct arm_smmu_device *smmu, int idx, > > > + const struct arm_smmu_context_fault_info *cfi) > > > > Maybe add a "isHandled" bool in the parameter based on which we either print > > "Context fault handled by client" or "Unhandled context fault" before > > logging registers. 
> > > > > +{ > > > + dev_dbg(smmu->dev, > > > + "Context fault handled by client: iova=0x%08lx, fsr=0x%x, fsynr=0x%x, cb=%d\n", > > > > As mentioned above, let's print if the fault was handled or not based > > on a handled flag. > > > > > + cfi->iova, cfi->fsr, cfi->fsynr, idx); > > > + > > > + dev_err(smmu->dev, "FSR = %08x [%s%sFormat=%u%s%s%s%s%s%s%s%s], SID=0x%x\n", > > > > I liked the " | " separators too (sorry!) but I'm okay unless someone > > else feels the same. > > > > > + cfi->fsr, > > > + (cfi->fsr & ARM_SMMU_CB_FSR_MULTI) ? "MULTI " : "", > > > + (cfi->fsr & ARM_SMMU_CB_FSR_SS) ? "SS " : "", > > > > Nit: Let's be consistent with the spacing, either add a space before or after. > > The spacing actually is intentional. It'll make sense if you think > about the output string (ie. it avoids extra or missing spaces) Ah, yes, it avoids the extra spaces, I missed that. > > BR, > -R > > > > > > + (u32)FIELD_GET(ARM_SMMU_CB_FSR_FORMAT, cfi->fsr), > > > + (cfi->fsr & ARM_SMMU_CB_FSR_UUT) ? " UUT" : "", > > > + (cfi->fsr & ARM_SMMU_CB_FSR_ASF) ? " ASF" : "", > > > + (cfi->fsr & ARM_SMMU_CB_FSR_TLBLKF) ? " TLBLKF" : "", > > > + (cfi->fsr & ARM_SMMU_CB_FSR_TLBMCF) ? " TLBMCF" : "", > > > + (cfi->fsr & ARM_SMMU_CB_FSR_EF) ? " EF" : "", > > > + (cfi->fsr & ARM_SMMU_CB_FSR_PF) ? " PF" : "", > > > + (cfi->fsr & ARM_SMMU_CB_FSR_AFF) ? " AFF" : "", > > > + (cfi->fsr & ARM_SMMU_CB_FSR_TF) ? " TF" : "", > > > + cfi->cbfrsynra); > > > + > > > + dev_err(smmu->dev, "FSYNR0 = %08x [S1CBNDX=%u%s%s%s%s%s%s PLVL=%u]\n", > > > + cfi->fsynr, > > > + (u32)FIELD_GET(ARM_SMMU_CB_FSYNR0_S1CBNDX, cfi->fsynr), > > > + (cfi->fsynr & ARM_SMMU_CB_FSYNR0_AFR) ? " AFR" : "", > > > + (cfi->fsynr & ARM_SMMU_CB_FSYNR0_PTWF) ? " PTWF" : "", > > > + (cfi->fsynr & ARM_SMMU_CB_FSYNR0_NSATTR) ? " NSATTR" : "", > > > + (cfi->fsynr & ARM_SMMU_CB_FSYNR0_IND) ? " IND" : "", > > > + (cfi->fsynr & ARM_SMMU_CB_FSYNR0_PNU) ? " PNU" : "", > > > + (cfi->fsynr & ARM_SMMU_CB_FSYNR0_WNR) ? 
" WNR" : "", > > > + (u32)FIELD_GET(ARM_SMMU_CB_FSYNR0_PLVL, cfi->fsynr)); > > > +} > > > + > > > static irqreturn_t arm_smmu_context_fault(int irq, void *dev) > > > { > > > - u32 fsr, fsynr, cbfrsynra; > > > - unsigned long iova; > > > + struct arm_smmu_context_fault_info cfi; > > > > I like this, it looks clean! > > > > > struct arm_smmu_domain *smmu_domain = dev; > > > struct arm_smmu_device *smmu = smmu_domain->smmu; > > > + static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL, > > > + DEFAULT_RATELIMIT_BURST); > > > int idx = smmu_domain->cfg.cbndx; > > > int ret; > > > > > > - fsr = arm_smmu_cb_read(smmu, idx, ARM_SMMU_CB_FSR); > > > - if (!(fsr & ARM_SMMU_FSR_FAULT)) > > > - return IRQ_NONE; > > > + arm_smmu_read_context_fault_info(smmu, idx, &cfi); > > > > > > - fsynr = arm_smmu_cb_read(smmu, idx, ARM_SMMU_CB_FSYNR0); > > > - iova = arm_smmu_cb_readq(smmu, idx, ARM_SMMU_CB_FAR); > > > - cbfrsynra = arm_smmu_gr1_read(smmu, ARM_SMMU_GR1_CBFRSYNRA(idx)); > > > + if (!(cfi.fsr & ARM_SMMU_FSR_FAULT)) > > > + return IRQ_NONE; > > > > > > - ret = report_iommu_fault(&smmu_domain->domain, NULL, iova, > > > - fsynr & ARM_SMMU_FSYNR0_WNR ? IOMMU_FAULT_WRITE : IOMMU_FAULT_READ); > > > + ret = report_iommu_fault(&smmu_domain->domain, NULL, cfi.iova, > > > + cfi.fsynr & ARM_SMMU_CB_FSYNR0_WNR ? IOMMU_FAULT_WRITE : IOMMU_FAULT_READ); > > > > > > - if (ret == -ENOSYS) > > > - dev_err_ratelimited(smmu->dev, > > > - "Unhandled context fault: fsr=0x%x, iova=0x%08lx, fsynr=0x%x, cbfrsynra=0x%x, cb=%d\n", > > > - fsr, iova, fsynr, cbfrsynra, idx); > > > > Same here, I wouldn't want the "Unhandled context fault" log to be > > replaced by "Context fault handled by client". 
> > > > > + if (ret == -ENOSYS && __ratelimit(&rs)) > > > + arm_smmu_print_context_fault_info(smmu, idx, &cfi); > > > > > > - arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_FSR, fsr); > > > + arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_FSR, cfi.fsr); > > > return IRQ_HANDLED; > > > } > > > > > > diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.h b/drivers/iommu/arm/arm-smmu/arm-smmu.h > > > index 4765c6945c34..faf475366d4d 100644 > > > --- a/drivers/iommu/arm/arm-smmu/arm-smmu.h > > > +++ b/drivers/iommu/arm/arm-smmu/arm-smmu.h > > > @@ -196,34 +196,42 @@ enum arm_smmu_cbar_type { > > > #define ARM_SMMU_CB_PAR_F BIT(0) > > > > > > #define ARM_SMMU_CB_FSR 0x58 > > > -#define ARM_SMMU_FSR_MULTI BIT(31) > > > -#define ARM_SMMU_FSR_SS BIT(30) > > > -#define ARM_SMMU_FSR_UUT BIT(8) > > > -#define ARM_SMMU_FSR_ASF BIT(7) > > > -#define ARM_SMMU_FSR_TLBLKF BIT(6) > > > -#define ARM_SMMU_FSR_TLBMCF BIT(5) > > > -#define ARM_SMMU_FSR_EF BIT(4) > > > -#define ARM_SMMU_FSR_PF BIT(3) > > > -#define ARM_SMMU_FSR_AFF BIT(2) > > > -#define ARM_SMMU_FSR_TF BIT(1) > > > - > > > -#define ARM_SMMU_FSR_IGN (ARM_SMMU_FSR_AFF | \ > > > - ARM_SMMU_FSR_ASF | \ > > > - ARM_SMMU_FSR_TLBMCF | \ > > > - ARM_SMMU_FSR_TLBLKF) > > > - > > > -#define ARM_SMMU_FSR_FAULT (ARM_SMMU_FSR_MULTI | \ > > > - ARM_SMMU_FSR_SS | \ > > > - ARM_SMMU_FSR_UUT | \ > > > - ARM_SMMU_FSR_EF | \ > > > - ARM_SMMU_FSR_PF | \ > > > - ARM_SMMU_FSR_TF | \ > > > +#define ARM_SMMU_CB_FSR_MULTI BIT(31) > > > +#define ARM_SMMU_CB_FSR_SS BIT(30) > > > +#define ARM_SMMU_CB_FSR_FORMAT GENMASK(10, 9) > > > +#define ARM_SMMU_CB_FSR_UUT BIT(8) > > > +#define ARM_SMMU_CB_FSR_ASF BIT(7) > > > +#define ARM_SMMU_CB_FSR_TLBLKF BIT(6) > > > +#define ARM_SMMU_CB_FSR_TLBMCF BIT(5) > > > +#define ARM_SMMU_CB_FSR_EF BIT(4) > > > +#define ARM_SMMU_CB_FSR_PF BIT(3) > > > +#define ARM_SMMU_CB_FSR_AFF BIT(2) > > > +#define ARM_SMMU_CB_FSR_TF BIT(1) > > > + > > > +#define ARM_SMMU_FSR_IGN (ARM_SMMU_CB_FSR_AFF | \ > > > + ARM_SMMU_CB_FSR_ASF | \ > > > + 
ARM_SMMU_CB_FSR_TLBMCF | \ > > > + ARM_SMMU_CB_FSR_TLBLKF) > > > + > > > +#define ARM_SMMU_FSR_FAULT (ARM_SMMU_CB_FSR_MULTI | \ > > > + ARM_SMMU_CB_FSR_SS | \ > > > + ARM_SMMU_CB_FSR_UUT | \ > > > + ARM_SMMU_CB_FSR_EF | \ > > > + ARM_SMMU_CB_FSR_PF | \ > > > + ARM_SMMU_CB_FSR_TF | \ > > > ARM_SMMU_FSR_IGN) > > > > > > #define ARM_SMMU_CB_FAR 0x60 > > > > > > #define ARM_SMMU_CB_FSYNR0 0x68 > > > -#define ARM_SMMU_FSYNR0_WNR BIT(4) > > > +#define ARM_SMMU_CB_FSYNR0_PLVL GENMASK(1, 0) > > > +#define ARM_SMMU_CB_FSYNR0_WNR BIT(4) > > > +#define ARM_SMMU_CB_FSYNR0_PNU BIT(5) > > > +#define ARM_SMMU_CB_FSYNR0_IND BIT(6) > > > +#define ARM_SMMU_CB_FSYNR0_NSATTR BIT(8) > > > +#define ARM_SMMU_CB_FSYNR0_PTWF BIT(10) > > > +#define ARM_SMMU_CB_FSYNR0_AFR BIT(11) > > > +#define ARM_SMMU_CB_FSYNR0_S1CBNDX GENMASK(23, 16) > > > > > > #define ARM_SMMU_CB_FSYNR1 0x6c > > > > > > @@ -533,4 +541,17 @@ struct arm_smmu_device *qcom_smmu_impl_init(struct arm_smmu_device *smmu); > > > void arm_smmu_write_context_bank(struct arm_smmu_device *smmu, int idx); > > > int arm_mmu500_reset(struct arm_smmu_device *smmu); > > > > > > +struct arm_smmu_context_fault_info { > > > + unsigned long iova; > > > + u32 fsr; > > > + u32 fsynr; > > > + u32 cbfrsynra; > > > +}; > > > + > > > +void arm_smmu_read_context_fault_info(struct arm_smmu_device *smmu, int idx, > > > + struct arm_smmu_context_fault_info *cfi); > > > + > > > +void arm_smmu_print_context_fault_info(struct arm_smmu_device *smmu, int idx, > > > + const struct arm_smmu_context_fault_info *cfi); > > > + > > > #endif /* _ARM_SMMU_H */ > > > -- > > > 2.45.2 > > > > Apart from this, the rest looks good to me. Thanks, Pranjal