On 10/22/2023 12:05 AM, Bjorn Andersson wrote: > On Wed, Oct 18, 2023 at 07:19:20PM -0700, Georgi Djakov wrote: >> Add driver for the Qualcomm implementation of the ARM MMU-500 TBU. >> The driver will enable the resources needed by the TBU and will >> configure the registers for some debug features like checking if >> there are any pending transactions, capturing transactions and >> running ATOS (Address Translation Operations). ATOS/eCATS are used >> to manually trigger an address translation of IOVA to physical >> address by the SMMU hardware. > > I still don't think this commit message clearly enough describe the > problem you're trying to solve. > > Not until I had read the Kconfig help text did I pay attention to the > significance of the words "some debug features" in the middle of the > paragraph. > > > Please describe your changes in accordance with [1], i.e. clearly > describe the problem you're trying to solve, then discuss the technical > solution in the patch. Thanks Bjorn, I'll try to improve it! > [1] https://docs.kernel.org/process/submitting-patches.html#describe-your-changes > > [..] >> diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c > [..] >> +#ifdef CONFIG_ARM_SMMU_QCOM_TBU >> + >> +struct qsmmuv500_tbu { >> + struct device *dev; >> + struct arm_smmu_device *smmu; >> + u32 sid_range[2]; >> + struct list_head list; >> + struct clk *clk; >> + struct icc_path *path; >> + void __iomem *base; >> + spinlock_t halt_lock; /* protects halt count */ > > But in particular it makes sure that multiple halt or resume can't > execute concurrently. Exactly. Will mention it. >> + int halt_count; >> +}; >> + >> +static DEFINE_SPINLOCK(ecats_lock); >> + >> +static struct qsmmuv500_tbu *qsmmuv500_find_tbu(struct qcom_smmu *qsmmu, u32 sid) >> +{ >> + struct qsmmuv500_tbu *tbu = NULL; >> + u32 start, end; >> + >> + mutex_lock(&qsmmu->tbu_list_lock); >> + >> + list_for_each_entry(tbu, &qsmmu->tbu_list, list) { >> + start = tbu->sid_range[0]; >> + end = start + tbu->sid_range[1]; >> + >> + if (start <= sid && sid < end) >> + break; >> + } >> + >> + mutex_unlock(&qsmmu->tbu_list_lock); >> + >> + return tbu; >> +} >> + >> +static int qsmmuv500_tbu_halt(struct qsmmuv500_tbu *tbu, struct arm_smmu_domain *smmu_domain) >> +{ >> + struct arm_smmu_device *smmu = smmu_domain->smmu; >> + int ret = 0, idx = smmu_domain->cfg.cbndx; >> + unsigned long flags; >> + u32 val, fsr, status; >> + >> + spin_lock_irqsave(&tbu->halt_lock, flags); > > Does this really need to run with interrupts disabled? This is being executed in threaded irq context. >> + if (tbu->halt_count) { >> + tbu->halt_count++; >> + goto out; >> + } >> + > [..] >> +static phys_addr_t qsmmuv500_iova_to_phys(struct arm_smmu_domain *smmu_domain, >> + dma_addr_t iova, u32 sid) >> +{ > [..] >> + /* Only one concurrent atos operation */ >> + spin_lock_irqsave(&ecats_lock, flags); > > Does this require interrupts to be disabled? This also runs in the irq handler during context fault. >> + >> + /* >> + * After a failed translation, the next successful translation will >> + * incorrectly be reported as a failure. > > "So if the ECATS translation fails, attempt the lookup more time." > >> + */ >> + do { >> + phys = qsmmuv500_tbu_trigger_atos(smmu_domain, tbu, iova, sid); >> + >> + fsr = arm_smmu_cb_read(smmu, idx, ARM_SMMU_CB_FSR); >> + if (fsr & ARM_SMMU_FSR_FAULT) { >> + /* Clear pending interrupts */ >> + arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_FSR, fsr); >> + /* >> + * Barrier required to ensure that the FSR is cleared >> + * before resuming SMMU operation. >> + */ > > Better be clear on what this actually does, for future readers' sake: > > /* Ensure that FSR and RESUME operations aren't reordered. */ > > But is this really necessary, the two writes are for the same device, > can they still be reordered? Right, these are to the same endpoint. It can be dropped. >> + wmb(); >> + >> + if (fsr & ARM_SMMU_FSR_SS) >> + arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_RESUME, >> + ARM_SMMU_RESUME_TERMINATE); >> + } >> + } while (!phys && needs_redo++ < 2); > > "needs_redo" sounds like a boolean to me. I think "attempt" would be a > better fit here. > Ok. >> + >> + arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_SCTLR, sctlr_orig); >> + spin_unlock_irqrestore(&ecats_lock, flags); >> + qsmmuv500_tbu_resume(tbu); >> + >> + /* Read to complete prior write transcations */ >> + readl_relaxed(tbu->base + DEBUG_SR_HALT_ACK_REG); >> + >> + /* Wait for read to complete */ > > That's not what rmb() does. You don't need to do anything here, > readl_relaxed() returns when the read is done. Ack. >> + rmb(); >> + >> +disable_clk: >> + clk_disable_unprepare(tbu->clk); >> +disable_icc: >> + icc_set_bw(tbu->path, 0, 0); >> + >> + return phys; >> +} >> +#endif >> + >> static void qcom_smmu_tlb_sync(struct arm_smmu_device *smmu, int page, >> int sync, int status) >> { >> @@ -588,3 +895,80 @@ struct arm_smmu_device *qcom_smmu_impl_init(struct arm_smmu_device *smmu) >> >> return smmu; >> } >> + >> +#ifdef CONFIG_ARM_SMMU_QCOM_TBU >> + >> +static const struct of_device_id qsmmuv500_tbu_of_match[] = { >> + { .compatible = "qcom,qsmmuv500-tbu" }, >> + { } >> +}; > > Place this below the remove function, as most other drivers do. Ack. >> + >> +static int qsmmuv500_tbu_probe(struct platform_device *pdev) >> +{ > [..] >> + mutex_lock(&qsmmu->tbu_list_lock); >> + list_add_tail(&tbu->list, &qsmmu->tbu_list); > > "tbu" is devres allocated, but you don't pull it off the list (or > synchronize) during remove. Right, but I'll just make this a builtin. >> + mutex_unlock(&qsmmu->tbu_list_lock); >> + >> + dev_set_drvdata(dev, tbu); >> + >> + return 0; >> +} >> + >> +static void qsmmuv500_tbu_remove(struct platform_device *pdev) >> +{ >> + struct qsmmuv500_tbu *tbu = dev_get_drvdata(&pdev->dev); >> + >> + clk_disable_unprepare(tbu->clk); > > This isn't balanced. > >> + clk_put(tbu->clk); >> + icc_put(tbu->path); >> +} >> + >> +static struct platform_driver qsmmuv500_tbu_driver = { >> + .driver = { >> + .name = "qsmmuv500-tbu", >> + .of_match_table = of_match_ptr(qsmmuv500_tbu_of_match), > > Won't of_match_ptr() result in a build warning if built without > CONFIG_OF? Will drop. >> + }, >> + .probe = qsmmuv500_tbu_probe, >> + .remove_new = qsmmuv500_tbu_remove, >> +}; >> +module_platform_driver(qsmmuv500_tbu_driver); > > This file acts as a library for the arm-smmu driver today, adding a > platform_driver here makes it look like this is a separate driver. > > Which makes me wonder, why is this a separate driver? Why not just > loop over the subnodes and build the tbu_list in qcom_smmu_impl_init()? > I am using the platform framework in order to get a more compact code for this optional driver, but it adds some overhead.. I'll start with moving all the TBU stuff into a separate file and will try meanwhile your suggestion.. Thanks, Georgi