On Mon, Apr 29, 2024 at 09:43:48PM -0700, Nicolin Chen wrote: > static struct arm_smmu_cmdq *arm_smmu_get_cmdq(struct arm_smmu_device *smmu) > { > + if (smmu->tegra241_cmdqv) > + return tegra241_cmdqv_get_cmdq(smmu); Since it is compile time optional it would make some sense to optimize this (in all the places) too: if (arm_smmu_has_smmu_tegra241_cmdqv(smmu)) [..] static inline bool arm_smmu_has_smmu_tegra241_cmdqv(struct arm_smmu_device *smmu) { return IS_ENABLED(CONFIG_TEGRA241_CMDQV) && smmu->tegra241_cmdqv; } > @@ -3105,12 +3108,10 @@ static struct iommu_ops arm_smmu_ops = { > }; > > /* Probing and initialisation functions */ > -static int arm_smmu_init_one_queue(struct arm_smmu_device *smmu, > - struct arm_smmu_queue *q, > - void __iomem *page, > - unsigned long prod_off, > - unsigned long cons_off, > - size_t dwords, const char *name) > +int arm_smmu_init_one_queue(struct arm_smmu_device *smmu, > + struct arm_smmu_queue *q, void __iomem *page, > + unsigned long prod_off, unsigned long cons_off, > + size_t dwords, const char *name) > { > size_t qsz; This hunk and the .h file part should be moved to the prior patch that is de-exporting things. > +/* MMIO helpers */ > +#define cmdqv_readl(reg) \ > + readl(cmdqv->base + TEGRA241_CMDQV_##reg) > +#define cmdqv_readl_relaxed(reg) \ > + readl_relaxed(cmdqv->base + TEGRA241_CMDQV_##reg) > +#define cmdqv_writel(val, reg) \ > + writel((val), cmdqv->base + TEGRA241_CMDQV_##reg) > +#define cmdqv_writel_relaxed(val, reg) \ > + writel_relaxed((val), cmdqv->base + TEGRA241_CMDQV_##reg) Please don't hide access to a stack variable in a macro, and I'm not keen on the ##reg scheme either - it makes it much harder to search for things. Really this all seems like alot of overkill to make a little bit of shorthand. It is not so wordy just to type it out: readl(vintf->base + TEGRA241_VINTF_CONFIG) > +/* Logging helpers */ > +#define cmdqv_warn(fmt, ...) \ > + dev_warn(cmdqv->dev, "CMDQV: " fmt, ##__VA_ARGS__) > +#define cmdqv_err(fmt, ...) \ > + dev_err(cmdqv->dev, "CMDQV: " fmt, ##__VA_ARGS__) > +#define cmdqv_info(fmt, ...) \ > + dev_info(cmdqv->dev, "CMDQV: " fmt, ##__VA_ARGS__) > +#define cmdqv_dbg(fmt, ...) \ > + dev_dbg(cmdqv->dev, "CMDQV: " fmt, ##__VA_ARGS__) Really not sure these are necessary, same remark about the stack variable. Also cmdqv->dev is the wrong thing to print, this is part of the smmu driver, it should print cmdqv->smmu->dev for consistency > +#define vintf_warn(fmt, ...) \ > + dev_warn(vintf->cmdqv->dev, "VINTF%u: " fmt, vintf->idx, ##__VA_ARGS__) > +#define vintf_err(fmt, ...) \ > + dev_err(vintf->cmdqv->dev, "VINTF%u: " fmt, vintf->idx, ##__VA_ARGS__) > +#define vintf_info(fmt, ...) \ > + dev_info(vintf->cmdqv->dev, "VINTF%u: " fmt, vintf->idx, ##__VA_ARGS__) > +#define vintf_dbg(fmt, ...) \ > + dev_dbg(vintf->cmdqv->dev, "VINTF%u: " fmt, vintf->idx, ##__VA_ARGS__) > + > +#define vcmdq_warn(fmt, ...) \ > + ({ \ > + struct tegra241_vintf *vintf = vcmdq->vintf; \ > + if (vintf) \ > + vintf_warn("VCMDQ%u/LVCMDQ%u: " fmt, \ > + vcmdq->idx, vcmdq->lidx, \ > + ##__VA_ARGS__); \ > + else \ > + dev_warn(vcmdq->cmdqv->dev, "VCMDQ%u: " fmt, \ > + vcmdq->idx, ##__VA_ARGS__); \ > + }) > +#define vcmdq_err(fmt, ...) \ > + ({ \ > + struct tegra241_vintf *vintf = vcmdq->vintf; \ > + if (vintf) \ > + vintf_err("VCMDQ%u/LVCMDQ%u: " fmt, \ > + vcmdq->idx, vcmdq->lidx, \ > + ##__VA_ARGS__); \ > + else \ > + dev_err(vcmdq->cmdqv->dev, "VCMDQ%u: " fmt, \ > + vcmdq->idx, ##__VA_ARGS__); \ > + }) > +#define vcmdq_info(fmt, ...) \ > + ({ \ > + struct tegra241_vintf *vintf = vcmdq->vintf; \ > + if (vintf) \ > + vintf_info("VCMDQ%u/LVCMDQ%u: " fmt, \ > + vcmdq->idx, vcmdq->lidx, \ > + ##__VA_ARGS__); \ > + else \ > + dev_info(vcmdq->cmdqv->dev, "VCMDQ%u: " fmt, \ > + vcmdq->idx, ##__VA_ARGS__); \ > + }) > +#define vcmdq_dbg(fmt, ...) \ > + ({ \ > + struct tegra241_vintf *vintf = vcmdq->vintf; \ > + if (vintf) \ > + vintf_dbg("VCMDQ%u/LVCMDQ%u: " fmt, \ > + vcmdq->idx, vcmdq->lidx, \ > + ##__VA_ARGS__); \ > + else \ > + dev_dbg(vcmdq->cmdqv->dev, "VCMDQ%u: " fmt, \ > + vcmdq->idx, ##__VA_ARGS__); \ > + }) Some of these are barely used, is it worth all these macros?? > + > +/* Configuring and polling helpers */ > +#define tegra241_cmdqv_write_config(_owner, _OWNER, _regval) \ > + ({ \ > + bool _en = (_regval) & _OWNER##_EN; \ > + u32 _status; \ > + int _ret; \ > + writel((_regval), _owner->base + TEGRA241_##_OWNER##_CONFIG); \ > + _ret = readl_poll_timeout( \ > + _owner->base + TEGRA241_##_OWNER##_STATUS, _status, \ > + _en ? (_regval) & _OWNER##_ENABLED : \ > + !((_regval) & _OWNER##_ENABLED), \ > + 1, ARM_SMMU_POLL_TIMEOUT_US); \ > + if (_ret) \ > + _owner##_err("failed to %sable, STATUS = 0x%08X\n", \ > + _en ? "en" : "dis", _status); \ > + atomic_set(&_owner->status, _status); \ > + _ret; \ > + }) I feel like this could be an actual inline function without the macro wrapper with a little fiddling. > + > +#define cmdqv_write_config(_regval) \ > + tegra241_cmdqv_write_config(cmdqv, CMDQV, _regval) > +#define vintf_write_config(_regval) \ > + tegra241_cmdqv_write_config(vintf, VINTF, _regval) > +#define vcmdq_write_config(_regval) \ > + tegra241_cmdqv_write_config(vcmdq, VCMDQ, _regval) More hidden access to stack values > +/** > + * struct tegra241_cmdqv - CMDQ-V for SMMUv3 > + * @smmu: SMMUv3 pointer > + * @dev: Device pointer This should probably be clarified as the device pointer to the ACPI companion device > +static void tegra241_cmdqv_handle_vintf0_error(struct tegra241_cmdqv *cmdqv) > +{ > + struct tegra241_vintf *vintf = cmdqv->vintfs[0]; > + int i; > + > + /* Cache status to bypass VCMDQs until error is recovered */ > + atomic_set(&vintf->status, vintf_readl(STATUS)); > + > + for (i = 0; i < 4; i++) { > + u32 lvcmdq_err_map = vintf_readl_relaxed(CMDQ_ERR_MAP(i)); > + > + while (lvcmdq_err_map) { > + int lidx = ffs(lvcmdq_err_map) - 1; > + struct tegra241_vcmdq *vcmdq = vintf->vcmdqs[lidx]; > + u32 gerrorn, gerror; > + > + lvcmdq_err_map &= ~BIT(lidx); > + > + __arm_smmu_cmdq_skip_err(cmdqv->smmu, &vcmdq->cmdq.q); > + > + gerrorn = vcmdq_page0_readl_relaxed(GERRORN); > + gerror = vcmdq_page0_readl_relaxed(GERROR); > + > + vcmdq_page0_writel(gerror, GERRORN); > + } > + } > + > + /* Now error status should be clean, cache it again */ > + atomic_set(&vintf->status, vintf_readl(STATUS)); > +} > + > +static irqreturn_t tegra241_cmdqv_isr(int irq, void *devid) > +{ > + struct tegra241_cmdqv *cmdqv = (struct tegra241_cmdqv *)devid; > + u32 vintf_errs[2]; > + u32 vcmdq_errs[4]; > + > + vintf_errs[0] = cmdqv_readl_relaxed(VINTF_ERR_MAP); > + vintf_errs[1] = cmdqv_readl_relaxed(VINTF_ERR_MAP + 0x4); > + > + vcmdq_errs[0] = cmdqv_readl_relaxed(VCMDQ_ERR_MAP(0)); > + vcmdq_errs[1] = cmdqv_readl_relaxed(VCMDQ_ERR_MAP(1)); > + vcmdq_errs[2] = cmdqv_readl_relaxed(VCMDQ_ERR_MAP(2)); > + vcmdq_errs[3] = cmdqv_readl_relaxed(VCMDQ_ERR_MAP(3)); > + > + cmdqv_warn("unexpected cmdqv error reported\n"); > + cmdqv_warn(" vintf_map: 0x%08X%08X\n", vintf_errs[1], vintf_errs[0]); > + cmdqv_warn(" vcmdq_map: 0x%08X%08X%08X%08X\n", > + vcmdq_errs[3], vcmdq_errs[2], vcmdq_errs[1], vcmdq_errs[0]); Put warnings in one print only, spreading them like this just increases the risk of tearing.. It doesn't need to be all pretty. > +struct arm_smmu_cmdq *tegra241_cmdqv_get_cmdq(struct arm_smmu_device *smmu) > +{ > + struct tegra241_cmdqv *cmdqv = smmu->tegra241_cmdqv; > + struct tegra241_vintf *vintf = cmdqv->vintfs[0]; > + struct tegra241_vcmdq *vcmdq; > + u16 lidx; > + > + if (bypass_vcmdq) READ_ONCE > + return &smmu->cmdq; > + > + /* Use SMMU CMDQ if vintfs[0] is uninitialized */ > + if (!FIELD_GET(VINTF_ENABLED, atomic_read(&vintf->status))) > + return &smmu->cmdq; > + > + /* Use SMMU CMDQ if vintfs[0] has error status */ > + if (FIELD_GET(VINTF_STATUS, atomic_read(&vintf->status))) > + return &smmu->cmdq; Why atomic_read? The unlocked interaction with tegra241_cmdqv_handle_vintf0_error() doesn't seem especially sane IMHO > +static void tegra241_vcmdq_hw_deinit(struct tegra241_vcmdq *vcmdq) > +{ > + u32 gerrorn, gerror; > + > + if (vcmdq_write_config(0)) { > + vcmdq_err("GERRORN=0x%X\n", vcmdq_page0_readl_relaxed(GERRORN)); > + vcmdq_err("GERROR=0x%X\n", vcmdq_page0_readl_relaxed(GERROR)); > + vcmdq_err("CONS=0x%X\n", vcmdq_page0_readl_relaxed(CONS)); Less prints, include a unique message about why this is being printed.. > + } > + vcmdq_page0_writel_relaxed(0, PROD); > + vcmdq_page0_writel_relaxed(0, CONS); > + vcmdq_page1_writeq_relaxed(0, BASE); > + vcmdq_page1_writeq_relaxed(0, CONS_INDX_BASE); > + > + gerrorn = vcmdq_page0_readl_relaxed(GERRORN); > + gerror = vcmdq_page0_readl_relaxed(GERROR); > + if (gerror != gerrorn) { > + vcmdq_info("Uncleared error detected, resetting\n"); > + vcmdq_page0_writel(gerror, GERRORN); > + } > + > + vcmdq_dbg("deinited\n"); > +} > + > +static int tegra241_vcmdq_hw_init(struct tegra241_vcmdq *vcmdq) > +{ > + int ret; > + > + /* Configure and enable the vcmdq */ > + tegra241_vcmdq_hw_deinit(vcmdq); > + > + vcmdq_page1_writeq_relaxed(vcmdq->cmdq.q.q_base, BASE); > + > + ret = vcmdq_write_config(VCMDQ_EN); > + if (ret) { > + vcmdq_err("GERRORN=0x%X\n", vcmdq_page0_readl_relaxed(GERRORN)); > + vcmdq_err("GERROR=0x%X\n", vcmdq_page0_readl_relaxed(GERROR)); > + vcmdq_err("CONS=0x%X\n", vcmdq_page0_readl_relaxed(CONS)); > + return ret; Same print? > +static void tegra241_vcmdq_free_smmu_cmdq(struct tegra241_vcmdq *vcmdq) > +{ > + struct tegra241_cmdqv *cmdqv = vcmdq->cmdqv; > + struct arm_smmu_queue *q = &vcmdq->cmdq.q; > + size_t nents = 1 << q->llq.max_n_shift; > + > + dmam_free_coherent(cmdqv->smmu->dev, (nents * CMDQ_ENT_DWORDS) << 3, > + q->base, q->base_dma); If we are calling dmam_free, do we really need devm at all? > +static struct tegra241_vcmdq * > +tegra241_vintf_lvcmdq_alloc(struct tegra241_vintf *vintf, u16 lidx) > +{ > + struct tegra241_cmdqv *cmdqv = vintf->cmdqv; > + struct tegra241_vcmdq *vcmdq; > + int ret; > + > + vcmdq = devm_kzalloc(cmdqv->dev, sizeof(*vcmdq), GFP_KERNEL); > + if (!vcmdq) > + return ERR_PTR(-ENOMEM); > + > + ret = tegra241_vintf_lvcmdq_init(vintf, lidx, vcmdq); > + if (ret) > + goto free_vcmdq; > + > + /* Setup struct arm_smmu_cmdq data members */ > + ret = tegra241_vcmdq_alloc_smmu_cmdq(vcmdq); > + if (ret) > + goto deinit_lvcmdq; > + > + ret = tegra241_vcmdq_hw_init(vcmdq); > + if (ret) > + goto free_queue; > + > + vcmdq_dbg("allocated\n"); > + return vcmdq; > +free_queue: > + tegra241_vcmdq_free_smmu_cmdq(vcmdq); > +deinit_lvcmdq: > + tegra241_vintf_lvcmdq_deinit(vcmdq); > +free_vcmdq: > + devm_kfree(cmdqv->dev, vcmdq); > + return ERR_PTR(ret); > +} > + > +static void tegra241_vintf_lvcmdq_free(struct tegra241_vcmdq *vcmdq) > +{ > + tegra241_vcmdq_hw_deinit(vcmdq); > + tegra241_vcmdq_free_smmu_cmdq(vcmdq); > + tegra241_vintf_lvcmdq_deinit(vcmdq); > + devm_kfree(vcmdq->cmdqv->dev, vcmdq); Ditto for devm_kfree. > +struct tegra241_cmdqv * > +tegra241_cmdqv_acpi_probe(struct arm_smmu_device *smmu, int id) id is a u32. It might be clearer to just pass in the struct acpi_iort_node *? > +{ > + struct tegra241_cmdqv *cmdqv; > + > + cmdqv = tegra241_cmdqv_find_resource(smmu, id); > + if (!cmdqv) > + return NULL; > + > + if (tegra241_cmdqv_probe(cmdqv)) { > + if (cmdqv->irq > 0) > + devm_free_irq(smmu->dev, cmdqv->irq, cmdqv); > + devm_iounmap(smmu->dev, cmdqv->base); > + devm_kfree(smmu->dev, cmdqv); > + return NULL; Oh. Please don't use devm at all in this code then, it is not attached to a probed driver with the proper scope, devm isn't going to work in sensible way. Jason