On Fri, May 10, 2024 at 5:52 AM Bibek Kumar Patro <quic_bibekkum@xxxxxxxxxxx> wrote: > > > > On 5/1/2024 12:30 AM, Rob Clark wrote: > > On Tue, Jan 23, 2024 at 7:00 AM Bibek Kumar Patro > > <quic_bibekkum@xxxxxxxxxxx> wrote: > >> > >> Currently in Qualcomm SoCs the default prefetch is set to 1 which allows > >> the TLB to fetch just the next page table. MMU-500 features ACTLR > >> register which is implementation defined and is used for Qualcomm SoCs > >> to have a custom prefetch setting enabling TLB to prefetch the next set > >> of page tables accordingly allowing for faster translations. > >> > >> ACTLR value is unique for each SMR (Stream matching register) and stored > >> in a pre-populated table. This value is set to the register during > >> context bank initialisation. > >> > >> Signed-off-by: Bibek Kumar Patro <quic_bibekkum@xxxxxxxxxxx> > >> --- > >> drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c | 61 ++++++++++++++++++++++ > >> drivers/iommu/arm/arm-smmu/arm-smmu-qcom.h | 16 +++++- > >> drivers/iommu/arm/arm-smmu/arm-smmu.c | 5 +- > >> drivers/iommu/arm/arm-smmu/arm-smmu.h | 5 ++ > >> 4 files changed, 84 insertions(+), 3 deletions(-) > >> > >> diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c > >> index 333daeb18c1c..6004c6d9a7b2 100644 > >> --- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c > >> +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c > >> @@ -215,10 +215,42 @@ static bool qcom_adreno_can_do_ttbr1(struct arm_smmu_device *smmu) > >> return true; > >> } > >> > >> +static void qcom_smmu_set_actlr(struct device *dev, struct arm_smmu_device *smmu, int cbndx, > >> + const struct actlr_config *actlrcfg, const size_t num_actlrcfg) > >> +{ > >> + struct arm_smmu_master_cfg *cfg = dev_iommu_priv_get(dev); > >> + struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); > >> + struct arm_smmu_smr *smr; > >> + u16 mask; > >> + int idx; > >> + u16 id; > >> + int i; > >> + int j; > >> + > >> + for (i = 0; i < num_actlrcfg; i++) { > >> + id = actlrcfg[i].sid; > >> + mask = actlrcfg[i].mask; > >> + > >> + for_each_cfg_sme(cfg, fwspec, j, idx) { > >> + smr = &smmu->smrs[idx]; > >> + if (smr_is_subset(smr, id, mask)) { > >> + arm_smmu_cb_write(smmu, cbndx, ARM_SMMU_CB_ACTLR, > >> + actlrcfg[i].actlr); > > > > So, this makes ACTLR look like kind of a FIFO. But I'm looking at > > downstream kgsl's PRR thing (which we'll need to implement vulkan > > sparse residency), and it appears to be wanting to set BIT(5) in ACTLR > > to enable PRR. > > > > val = KGSL_IOMMU_GET_CTX_REG(ctx, KGSL_IOMMU_CTX_ACTLR); > > val |= FIELD_PREP(KGSL_IOMMU_ACTLR_PRR_ENABLE, 1); > > KGSL_IOMMU_SET_CTX_REG(ctx, KGSL_IOMMU_CTX_ACTLR, val); > > > > Any idea how this works? And does it need to be done before or after > > the ACTLR programming done in this patch? > > > > BR, > > -R > > > > Hi Rob, > > Can you please help provide some more clarification on the FIFO part? By > FIFO are you referring to the storing of ACTLR data in the table? Yeah, I mean it is writing the same ACTLR register multiple times to program the table. I'm wondering if that means we need to program the table in a particular order compared to setting the PRR bit? Like do we need to program PRR bit first, or last? I'm planning on adding an adreno_smmu_priv interface so that drm/msm can call into arm-smmu-qcom to setup the PRR bit and the related PRR_CFG_LADDR/PRR_CFG_UADDR registers. And I'm just wondering if there is an ordering constraint wrt. when qcom_smmu_set_actlr() is called? BR, -R > Thanks for pointing to the downstream implementation of kgsl driver for > the PRR bit. Since kgsl driver is already handling this PRR bit's > setting, this makes setting the PRR BIT(5) by SMMU driver redundant. > Thanks for bringing up this point. > I will send v10 patch series removing this BIT(5) setting from the ACTLR > table. > > Thanks & regards, > Bibek > > >> + break; > >> + } > >> + } > >> + } > >> +} > >> + > >> static int qcom_adreno_smmu_init_context(struct arm_smmu_domain *smmu_domain, > >> struct io_pgtable_cfg *pgtbl_cfg, struct device *dev) > >> { > >> + struct arm_smmu_device *smmu = smmu_domain->smmu; > >> + struct qcom_smmu *qsmmu = to_qcom_smmu(smmu); > >> + const struct actlr_variant *actlrvar; > >> + int cbndx = smmu_domain->cfg.cbndx; > >> struct adreno_smmu_priv *priv; > >> + int i; > >> > >> smmu_domain->cfg.flush_walk_prefer_tlbiasid = true; > >> > >> @@ -248,6 +280,18 @@ static int qcom_adreno_smmu_init_context(struct arm_smmu_domain *smmu_domain, > >> priv->set_stall = qcom_adreno_smmu_set_stall; > >> priv->resume_translation = qcom_adreno_smmu_resume_translation; > >> > >> + actlrvar = qsmmu->data->actlrvar; > >> + if (!actlrvar) > >> + return 0; > >> + > >> + for (i = 0; i < qsmmu->data->num_smmu ; i++) { > >> + if (actlrvar[i].io_start == smmu->ioaddr) { > >> + qcom_smmu_set_actlr(dev, smmu, cbndx, actlrvar[i].actlrcfg, > >> + actlrvar[i].num_actlrcfg); > >> + break; > >> + } > >> + } > >> + > >> return 0; > >> } > >> > >> @@ -274,7 +318,24 @@ static const struct of_device_id qcom_smmu_client_of_match[] __maybe_unused = { > >> static int qcom_smmu_init_context(struct arm_smmu_domain *smmu_domain, > >> struct io_pgtable_cfg *pgtbl_cfg, struct device *dev) > >> { > >> + struct arm_smmu_device *smmu = smmu_domain->smmu; > >> + struct qcom_smmu *qsmmu = to_qcom_smmu(smmu); > >> + const struct actlr_variant *actlrvar; > >> + int cbndx = smmu_domain->cfg.cbndx; > >> + int i; > >> + > >> smmu_domain->cfg.flush_walk_prefer_tlbiasid = true; > >> + actlrvar = qsmmu->data->actlrvar; > >> + if (!actlrvar) > >> + return 0; > >> + > >> + for (i = 0; i < qsmmu->data->num_smmu ; i++) { > >> + if (actlrvar[i].io_start == smmu->ioaddr) { > >> + qcom_smmu_set_actlr(dev, smmu, cbndx, actlrvar[i].actlrcfg, > >> + actlrvar[i].num_actlrcfg); > >> + break; > >> + } > >> + } > >> > >> return 0; > >> } > >> diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.h b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.h > >> index f3b91963e234..3f651242de7c 100644 > >> --- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.h > >> +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.h > >> @@ -1,6 +1,6 @@ > >> /* SPDX-License-Identifier: GPL-2.0-only */ > >> /* > >> - * Copyright (c) 2022, Qualcomm Innovation Center, Inc. All rights reserved. > >> + * Copyright (c) 2022-2023, Qualcomm Innovation Center, Inc. All rights reserved. > >> */ > >> > >> #ifndef _ARM_SMMU_QCOM_H > >> @@ -24,8 +24,22 @@ struct qcom_smmu_config { > >> const u32 *reg_offset; > >> }; > >> > >> +struct actlr_config { > >> + u16 sid; > >> + u16 mask; > >> + u32 actlr; > >> +}; > >> + > >> +struct actlr_variant { > >> + const resource_size_t io_start; > >> + const struct actlr_config * const actlrcfg; > >> + const size_t num_actlrcfg; > >> +}; > >> + > >> struct qcom_smmu_match_data { > >> + const struct actlr_variant * const actlrvar; > >> const struct qcom_smmu_config *cfg; > >> + const size_t num_smmu; > >> const struct arm_smmu_impl *impl; > >> const struct arm_smmu_impl *adreno_impl; > >> }; > >> diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.c b/drivers/iommu/arm/arm-smmu/arm-smmu.c > >> index d6d1a2a55cc0..0c7f700b27dd 100644 > >> --- a/drivers/iommu/arm/arm-smmu/arm-smmu.c > >> +++ b/drivers/iommu/arm/arm-smmu/arm-smmu.c > >> @@ -990,9 +990,10 @@ static int arm_smmu_find_sme(struct arm_smmu_device *smmu, u16 id, u16 mask) > >> * expect simply identical entries for this case, but there's > >> * no harm in accommodating the generalisation. > >> */ > >> - if ((mask & smrs[i].mask) == mask && > >> - !((id ^ smrs[i].id) & ~smrs[i].mask)) > >> + > >> + if (smr_is_subset(&smrs[i], id, mask)) > >> return i; > >> + > >> /* > >> * If the new entry has any other overlap with an existing one, > >> * though, then there always exists at least one stream ID > >> diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.h b/drivers/iommu/arm/arm-smmu/arm-smmu.h > >> index 703fd5817ec1..2e4f65412c6b 100644 > >> --- a/drivers/iommu/arm/arm-smmu/arm-smmu.h > >> +++ b/drivers/iommu/arm/arm-smmu/arm-smmu.h > >> @@ -501,6 +501,11 @@ static inline void arm_smmu_writeq(struct arm_smmu_device *smmu, int page, > >> writeq_relaxed(val, arm_smmu_page(smmu, page) + offset); > >> } > >> > >> +static inline bool smr_is_subset(struct arm_smmu_smr *smrs, u16 id, u16 mask) > >> +{ > >> + return (mask & smrs->mask) == mask && !((id ^ smrs->id) & ~smrs->mask); > >> +} > >> + > >> #define ARM_SMMU_GR0 0 > >> #define ARM_SMMU_GR1 1 > >> #define ARM_SMMU_CB(s, n) ((s)->numpage + (n)) > >> -- > >> 2.17.1 > >> > >>