On Sun, Nov 24, 2024 at 02:52:46AM +0530, Akhil P Oommen wrote: > On Tue, Nov 19, 2024 at 06:56:40PM +0100, Neil Armstrong wrote: > > The Adreno GMU Management Unit (GMU) can also scale DDR Bandwidth along > > the Frequency and Power Domain level, but by default we leave the > > OPP core scale the interconnect ddr path. > > > > In order to calculate vote values used by the GPU Management > > Unit (GMU), we need to parse all the possible OPP Bandwidths and > > GMU expects a table of votes for each DDR frequency corners. Can we > please try to figure out a way to do that? Generally, we should ensure the > data that is sent to GMU firmware matches downstream exactly. Because, > when something breaks in firmware or worse, at SoC level, it will be pretty > hard to narrow down the issue. So, I prefer to be very conservative about > this. > > KGSL keeps the ddr frequency table in the devicetree. That helps to keep > the driver lean, but I am not sure if that is viable upstream. > > -Akhil. > > > create a vote value to be sent to the appropriate Bus Control > > Modules (BCMs) declared in the GPU info struct. > > > > The vote array will then be used to dynamically generate the GMU > > bw_table sent during the GMU power-up. 
> > > > Signed-off-by: Neil Armstrong <neil.armstrong@xxxxxxxxxx> > > --- > > drivers/gpu/drm/msm/adreno/a6xx_gmu.c | 153 ++++++++++++++++++++++++++++++++++ > > drivers/gpu/drm/msm/adreno/a6xx_gmu.h | 14 ++++ > > drivers/gpu/drm/msm/adreno/a6xx_gpu.h | 1 + > > 3 files changed, 168 insertions(+) > > > > diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c > > index 14db7376c712d19446b38152e480bd5a1e0a5198..f6814d92a4edb29ba8a34a34aabb8b2324e9c6a4 100644 > > --- a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c > > +++ b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c > > @@ -9,6 +9,7 @@ > > #include <linux/pm_domain.h> > > #include <linux/pm_opp.h> > > #include <soc/qcom/cmd-db.h> > > +#include <soc/qcom/tcs.h> > > #include <drm/drm_gem.h> > > > > #include "a6xx_gpu.h" > > @@ -1287,6 +1288,109 @@ static int a6xx_gmu_memory_probe(struct a6xx_gmu *gmu) > > return 0; > > } > > > > +/** > > + * struct bcm_db - Auxiliary data pertaining to each Bus Clock Manager (BCM) > > + * @unit: divisor used to convert bytes/sec bw value to an RPMh msg > > + * @width: multiplier used to convert bytes/sec bw value to an RPMh msg > > + * @vcd: virtual clock domain that this bcm belongs to > > + * @reserved: reserved field > > + */ > > +struct bcm_db { > > + __le32 unit; > > + __le16 width; > > + u8 vcd; > > + u8 reserved; > > +}; Shouldn't this be a packed struct? It is naturally aligned, but still! > > + > > +static u64 bcm_div(u64 num, u32 base) > > +{ > > + /* Ensure that small votes aren't lost. 
*/ > > + if (num && num < base) > > + return 1; > > + > > + do_div(num, base); > > + > > + return num; > > +} > > + > > +static int a6xx_gmu_rpmh_bw_votes_init(const struct a6xx_info *info, > > + struct a6xx_gmu *gmu) > > +{ > > + const struct bcm_db *bcm_data[GMU_MAX_BCMS] = { 0 }; > > + unsigned int bcm_index, bw_index; > > + > > + /* Retrieve BCM data from cmd-db */ > > + for (bcm_index = 0; bcm_index < GMU_MAX_BCMS; bcm_index++) { > > + size_t count; > > + > > + /* Skip unconfigured BCM */ > > + if (!info->bcm[bcm_index].name) > > + continue; > > + > > + bcm_data[bcm_index] = cmd_db_read_aux_data( > > + info->bcm[bcm_index].name, > > + &count); > > + if (IS_ERR(bcm_data[bcm_index])) > > + return PTR_ERR(bcm_data[bcm_index]); > > + > > + if (!count) > > + return -EINVAL; > > + } > > + > > + /* Generate BCM votes values for each bandwidth & BCM */ > > + for (bw_index = 0; bw_index < gmu->nr_gpu_bws; bw_index++) { > > + u32 *data = gmu->gpu_bw_votes[bw_index]; > > + u32 bw = gmu->gpu_bw_table[bw_index]; > > + > > + /* Calculations loosely copied from bcm_aggregate() & tcs_cmd_gen() */ > > + for (bcm_index = 0; bcm_index < GMU_MAX_BCMS; bcm_index++) { > > + bool commit = false; > > + u64 peak, vote; > > + u16 width; > > + u32 unit; > > + > > + /* Skip unconfigured BCM */ > > + if (!info->bcm[bcm_index].name || !bcm_data[bcm_index]) > > + continue; > > + > > + if (bcm_index == GMU_MAX_BCMS - 1 || > > + (bcm_data[bcm_index + 1] && > > + bcm_data[bcm_index]->vcd != bcm_data[bcm_index + 1]->vcd)) > > + commit = true; > > + > > + if (!bw) { > > + data[bcm_index] = BCM_TCS_CMD(commit, false, 0, 0); > > + continue; > > + } > > + > > + if (info->bcm[bcm_index].fixed) { > > + u32 perfmode = 0; > > + > > + if (bw >= info->bcm[bcm_index].perfmode_bw) > > + perfmode = info->bcm[bcm_index].perfmode; > > + > > + data[bcm_index] = BCM_TCS_CMD(commit, true, 0, perfmode); > > + continue; > > + } > > + > > + /* Multiply the bandwidth by the width of the connection */ > > + width = 
le16_to_cpu(bcm_data[bcm_index]->width); > > + peak = bcm_div((u64)bw * width, info->bcm[bcm_index].buswidth); > > + > > + /* Input bandwidth value is in KBps, scale the value to BCM unit */ > > + unit = le32_to_cpu(bcm_data[bcm_index]->unit); > > + vote = bcm_div(peak * 1000ULL, unit); > > + > > + if (vote > BCM_TCS_CMD_VOTE_MASK) > > + vote = BCM_TCS_CMD_VOTE_MASK; > > + > > + data[bcm_index] = BCM_TCS_CMD(commit, true, vote, vote); > > + } > > + } > > + > > + return 0; > > +} > > + > > /* Return the 'arc-level' for the given frequency */ > > static unsigned int a6xx_gmu_get_arc_level(struct device *dev, > > unsigned long freq) > > @@ -1390,12 +1494,15 @@ static int a6xx_gmu_rpmh_arc_votes_init(struct device *dev, u32 *votes, > > * The GMU votes with the RPMh for itself and on behalf of the GPU but we need > > * to construct the list of votes on the CPU and send it over. Query the RPMh > > * voltage levels and build the votes > > + * The GMU can also vote for DDR interconnects, use the OPP bandwidth entries > > + * and BCM parameters to build the votes. 
> > */ > > > > static int a6xx_gmu_rpmh_votes_init(struct a6xx_gmu *gmu) > > { > > struct a6xx_gpu *a6xx_gpu = container_of(gmu, struct a6xx_gpu, gmu); > > struct adreno_gpu *adreno_gpu = &a6xx_gpu->base; > > + const struct a6xx_info *info = adreno_gpu->info->a6xx; > > struct msm_gpu *gpu = &adreno_gpu->base; > > int ret; > > > > @@ -1407,6 +1514,10 @@ static int a6xx_gmu_rpmh_votes_init(struct a6xx_gmu *gmu) > > ret |= a6xx_gmu_rpmh_arc_votes_init(gmu->dev, gmu->cx_arc_votes, > > gmu->gmu_freqs, gmu->nr_gmu_freqs, "cx.lvl"); > > > > + /* Build the interconnect votes */ > > + if (adreno_gpu->info->features & ADRENO_FEAT_GMU_BW_VOTE) > > + ret |= a6xx_gmu_rpmh_bw_votes_init(info, gmu); > > + > > return ret; > > } > > > > @@ -1442,6 +1553,38 @@ static int a6xx_gmu_build_freq_table(struct device *dev, unsigned long *freqs, > > return index; > > } > > > > +static int a6xx_gmu_build_bw_table(struct device *dev, unsigned long *bandwidths, > > + u32 size) > > +{ > > + int count = dev_pm_opp_get_opp_count(dev); I suppose this doesn't count the opps which are not supported by the SKU. If we can go through *all* OPPs in the opp table irrespective of the SKU, we will get something close to a full DDR bw table I mentioned in the previous mail. 
> > + struct dev_pm_opp *opp; > > + int i, index = 0; > > + unsigned int bandwidth = 1; > > + > > + /* > > + * The OPP table doesn't contain the "off" bandwidth level so we need to > > + * add 1 to the table size to account for it > > + */ > > + > > + if (WARN(count + 1 > size, > > + "The GMU bandwidth table is being truncated\n")) > > + count = size - 1; > > + > > + /* Set the "off" bandwidth */ > > + bandwidths[index++] = 0; > > + > > + for (i = 0; i < count; i++) { > > + opp = dev_pm_opp_find_bw_ceil(dev, &bandwidth, 0); > > + if (IS_ERR(opp)) > > + break; > > + > > + dev_pm_opp_put(opp); > > + bandwidths[index++] = bandwidth++; > > + } > > + > > + return index; > > +} > > + > > static int a6xx_gmu_pwrlevels_probe(struct a6xx_gmu *gmu) > > { > > struct a6xx_gpu *a6xx_gpu = container_of(gmu, struct a6xx_gpu, gmu); > > @@ -1472,6 +1615,16 @@ static int a6xx_gmu_pwrlevels_probe(struct a6xx_gmu *gmu) > > > > gmu->current_perf_index = gmu->nr_gpu_freqs - 1; > > > > + /* > > + * The GMU also handles GPU Interconnect Votes so build a list > > + * of DDR bandwidths from the GPU OPP table > > + */ > > + if (adreno_gpu->info->features & ADRENO_FEAT_GMU_BW_VOTE) > > + gmu->nr_gpu_bws = a6xx_gmu_build_bw_table(&gpu->pdev->dev, > > + gmu->gpu_bw_table, ARRAY_SIZE(gmu->gpu_bw_table)); > > + > > + gmu->current_perf_index = gmu->nr_gpu_freqs - 1; duplicate line. 
> > + > > /* Build the list of RPMh votes that we'll send to the GMU */ > > return a6xx_gmu_rpmh_votes_init(gmu); > > } > > diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gmu.h b/drivers/gpu/drm/msm/adreno/a6xx_gmu.h > > index b4a79f88ccf45cfe651c86d2a9da39541c5772b3..03603eadc0f9ed866899c95e99f333a511ebc3c1 100644 > > --- a/drivers/gpu/drm/msm/adreno/a6xx_gmu.h > > +++ b/drivers/gpu/drm/msm/adreno/a6xx_gmu.h > > @@ -19,6 +19,16 @@ struct a6xx_gmu_bo { > > u64 iova; > > }; > > > > +#define GMU_MAX_BCMS 3 > > + > > +struct a6xx_bcm { > > + char *name; > > + unsigned int buswidth; > > + bool fixed; > > + unsigned int perfmode; > > + unsigned int perfmode_bw; > > +}; > > + > > /* > > * These define the different GMU wake up options - these define how both the > > * CPU and the GMU bring up the hardware > > @@ -82,6 +92,10 @@ struct a6xx_gmu { > > unsigned long gpu_freqs[16]; > > u32 gx_arc_votes[16]; > > > > + int nr_gpu_bws; > > + unsigned long gpu_bw_table[16]; > > + u32 gpu_bw_votes[16][GMU_MAX_BCMS]; > > + > > int nr_gmu_freqs; > > unsigned long gmu_freqs[4]; > > u32 cx_arc_votes[4]; > > diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.h b/drivers/gpu/drm/msm/adreno/a6xx_gpu.h > > index 4aceffb6aae89c781facc2a6e4a82b20b341b6cb..5b80919e595fa1ba0a3afcca55feb89e60870cb1 100644 > > --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.h > > +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.h > > @@ -44,6 +44,7 @@ struct a6xx_info { > > u32 gmu_chipid; > > u32 gmu_cgc_mode; > > u32 prim_fifo_threshold; > > + const struct a6xx_bcm bcm[GMU_MAX_BCMS]; This table is duplicated a lot. Let's keep a pointer instead. We can probably use this pointer as a flag to check for GMU_IB_VOTE support too. -Akhil > > }; > > > > struct a6xx_gpu { > > > > -- > > 2.34.1 > >