On 2020-07-10 22:38, Rob Clark wrote:
> On Thu, Jun 18, 2020 at 7:09 AM Kalyan Thota <kalyan_t@xxxxxxxxxxxxxx>
> wrote:
>>
>> This change adds support to scale src clk and bandwidth as
>> per composition requirements.
>>
>> Interconnect registration for bw has been moved to mdp
>> device node from mdss to facilitate the scaling.
>>
>> Changes in v1:
>> - Address armv7 compilation issues with the patch (Rob)
>>
>> Signed-off-by: Kalyan Thota <kalyan_t@xxxxxxxxxxxxxx>
>> ---
>> drivers/gpu/drm/msm/disp/dpu1/dpu_core_perf.c | 109
>> +++++++++++++++++++++----
>> drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c | 5 +-
>> drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.h | 4 +
>> drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c | 37 ++++++++-
>> drivers/gpu/drm/msm/disp/dpu1/dpu_kms.h | 4 +
>> drivers/gpu/drm/msm/disp/dpu1/dpu_mdss.c | 9 +-
>> drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c | 84
>> +++++++++++++++++++
>> drivers/gpu/drm/msm/disp/dpu1/dpu_plane.h | 4 +
>> 8 files changed, 233 insertions(+), 23 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_core_perf.c
>> b/drivers/gpu/drm/msm/disp/dpu1/dpu_core_perf.c
>> index 7c230f7..e52bc44 100644
>> --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_core_perf.c
>> +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_core_perf.c
>> @@ -29,6 +29,74 @@ enum dpu_perf_mode {
>> DPU_PERF_MODE_MAX
>> };
>>
>> +/**
>> + * @_dpu_core_perf_calc_bw() - to calculate BW per crtc
>> + * @kms - pointer to the dpu_kms
>> + * @crtc - pointer to a crtc
>> + * Return: returns aggregated BW for all planes in crtc.
>> + */
>> +static u64 _dpu_core_perf_calc_bw(struct dpu_kms *kms,
>> + struct drm_crtc *crtc)
>> +{
>> + struct drm_plane *plane;
>> + struct dpu_plane_state *pstate;
>> + u64 crtc_plane_bw = 0;
>> + u32 bw_factor;
>> +
>> + drm_atomic_crtc_for_each_plane(plane, crtc) {
>> + pstate = to_dpu_plane_state(plane->state);
>> + if (!pstate)
>> + continue;
>> +
>> + crtc_plane_bw += pstate->plane_fetch_bw;
>> + }
>> +
>> + bw_factor = kms->catalog->perf.bw_inefficiency_factor;
>> + if (bw_factor) {
>> + crtc_plane_bw *= bw_factor;
>> + do_div(crtc_plane_bw, 100);
>> + }
>> +
>> + return crtc_plane_bw;
>> +}
>> +
>> +/**
>> + * _dpu_core_perf_calc_clk() - to calculate clock per crtc
>> + * @kms - pointer to the dpu_kms
>> + * @crtc - pointer to a crtc
>> + * @state - pointer to a crtc state
>> + * Return: returns max clk for all planes in crtc.
>> + */
>> +static u64 _dpu_core_perf_calc_clk(struct dpu_kms *kms,
>> + struct drm_crtc *crtc, struct drm_crtc_state *state)
>> +{
>> + struct drm_plane *plane;
>> + struct dpu_plane_state *pstate;
>> + struct drm_display_mode *mode;
>> + u64 crtc_clk;
>> + u32 clk_factor;
>> +
>> + mode = &state->adjusted_mode;
>> +
>> + crtc_clk = mode->vtotal * mode->hdisplay *
>> drm_mode_vrefresh(mode);
>> +
>> + drm_atomic_crtc_for_each_plane(plane, crtc) {
>> + pstate = to_dpu_plane_state(plane->state);
>> + if (!pstate)
>> + continue;
>> +
>> + crtc_clk = max(pstate->plane_clk, crtc_clk);
>> + }
>> +
>> + clk_factor = kms->catalog->perf.clk_inefficiency_factor;
>> + if (clk_factor) {
>> + crtc_clk *= clk_factor;
>> + do_div(crtc_clk, 100);
>> + }
>> +
>> + return crtc_clk;
>> +}
>> +
>> static struct dpu_kms *_dpu_crtc_get_kms(struct drm_crtc *crtc)
>> {
>> struct msm_drm_private *priv;
>> @@ -51,12 +119,7 @@ static void _dpu_core_perf_calc_crtc(struct
>> dpu_kms *kms,
>> dpu_cstate = to_dpu_crtc_state(state);
>> memset(perf, 0, sizeof(struct dpu_core_perf_params));
>>
>> - if (!dpu_cstate->bw_control) {
>> - perf->bw_ctl = kms->catalog->perf.max_bw_high *
>> - 1000ULL;
>> - perf->max_per_pipe_ib = perf->bw_ctl;
>> - perf->core_clk_rate = kms->perf.max_core_clk_rate;
>> - } else if (kms->perf.perf_tune.mode == DPU_PERF_MODE_MINIMUM)
>> {
>> + if (kms->perf.perf_tune.mode == DPU_PERF_MODE_MINIMUM) {
>> perf->bw_ctl = 0;
>> perf->max_per_pipe_ib = 0;
>> perf->core_clk_rate = 0;
>> @@ -64,6 +127,10 @@ static void _dpu_core_perf_calc_crtc(struct
>> dpu_kms *kms,
>> perf->bw_ctl = kms->perf.fix_core_ab_vote;
>> perf->max_per_pipe_ib = kms->perf.fix_core_ib_vote;
>> perf->core_clk_rate = kms->perf.fix_core_clk_rate;
>> + } else {
>> + perf->bw_ctl = _dpu_core_perf_calc_bw(kms, crtc);
>> + perf->max_per_pipe_ib =
>> kms->catalog->perf.min_dram_ib;
>> + perf->core_clk_rate = _dpu_core_perf_calc_clk(kms,
>> crtc, state);
>> }
>>
>> DPU_DEBUG(
>> @@ -115,11 +182,7 @@ int dpu_core_perf_crtc_check(struct drm_crtc
>> *crtc,
>> DPU_DEBUG("crtc:%d bw:%llu ctrl:%d\n",
>> tmp_crtc->base.id,
>> tmp_cstate->new_perf.bw_ctl,
>> tmp_cstate->bw_control);
>> - /*
>> - * For bw check only use the bw if the
>> - * atomic property has been already set
>> - */
>> - if (tmp_cstate->bw_control)
>> +
>> bw_sum_of_intfs +=
>> tmp_cstate->new_perf.bw_ctl;
>> }
>>
>> @@ -131,9 +194,7 @@ int dpu_core_perf_crtc_check(struct drm_crtc
>> *crtc,
>>
>> DPU_DEBUG("final threshold bw limit = %d\n",
>> threshold);
>>
>> - if (!dpu_cstate->bw_control) {
>> - DPU_DEBUG("bypass bandwidth check\n");
>> - } else if (!threshold) {
>> + if (!threshold) {
>> DPU_ERROR("no bandwidth limits specified\n");
>> return -E2BIG;
>> } else if (bw > threshold) {
>> @@ -154,7 +215,11 @@ static int _dpu_core_perf_crtc_update_bus(struct
>> dpu_kms *kms,
>> =
>> dpu_crtc_get_client_type(crtc);
>> struct drm_crtc *tmp_crtc;
>> struct dpu_crtc_state *dpu_cstate;
>> - int ret = 0;
>> + int i, ret = 0;
>> + u64 avg_bw;
>> +
>> + if (!kms->num_paths)
>> + return -EINVAL;
>>
>> drm_for_each_crtc(tmp_crtc, crtc->dev) {
>> if (tmp_crtc->enabled &&
>> @@ -165,10 +230,20 @@ static int _dpu_core_perf_crtc_update_bus(struct
>> dpu_kms *kms,
>> perf.max_per_pipe_ib =
>> max(perf.max_per_pipe_ib,
>>
>> dpu_cstate->new_perf.max_per_pipe_ib);
>>
>> - DPU_DEBUG("crtc=%d bw=%llu\n",
>> tmp_crtc->base.id,
>> - dpu_cstate->new_perf.bw_ctl);
>> + perf.bw_ctl += dpu_cstate->new_perf.bw_ctl;
>> +
>> + DPU_DEBUG("crtc=%d bw=%llu paths:%d\n",
>> + tmp_crtc->base.id,
>> + dpu_cstate->new_perf.bw_ctl,
>> kms->num_paths);
>> }
>> }
>> +
>> + avg_bw = perf.bw_ctl;
>> + do_div(avg_bw, (kms->num_paths * 1000)); /*Bps_to_icc*/
>> +
>> + for (i = 0; i < kms->num_paths; i++)
>> + icc_set_bw(kms->path[i], avg_bw,
>> perf.max_per_pipe_ib);
>> +
>> return ret;
>> }
>>
>> diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c
>> b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c
>> index 29d4fde..8f2357d 100644
>> --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c
>> +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c
>> @@ -541,7 +541,8 @@
>> .max_bw_high = 6800000,
>> .min_core_ib = 2400000,
>> .min_llcc_ib = 800000,
>> - .min_dram_ib = 800000,
>> + .min_dram_ib = 1600000,
>> + .min_prefill_lines = 24,
>> .danger_lut_tbl = {0xff, 0xffff, 0x0},
>> .qos_lut_tbl = {
>> {.nentry = ARRAY_SIZE(sc7180_qos_linear),
>> @@ -558,6 +559,8 @@
>> {.rd_enable = 1, .wr_enable = 1},
>> {.rd_enable = 1, .wr_enable = 0}
>> },
>> + .clk_inefficiency_factor = 105,
>> + .bw_inefficiency_factor = 120,
>> };
>>
>> /*************************************************************
>> diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.h
>> b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.h
>> index f7de438..f2a5fe2 100644
>> --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.h
>> +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.h
>> @@ -651,6 +651,8 @@ struct dpu_perf_cdp_cfg {
>> * @downscaling_prefill_lines downscaling latency in lines
>> * @amortizable_theshold minimum y position for traffic shaping
>> prefill
>> * @min_prefill_lines minimum pipeline latency in lines
>> + * @clk_inefficiency_factor DPU src clock inefficiency factor
>> + * @bw_inefficiency_factor DPU axi bus bw inefficiency factor
>> * @safe_lut_tbl: LUT tables for safe signals
>> * @danger_lut_tbl: LUT tables for danger signals
>> * @qos_lut_tbl: LUT tables for QoS signals
>> @@ -675,6 +677,8 @@ struct dpu_perf_cfg {
>> u32 downscaling_prefill_lines;
>> u32 amortizable_threshold;
>> u32 min_prefill_lines;
>> + u32 clk_inefficiency_factor;
>> + u32 bw_inefficiency_factor;
>> u32 safe_lut_tbl[DPU_QOS_LUT_USAGE_MAX];
>> u32 danger_lut_tbl[DPU_QOS_LUT_USAGE_MAX];
>> struct dpu_qos_lut_tbl qos_lut_tbl[DPU_QOS_LUT_USAGE_MAX];
>> diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c
>> b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c
>> index b8615d4..a5da7aa 100644
>> --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c
>> +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c
>> @@ -303,6 +303,28 @@ static int dpu_kms_global_obj_init(struct dpu_kms
>> *dpu_kms)
>> return 0;
>> }
>>
>> +static int dpu_kms_parse_data_bus_icc_path(struct dpu_kms *dpu_kms)
>> +{
>> + struct icc_path *path0;
>> + struct icc_path *path1;
>> + struct drm_device *dev = dpu_kms->dev;
>> +
>> + path0 = of_icc_get(dev->dev, "mdp0-mem");
>> + path1 = of_icc_get(dev->dev, "mdp1-mem");
>> +
>> + if (IS_ERR_OR_NULL(path0))
>> + return PTR_ERR_OR_ZERO(path0);
>> +
>> + dpu_kms->path[0] = path0;
>> + dpu_kms->num_paths = 1;
>> +
>> + if (!IS_ERR_OR_NULL(path1)) {
>> + dpu_kms->path[1] = path1;
>> + dpu_kms->num_paths++;
>> + }
>> + return 0;
>> +}
>
>
> so wait, why do we need a 2nd nearly identical copy of
> dpu_mdss_parse_data_bus_icc_path() for sc7180? And tracking of the
> path in dpu_mdss for some gens and dpu_kms in other gens?
>
> (I have a suspicion that the answer is dpu has too much indirection
> and abstraction.)
>
> BR,
> -R
Hi Rob,
As you may recall, we discussed this earlier. We don't want to
change the way interconnects are defined in the DT.
But since this change is about scaling the BW per composition cycle,
the icc paths need to be available to the MDP node in the kms structure