On Thu, Jun 18, 2020 at 7:09 AM Kalyan Thota <kalyan_t@xxxxxxxxxxxxxx> wrote:
>
> This change adds support to scale src clk and bandwidth as
> per composition requirements.
>
> Interconnect registration for bw has been moved to mdp
> device node from mdss to facilitate the scaling.
>
> Changes in v1:
> - Address armv7 compilation issues with the patch (Rob)
>
> Signed-off-by: Kalyan Thota <kalyan_t@xxxxxxxxxxxxxx>
> ---
>  drivers/gpu/drm/msm/disp/dpu1/dpu_core_perf.c  | 109 +++++++++++++++++++++----
>  drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c |   5 +-
>  drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.h |   4 +
>  drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c        |  37 ++++++++-
>  drivers/gpu/drm/msm/disp/dpu1/dpu_kms.h        |   4 +
>  drivers/gpu/drm/msm/disp/dpu1/dpu_mdss.c       |   9 +-
>  drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c      |  84 +++++++++++++++++++
>  drivers/gpu/drm/msm/disp/dpu1/dpu_plane.h      |   4 +
>  8 files changed, 233 insertions(+), 23 deletions(-)
>
> diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_core_perf.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_core_perf.c
> index 7c230f7..e52bc44 100644
> --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_core_perf.c
> +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_core_perf.c
> @@ -29,6 +29,74 @@ enum dpu_perf_mode {
>         DPU_PERF_MODE_MAX
>  };
>
> +/**
> + * @_dpu_core_perf_calc_bw() - to calculate BW per crtc
> + * @kms - pointer to the dpu_kms
> + * @crtc - pointer to a crtc
> + * Return: returns aggregated BW for all planes in crtc.
> + */
> +static u64 _dpu_core_perf_calc_bw(struct dpu_kms *kms,
> +               struct drm_crtc *crtc)
> +{
> +       struct drm_plane *plane;
> +       struct dpu_plane_state *pstate;
> +       u64 crtc_plane_bw = 0;
> +       u32 bw_factor;
> +
> +       drm_atomic_crtc_for_each_plane(plane, crtc) {
> +               pstate = to_dpu_plane_state(plane->state);
> +               if (!pstate)
> +                       continue;
> +
> +               crtc_plane_bw += pstate->plane_fetch_bw;
> +       }
> +
> +       bw_factor = kms->catalog->perf.bw_inefficiency_factor;
> +       if (bw_factor) {
> +               crtc_plane_bw *= bw_factor;
> +               do_div(crtc_plane_bw, 100);
> +       }
> +
> +       return crtc_plane_bw;
> +}
> +
> +/**
> + * _dpu_core_perf_calc_clk() - to calculate clock per crtc
> + * @kms - pointer to the dpu_kms
> + * @crtc - pointer to a crtc
> + * @state - pointer to a crtc state
> + * Return: returns max clk for all planes in crtc.
> + */
> +static u64 _dpu_core_perf_calc_clk(struct dpu_kms *kms,
> +               struct drm_crtc *crtc, struct drm_crtc_state *state)
> +{
> +       struct drm_plane *plane;
> +       struct dpu_plane_state *pstate;
> +       struct drm_display_mode *mode;
> +       u64 crtc_clk;
> +       u32 clk_factor;
> +
> +       mode = &state->adjusted_mode;
> +
> +       crtc_clk = mode->vtotal * mode->hdisplay * drm_mode_vrefresh(mode);
> +
> +       drm_atomic_crtc_for_each_plane(plane, crtc) {
> +               pstate = to_dpu_plane_state(plane->state);
> +               if (!pstate)
> +                       continue;
> +
> +               crtc_clk = max(pstate->plane_clk, crtc_clk);
> +       }
> +
> +       clk_factor = kms->catalog->perf.clk_inefficiency_factor;
> +       if (clk_factor) {
> +               crtc_clk *= clk_factor;
> +               do_div(crtc_clk, 100);
> +       }
> +
> +       return crtc_clk;
> +}
> +
>  static struct dpu_kms *_dpu_crtc_get_kms(struct drm_crtc *crtc)
>  {
>         struct msm_drm_private *priv;
> @@ -51,12 +119,7 @@ static void _dpu_core_perf_calc_crtc(struct dpu_kms *kms,
>         dpu_cstate = to_dpu_crtc_state(state);
>         memset(perf, 0, sizeof(struct dpu_core_perf_params));
>
> -       if (!dpu_cstate->bw_control) {
> -               perf->bw_ctl = kms->catalog->perf.max_bw_high *
> -                               1000ULL;
> -               perf->max_per_pipe_ib = perf->bw_ctl;
> -               perf->core_clk_rate = kms->perf.max_core_clk_rate;
> -       } else if (kms->perf.perf_tune.mode == DPU_PERF_MODE_MINIMUM) {
> +       if (kms->perf.perf_tune.mode == DPU_PERF_MODE_MINIMUM) {
>                 perf->bw_ctl = 0;
>                 perf->max_per_pipe_ib = 0;
>                 perf->core_clk_rate = 0;
> @@ -64,6 +127,10 @@ static void _dpu_core_perf_calc_crtc(struct dpu_kms *kms,
>                 perf->bw_ctl = kms->perf.fix_core_ab_vote;
>                 perf->max_per_pipe_ib = kms->perf.fix_core_ib_vote;
>                 perf->core_clk_rate = kms->perf.fix_core_clk_rate;
> +       } else {
> +               perf->bw_ctl = _dpu_core_perf_calc_bw(kms, crtc);
> +               perf->max_per_pipe_ib = kms->catalog->perf.min_dram_ib;
> +               perf->core_clk_rate = _dpu_core_perf_calc_clk(kms, crtc, state);
>         }
>
>         DPU_DEBUG(
> @@ -115,11 +182,7 @@ int dpu_core_perf_crtc_check(struct drm_crtc *crtc,
>                         DPU_DEBUG("crtc:%d bw:%llu ctrl:%d\n",
>                                 tmp_crtc->base.id, tmp_cstate->new_perf.bw_ctl,
>                                 tmp_cstate->bw_control);
> -                       /*
> -                        * For bw check only use the bw if the
> -                        * atomic property has been already set
> -                        */
> -                       if (tmp_cstate->bw_control)
> +
>                         bw_sum_of_intfs += tmp_cstate->new_perf.bw_ctl;
>                 }
>
> @@ -131,9 +194,7 @@ int dpu_core_perf_crtc_check(struct drm_crtc *crtc,
>
>         DPU_DEBUG("final threshold bw limit = %d\n", threshold);
>
> -       if (!dpu_cstate->bw_control) {
> -               DPU_DEBUG("bypass bandwidth check\n");
> -       } else if (!threshold) {
> +       if (!threshold) {
>                 DPU_ERROR("no bandwidth limits specified\n");
>                 return -E2BIG;
>         } else if (bw > threshold) {
> @@ -154,7 +215,11 @@ static int _dpu_core_perf_crtc_update_bus(struct dpu_kms *kms,
>                         = dpu_crtc_get_client_type(crtc);
>         struct drm_crtc *tmp_crtc;
>         struct dpu_crtc_state *dpu_cstate;
> -       int ret = 0;
> +       int i, ret = 0;
> +       u64 avg_bw;
> +
> +       if (!kms->num_paths)
> +               return -EINVAL;
>
>         drm_for_each_crtc(tmp_crtc, crtc->dev) {
>                 if (tmp_crtc->enabled &&
> @@ -165,10 +230,20 @@ static int _dpu_core_perf_crtc_update_bus(struct dpu_kms *kms,
>                         perf.max_per_pipe_ib = max(perf.max_per_pipe_ib,
>                                 dpu_cstate->new_perf.max_per_pipe_ib);
>
> -                       DPU_DEBUG("crtc=%d bw=%llu\n", tmp_crtc->base.id,
> -                               dpu_cstate->new_perf.bw_ctl);
> +                       perf.bw_ctl += dpu_cstate->new_perf.bw_ctl;
> +
> +                       DPU_DEBUG("crtc=%d bw=%llu paths:%d\n",
> +                               tmp_crtc->base.id,
> +                               dpu_cstate->new_perf.bw_ctl, kms->num_paths);
>                 }
>         }
> +
> +       avg_bw = perf.bw_ctl;
> +       do_div(avg_bw, (kms->num_paths * 1000)); /*Bps_to_icc*/
> +
> +       for (i = 0; i < kms->num_paths; i++)
> +               icc_set_bw(kms->path[i], avg_bw, perf.max_per_pipe_ib);
> +
>         return ret;
>  }
>
> diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c
> index 29d4fde..8f2357d 100644
> --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c
> +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c
> @@ -541,7 +541,8 @@
>         .max_bw_high = 6800000,
>         .min_core_ib = 2400000,
>         .min_llcc_ib = 800000,
> -       .min_dram_ib = 800000,
> +       .min_dram_ib = 1600000,
> +       .min_prefill_lines = 24,
>         .danger_lut_tbl = {0xff, 0xffff, 0x0},
>         .qos_lut_tbl = {
>                 {.nentry = ARRAY_SIZE(sc7180_qos_linear),
> @@ -558,6 +559,8 @@
>                 {.rd_enable = 1, .wr_enable = 1},
>                 {.rd_enable = 1, .wr_enable = 0}
>         },
> +       .clk_inefficiency_factor = 105,
> +       .bw_inefficiency_factor = 120,
>  };
>
>  /*************************************************************
> diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.h
> index f7de438..f2a5fe2 100644
> --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.h
> +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.h
> @@ -651,6 +651,8 @@ struct dpu_perf_cdp_cfg {
>   * @downscaling_prefill_lines downscaling latency in lines
>   * @amortizable_theshold minimum y position for traffic shaping prefill
>   * @min_prefill_lines minimum pipeline latency in lines
> + * @clk_inefficiency_factor DPU src clock inefficiency factor
> + * @bw_inefficiency_factor DPU axi bus bw inefficiency factor
>   * @safe_lut_tbl: LUT tables for safe signals
>   * @danger_lut_tbl: LUT tables for danger signals
>   * @qos_lut_tbl: LUT tables for QoS signals
> @@ -675,6 +677,8 @@ struct dpu_perf_cfg {
>         u32 downscaling_prefill_lines;
>         u32 amortizable_threshold;
>         u32 min_prefill_lines;
> +       u32 clk_inefficiency_factor;
> +       u32 bw_inefficiency_factor;
>         u32 safe_lut_tbl[DPU_QOS_LUT_USAGE_MAX];
>         u32 danger_lut_tbl[DPU_QOS_LUT_USAGE_MAX];
>         struct dpu_qos_lut_tbl qos_lut_tbl[DPU_QOS_LUT_USAGE_MAX];
> diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c
> index b8615d4..a5da7aa 100644
> --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c
> +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c
> @@ -303,6 +303,28 @@ static int dpu_kms_global_obj_init(struct dpu_kms *dpu_kms)
>         return 0;
>  }
>
> +static int dpu_kms_parse_data_bus_icc_path(struct dpu_kms *dpu_kms)
> +{
> +       struct icc_path *path0;
> +       struct icc_path *path1;
> +       struct drm_device *dev = dpu_kms->dev;
> +
> +       path0 = of_icc_get(dev->dev, "mdp0-mem");
> +       path1 = of_icc_get(dev->dev, "mdp1-mem");
> +
> +       if (IS_ERR_OR_NULL(path0))
> +               return PTR_ERR_OR_ZERO(path0);
> +
> +       dpu_kms->path[0] = path0;
> +       dpu_kms->num_paths = 1;
> +
> +       if (!IS_ERR_OR_NULL(path1)) {
> +               dpu_kms->path[1] = path1;
> +               dpu_kms->num_paths++;
> +       }
> +       return 0;
> +}

so wait, why do we need a 2nd nearly identical copy of
dpu_mdss_parse_data_bus_icc_path() for sc7180? And tracking of the
path in dpu_mdss for some gens and dpu_kms in other gens?

(I have a suspicion that the answer is dpu has too much indirection
and abstraction.)
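Something like the sketch below is roughly what I'd expect instead: one
shared helper that both the mdss and kms code call with their own path
array. Untested, and dpu_parse_data_bus_icc_path() is a made-up name
just to illustrate the shape; it only rearranges what this patch
already does:

static int dpu_parse_data_bus_icc_path(struct device *dev,
                                       struct icc_path **paths,
                                       u32 *num_paths)
{
        /* mdp0-mem is required, mdp1-mem is optional */
        struct icc_path *path0 = of_icc_get(dev, "mdp0-mem");
        struct icc_path *path1 = of_icc_get(dev, "mdp1-mem");

        if (IS_ERR_OR_NULL(path0))
                return PTR_ERR_OR_ZERO(path0);

        paths[0] = path0;
        *num_paths = 1;

        if (!IS_ERR_OR_NULL(path1)) {
                paths[1] = path1;
                (*num_paths)++;
        }

        return 0;
}

Then the only per-gen difference left is whether dpu_mdss or dpu_kms
owns the paths, which seems like a much smaller thing to special-case.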
BR,
-R

> +
>  static int dpu_kms_enable_vblank(struct msm_kms *kms, struct drm_crtc *crtc)
>  {
>         return dpu_crtc_vblank(crtc, true);
> @@ -972,6 +994,9 @@ static int dpu_kms_hw_init(struct msm_kms *kms)
>
>         dpu_vbif_init_memtypes(dpu_kms);
>
> +       if (of_device_is_compatible(dev->dev->of_node, "qcom,sc7180-mdss"))
> +               dpu_kms_parse_data_bus_icc_path(dpu_kms);
> +
>         pm_runtime_put_sync(&dpu_kms->pdev->dev);
>
>         return 0;
> @@ -1077,7 +1102,7 @@ static int dpu_dev_remove(struct platform_device *pdev)
>
>  static int __maybe_unused dpu_runtime_suspend(struct device *dev)
>  {
> -       int rc = -1;
> +       int i, rc = -1;
>         struct platform_device *pdev = to_platform_device(dev);
>         struct dpu_kms *dpu_kms = platform_get_drvdata(pdev);
>         struct dss_module_power *mp = &dpu_kms->mp;
> @@ -1086,6 +1111,9 @@ static int __maybe_unused dpu_runtime_suspend(struct device *dev)
>         if (rc)
>                 DPU_ERROR("clock disable failed rc:%d\n", rc);
>
> +       for (i = 0; i < dpu_kms->num_paths; i++)
> +               icc_set_bw(dpu_kms->path[i], 0, 0);
> +
>         return rc;
>  }
>
> @@ -1097,8 +1125,15 @@ static int __maybe_unused dpu_runtime_resume(struct device *dev)
>         struct drm_encoder *encoder;
>         struct drm_device *ddev;
>         struct dss_module_power *mp = &dpu_kms->mp;
> +       int i;
>
>         ddev = dpu_kms->dev;
> +
> +       /* Min vote of BW is required before turning on AXI clk */
> +       for (i = 0; i < dpu_kms->num_paths; i++)
> +               icc_set_bw(dpu_kms->path[i], 0,
> +                       dpu_kms->catalog->perf.min_dram_ib);
> +
>         rc = msm_dss_enable_clk(mp->clk_config, mp->num_clk, true);
>         if (rc) {
>                 DPU_ERROR("clock enable failed rc:%d\n", rc);
> diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.h
> index 4e32d04..94410ca 100644
> --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.h
> +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.h
> @@ -8,6 +8,8 @@
>  #ifndef __DPU_KMS_H__
>  #define __DPU_KMS_H__
>
> +#include <linux/interconnect.h>
> +
>  #include <drm/drm_drv.h>
>
>  #include "msm_drv.h"
> @@ -137,6 +139,8 @@ struct dpu_kms {
>          * when disabled.
>          */
>         atomic_t bandwidth_ref;
> +       struct icc_path *path[2];
> +       u32 num_paths;
>  };
>
>  struct vsync_info {
> diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_mdss.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_mdss.c
> index 80d3cfc..df0a983 100644
> --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_mdss.c
> +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_mdss.c
> @@ -8,7 +8,6 @@
>  #include <linux/irqdesc.h>
>  #include <linux/irqchip/chained_irq.h>
>  #include "dpu_kms.h"
> -#include <linux/interconnect.h>
>
>  #define to_dpu_mdss(x) container_of(x, struct dpu_mdss, base)
>
> @@ -315,9 +314,11 @@ int dpu_mdss_init(struct drm_device *dev)
>         }
>         dpu_mdss->mmio_len = resource_size(res);
>
> -       ret = dpu_mdss_parse_data_bus_icc_path(dev, dpu_mdss);
> -       if (ret)
> -               return ret;
> +       if (!of_device_is_compatible(dev->dev->of_node, "qcom,sc7180-mdss")) {
> +               ret = dpu_mdss_parse_data_bus_icc_path(dev, dpu_mdss);
> +               if (ret)
> +                       return ret;
> +       }
>
>         mp = &dpu_mdss->mp;
>         ret = msm_dss_parse_clock(pdev, mp);
> diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c
> index 3b9c33e..6379fe1 100644
> --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c
> +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c
> @@ -132,6 +132,86 @@ static struct dpu_kms *_dpu_plane_get_kms(struct drm_plane *plane)
>  }
>
>  /**
> + * _dpu_plane_calc_bw - calculate bandwidth required for a plane
> + * @Plane: Pointer to drm plane.
> + * Result: Updates calculated bandwidth in the plane state.
> + * BW Equation: src_w * src_h * bpp * fps * (v_total / v_dest)
> + * Prefill BW Equation: line src bytes * line_time
> + */
> +static void _dpu_plane_calc_bw(struct drm_plane *plane,
> +               struct drm_framebuffer *fb)
> +{
> +       struct dpu_plane *pdpu = to_dpu_plane(plane);
> +       struct dpu_plane_state *pstate;
> +       struct drm_display_mode *mode;
> +       const struct dpu_format *fmt = NULL;
> +       struct dpu_kms *dpu_kms = _dpu_plane_get_kms(plane);
> +       int src_width, src_height, dst_height, fps;
> +       u64 plane_prefill_bw;
> +       u64 plane_bw;
> +       u32 hw_latency_lines;
> +       u64 scale_factor;
> +       int vbp, vpw;
> +
> +       pstate = to_dpu_plane_state(plane->state);
> +       mode = &plane->state->crtc->mode;
> +
> +       fmt = dpu_get_dpu_format_ext(fb->format->format, fb->modifier);
> +
> +       src_width = drm_rect_width(&pdpu->pipe_cfg.src_rect);
> +       src_height = drm_rect_height(&pdpu->pipe_cfg.src_rect);
> +       dst_height = drm_rect_height(&pdpu->pipe_cfg.dst_rect);
> +       fps = drm_mode_vrefresh(mode);
> +       vbp = mode->vtotal - mode->vsync_end;
> +       vpw = mode->vsync_end - mode->vsync_start;
> +       hw_latency_lines = dpu_kms->catalog->perf.min_prefill_lines;
> +       scale_factor = src_height > dst_height ?
> +               mult_frac(src_height, 1, dst_height) : 1;
> +
> +       plane_bw =
> +               src_width * mode->vtotal * fps * fmt->bpp *
> +               scale_factor;
> +
> +       plane_prefill_bw =
> +               src_width * hw_latency_lines * fps * fmt->bpp *
> +               scale_factor * mode->vtotal;
> +
> +       do_div(plane_prefill_bw, (vbp+vpw));
> +
> +       pstate->plane_fetch_bw = max(plane_bw, plane_prefill_bw);
> +}
> +
> +/**
> + * _dpu_plane_calc_clk - calculate clock required for a plane
> + * @Plane: Pointer to drm plane.
> + * Result: Updates calculated clock in the plane state.
> + * Clock equation: dst_w * v_total * fps * (src_h / dst_h)
> + */
> +static void _dpu_plane_calc_clk(struct drm_plane *plane)
> +{
> +       struct dpu_plane *pdpu = to_dpu_plane(plane);
> +       struct dpu_plane_state *pstate;
> +       struct drm_display_mode *mode;
> +       int dst_width, src_height, dst_height, fps;
> +
> +       pstate = to_dpu_plane_state(plane->state);
> +       mode = &plane->state->crtc->mode;
> +
> +       src_height = drm_rect_height(&pdpu->pipe_cfg.src_rect);
> +       dst_width = drm_rect_width(&pdpu->pipe_cfg.dst_rect);
> +       dst_height = drm_rect_height(&pdpu->pipe_cfg.dst_rect);
> +       fps = drm_mode_vrefresh(mode);
> +
> +       pstate->plane_clk =
> +               dst_width * mode->vtotal * fps;
> +
> +       if (src_height > dst_height) {
> +               pstate->plane_clk *= src_height;
> +               do_div(pstate->plane_clk, dst_height);
> +       }
> +}
> +
> +/**
>   * _dpu_plane_calc_fill_level - calculate fill level of the given source format
>   * @plane: Pointer to drm plane
>   * @fmt: Pointer to source buffer format
> @@ -1102,6 +1182,10 @@ static void dpu_plane_sspp_atomic_update(struct drm_plane *plane)
>         }
>
>         _dpu_plane_set_qos_remap(plane);
> +
> +       _dpu_plane_calc_bw(plane, fb);
> +
> +       _dpu_plane_calc_clk(plane);
>  }
>
>  static void _dpu_plane_atomic_disable(struct drm_plane *plane)
> diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.h
> index 4569497..ca83b87 100644
> --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.h
> +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.h
> @@ -25,6 +25,8 @@
>   * @scaler3_cfg: configuration data for scaler3
>   * @pixel_ext: configuration data for pixel extensions
>   * @cdp_cfg: CDP configuration
> + * @plane_fetch_bw: calculated BW per plane
> + * @plane_clk: calculated clk per plane
>   */
>  struct dpu_plane_state {
>         struct drm_plane_state base;
> @@ -39,6 +41,8 @@ struct dpu_plane_state {
>         struct dpu_hw_pixel_ext pixel_ext;
>
>         struct dpu_hw_pipe_cdp_cfg cdp_cfg;
> +       u64 plane_fetch_bw;
> +       u64 plane_clk;
>  };
>
>  /**
> --
> 1.9.1
>
_______________________________________________
dri-devel mailing list
dri-devel@xxxxxxxxxxxxxxxxxxxxx
https://lists.freedesktop.org/mailman/listinfo/dri-devel