On Tue, Sep 23, 2014 at 12:13:50PM +0100, Damien Lespiau wrote: > From: Pradeep Bhat <pradeep.bhat@xxxxxxxxx> > > This patch implements the watermark algorithm and its necessary > functions. Two function pointers skl_update_wm and > skl_update_sprite_wm are provided. The skl_update_wm will update > the watermarks for the crtc provided as an argument and then > checks for change in DDB allocation for other active pipes and > recomputes the watermarks for those Pipes and planes as well. > Finally it does the register programming for all dirty pipes. > The trigger of the Watermark double buffer registers will have > to be once the plane configurations are done by the caller. > > v2: fixed the divide-by-0 error in the results computation func. > Also reworked the PLANE_WM register values computation func to > make it more compact. Incorporated all other review comments > from Damien. > > v3: Changed the skl_compute_plane_wm function to now return success > or failure. Also the result blocks and lines are computed here > instead of in skl_compute_wm_results function. > > v4: Adjust skl_ddb_alloc_changed() to the new planes/cursor split > (Damien) > > v5: Reworked the affected functions to implement new plane/cursor > split. > > v6: Rework the logic that triggers the DDB allocation and WM computation > of skl_update_other_pipe_wm() to not depend on non-computed DDB > values. > Always give a valid cursor_width (at boot it's 0) to keep the > invariant that we consider the cursor plane always enabled. 
> Otherwise we end up dividing by 0 in skl_compute_plane_wm() > (Damien Lespiau) > > v7: Spell out allocation > skl_ddb_ functions should have the ddb as first argument > Make the skl_ddb_alloc_changed() parameters const > (Damien) > > v8: Rebase on top of the crtc->primary changes > > v9: Split the staging results structure to not exceed the 1Kb stack > allocation in skl_update_wm() > > v10: Make skl_pipe_pixel_rate() take a pointer to the pipe config > Add a comment about overflow considerations for skl_wm_method1() > Various additions of const > Various use of sizeof(variable) instead of sizeof(type) > Various move of variable definitons to a narrower scope > Zero initialize some stack allocated structures to make sure we > don't have garbage in case we don't write all the values > (Ville) > > v11: Remove non-necessary default number of blocks/lines when the plane > is disabled (Ville) > > Signed-off-by: Pradeep Bhat <pradeep.bhat@xxxxxxxxx> > Signed-off-by: Damien Lespiau <damien.lespiau@xxxxxxxxx> Looks OK. The fixup to move the validity check still seems to need a bit of tweaking but otherwise I think it's sane enough. I seem to recall that I went through all of the little details the first time around, so I decided to not do that this time. Hopefully my memory is correct ;) Reviewed-by: Ville Syrjälä <ville.syrjala@xxxxxxxxxxxxxxx> > --- > drivers/gpu/drm/i915/i915_drv.h | 12 +- > drivers/gpu/drm/i915/intel_pm.c | 422 ++++++++++++++++++++++++++++++++++++++++ > 2 files changed, 433 insertions(+), 1 deletion(-) > > diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h > index 0301a91..cde5136 100644 > --- a/drivers/gpu/drm/i915/i915_drv.h > +++ b/drivers/gpu/drm/i915/i915_drv.h > @@ -1722,8 +1722,18 @@ struct drm_i915_private { > */ > uint16_t skl_latency[8]; > > + /* > + * The skl_wm_values structure is a bit too big for stack > + * allocation, so we keep the staging struct where we store > + * intermediate results here instead. 
> + */ > + struct skl_wm_values skl_results; > + > /* current hardware state */ > - struct ilk_wm_values hw; > + union { > + struct ilk_wm_values hw; > + struct skl_wm_values skl_hw; > + }; > } wm; > > struct i915_runtime_pm pm; > diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c > index 768890e..e69a833 100644 > --- a/drivers/gpu/drm/i915/intel_pm.c > +++ b/drivers/gpu/drm/i915/intel_pm.c > @@ -2957,6 +2957,426 @@ static bool ilk_disable_lp_wm(struct drm_device *dev) > return _ilk_disable_lp_wm(dev_priv, WM_DIRTY_LP_ALL); > } > > +static uint32_t skl_pipe_pixel_rate(const struct intel_crtc_config *config) > +{ > + /* TODO: Take into account the scalers once we support them */ > + return config->adjusted_mode.crtc_clock; > +} > + > +/* > + * The max latency should be 257 (max the punit can code is 255 and we add 2us > + * for the read latency) and bytes_per_pixel should always be <= 8, so that > + * should allow pixel_rate up to ~2 GHz which seems sufficient since max > + * 2xcdclk is 1350 MHz and the pixel rate should never exceed that. 
> +*/ > +static uint32_t skl_wm_method1(uint32_t pixel_rate, uint8_t bytes_per_pixel, > + uint32_t latency) > +{ > + uint32_t wm_intermediate_val, ret; > + > + if (latency == 0) > + return UINT_MAX; > + > + wm_intermediate_val = latency * pixel_rate * bytes_per_pixel; > + ret = DIV_ROUND_UP(wm_intermediate_val, 1000); > + > + return ret; > +} > + > +static uint32_t skl_wm_method2(uint32_t pixel_rate, uint32_t pipe_htotal, > + uint32_t horiz_pixels, uint8_t bytes_per_pixel, > + uint32_t latency) > +{ > + uint32_t ret, plane_bytes_per_line, wm_intermediate_val; > + > + if (latency == 0) > + return UINT_MAX; > + > + plane_bytes_per_line = horiz_pixels * bytes_per_pixel; > + wm_intermediate_val = latency * pixel_rate; > + ret = DIV_ROUND_UP(wm_intermediate_val, pipe_htotal * 1000) * > + plane_bytes_per_line; > + > + return ret; > +} > + > +static void skl_compute_transition_wm(struct drm_crtc *crtc, > + struct skl_pipe_wm_parameters *params, > + struct skl_pipe_wm *pipe_wm) > +{ > + /* > + * For now it is suggested to use the LP0 wm val of corresponding > + * plane as transition wm val. This is done while computing results. 
> + */ > + if (!params->active) > + return; > +} > + > +static uint32_t > +skl_compute_linetime_wm(struct drm_crtc *crtc, struct skl_pipe_wm_parameters *p) > +{ > + if (!intel_crtc_active(crtc)) > + return 0; > + > + return DIV_ROUND_UP(8 * p->pipe_htotal * 1000, p->pixel_rate); > + > +} > + > +static bool skl_ddb_allocation_changed(const struct skl_ddb_allocation *new_ddb, > + const struct intel_crtc *intel_crtc) > +{ > + struct drm_device *dev = intel_crtc->base.dev; > + struct drm_i915_private *dev_priv = dev->dev_private; > + const struct skl_ddb_allocation *cur_ddb = &dev_priv->wm.skl_hw.ddb; > + enum pipe pipe = intel_crtc->pipe; > + > + if (memcmp(new_ddb->plane[pipe], cur_ddb->plane[pipe], > + sizeof(new_ddb->plane[pipe]))) > + return true; > + > + if (memcmp(&new_ddb->cursor[pipe], &cur_ddb->cursor[pipe], > + sizeof(new_ddb->cursor[pipe]))) > + return true; > + > + return false; > +} > + > +static void skl_compute_wm_global_parameters(struct drm_device *dev, > + struct intel_wm_config *config) > +{ > + struct drm_crtc *crtc; > + struct drm_plane *plane; > + > + list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) > + config->num_pipes_active += intel_crtc_active(crtc); > + > + /* FIXME: I don't think we need those two global parameters on SKL */ > + list_for_each_entry(plane, &dev->mode_config.plane_list, head) { > + struct intel_plane *intel_plane = to_intel_plane(plane); > + > + config->sprites_enabled |= intel_plane->wm.enabled; > + config->sprites_scaled |= intel_plane->wm.scaled; > + } > +} > + > +static void skl_compute_wm_pipe_parameters(struct drm_crtc *crtc, > + struct skl_pipe_wm_parameters *p) > +{ > + struct drm_device *dev = crtc->dev; > + struct intel_crtc *intel_crtc = to_intel_crtc(crtc); > + enum pipe pipe = intel_crtc->pipe; > + struct drm_plane *plane; > + int i = 1; /* Index for sprite planes start */ > + > + p->active = intel_crtc_active(crtc); > + if (p->active) { > + p->pipe_htotal = 
intel_crtc->config.adjusted_mode.crtc_htotal; > + p->pixel_rate = skl_pipe_pixel_rate(&intel_crtc->config); > + > + /* > + * For now, assume primary and cursor planes are always enabled. > + */ > + p->plane[0].enabled = true; > + p->plane[0].bytes_per_pixel = > + crtc->primary->fb->bits_per_pixel / 8; > + p->plane[0].horiz_pixels = intel_crtc->config.pipe_src_w; > + p->plane[0].vert_pixels = intel_crtc->config.pipe_src_h; > + > + p->cursor.enabled = true; > + p->cursor.bytes_per_pixel = 4; > + p->cursor.horiz_pixels = intel_crtc->cursor_width ? > + intel_crtc->cursor_width : 64; > + } > + > + list_for_each_entry(plane, &dev->mode_config.plane_list, head) { > + struct intel_plane *intel_plane = to_intel_plane(plane); > + > + if (intel_plane->pipe == pipe) > + p->plane[i++] = intel_plane->wm; > + } > +} > + > +static bool skl_compute_plane_wm(struct skl_pipe_wm_parameters *p, > + struct intel_plane_wm_parameters *p_params, > + uint16_t max_page_buff_alloc, > + uint32_t mem_value, > + uint16_t *res_blocks, /* out */ > + uint8_t *res_lines /* out */) > +{ > + uint32_t method1, method2, plane_bytes_per_line; > + uint32_t result_bytes; > + > + if (!p->active || !p_params->enabled) > + return false; > + > + method1 = skl_wm_method1(p->pixel_rate, > + p_params->bytes_per_pixel, > + mem_value); > + method2 = skl_wm_method2(p->pixel_rate, > + p->pipe_htotal, > + p_params->horiz_pixels, > + p_params->bytes_per_pixel, > + mem_value); > + > + plane_bytes_per_line = p_params->horiz_pixels * > + p_params->bytes_per_pixel; > + > + /* For now xtile and linear */ > + if (((max_page_buff_alloc * 512) / plane_bytes_per_line) >= 1) > + result_bytes = min(method1, method2); > + else > + result_bytes = method1; > + > + *res_blocks = DIV_ROUND_UP(result_bytes, 512) + 1; > + *res_lines = DIV_ROUND_UP(result_bytes, plane_bytes_per_line); > + > + return true; > +} > + > +static void skl_compute_wm_level(const struct drm_i915_private *dev_priv, > + struct skl_ddb_allocation *ddb, > + struct 
skl_pipe_wm_parameters *p, > + enum pipe pipe, > + int level, > + int num_planes, > + struct skl_wm_level *result) > +{ > + uint16_t latency = dev_priv->wm.skl_latency[level]; > + uint16_t ddb_blocks; > + int i; > + > + for (i = 0; i < num_planes; i++) { > + ddb_blocks = skl_ddb_entry_size(&ddb->plane[pipe][i]); > + > + result->plane_en[i] = skl_compute_plane_wm(p, &p->plane[i], > + ddb_blocks, > + latency, > + &result->plane_res_b[i], > + &result->plane_res_l[i]); > + } > + > + ddb_blocks = skl_ddb_entry_size(&ddb->cursor[pipe]); > + result->cursor_en = skl_compute_plane_wm(p, &p->cursor, ddb_blocks, > + latency, &result->cursor_res_b, > + &result->cursor_res_l); > +} > + > +static void skl_compute_pipe_wm(struct drm_crtc *crtc, > + struct skl_ddb_allocation *ddb, > + struct skl_pipe_wm_parameters *params, > + struct skl_pipe_wm *pipe_wm) > +{ > + struct drm_device *dev = crtc->dev; > + const struct drm_i915_private *dev_priv = dev->dev_private; > + struct intel_crtc *intel_crtc = to_intel_crtc(crtc); > + int level, max_level = ilk_wm_max_level(dev); > + > + for (level = 0; level <= max_level; level++) { > + skl_compute_wm_level(dev_priv, ddb, params, intel_crtc->pipe, > + level, intel_num_planes(intel_crtc), > + &pipe_wm->wm[level]); > + } > + pipe_wm->linetime = skl_compute_linetime_wm(crtc, params); > + > + skl_compute_transition_wm(crtc, params, pipe_wm); > +} > + > +static void skl_compute_wm_results(struct drm_device *dev, > + struct skl_pipe_wm_parameters *p, > + struct skl_pipe_wm *p_wm, > + struct skl_wm_values *r, > + struct intel_crtc *intel_crtc) > +{ > + int level, max_level = ilk_wm_max_level(dev); > + enum pipe pipe = intel_crtc->pipe; > + > + for (level = 0; level <= max_level; level++) { > + uint16_t ddb_blocks; > + uint32_t temp; > + int i; > + > + for (i = 0; i < intel_num_planes(intel_crtc); i++) { > + temp = 0; > + ddb_blocks = skl_ddb_entry_size(&r->ddb.plane[pipe][i]); > + > + if ((p_wm->wm[level].plane_res_b[i] > ddb_blocks) || > + 
(p_wm->wm[level].plane_res_l[i] > 31)) > + p_wm->wm[level].plane_en[i] = false; > + > + temp |= p_wm->wm[level].plane_res_l[i] << > + PLANE_WM_LINES_SHIFT; > + temp |= p_wm->wm[level].plane_res_b[i]; > + if (p_wm->wm[level].plane_en[i]) > + temp |= PLANE_WM_EN; > + > + r->plane[pipe][i][level] = temp; > + /* Use the LP0 WM value for transition WM for now. */ > + if (level == 0) > + r->plane_trans[pipe][i] = temp; > + } > + > + temp = 0; > + ddb_blocks = skl_ddb_entry_size(&r->ddb.cursor[pipe]); > + > + if ((p_wm->wm[level].cursor_res_b > ddb_blocks) || > + (p_wm->wm[level].cursor_res_l > 31)) > + p_wm->wm[level].cursor_en = false; > + > + temp |= p_wm->wm[level].cursor_res_l << PLANE_WM_LINES_SHIFT; > + temp |= p_wm->wm[level].cursor_res_b; > + > + if (p_wm->wm[level].cursor_en) > + temp |= PLANE_WM_EN; > + > + r->cursor[pipe][level] = temp; > + /* Use the LP0 WM value for transition WM for now. */ > + if (level == 0) > + r->cursor_trans[pipe] = temp; > + > + } > + > + r->wm_linetime[pipe] = p_wm->linetime; > +} > + > +static void skl_write_wm_values(struct drm_i915_private *dev_priv, > + const struct skl_wm_values *new) > +{ > + struct drm_device *dev = dev_priv->dev; > + struct intel_crtc *crtc; > + > + list_for_each_entry(crtc, &dev->mode_config.crtc_list, base.head) { > + int i, level, max_level = ilk_wm_max_level(dev); > + enum pipe pipe = crtc->pipe; > + > + if (new->dirty[pipe]) { > + I915_WRITE(PIPE_WM_LINETIME(pipe), > + new->wm_linetime[pipe]); > + > + for (level = 0; level <= max_level; level++) { > + for (i = 0; i < intel_num_planes(crtc); i++) > + I915_WRITE(PLANE_WM(pipe, i, level), > + new->plane[pipe][i][level]); > + I915_WRITE(CUR_WM(pipe, level), > + new->cursor[pipe][level]); > + } > + for (i = 0; i < intel_num_planes(crtc); i++) > + I915_WRITE(PLANE_WM_TRANS(pipe, i), > + new->plane_trans[pipe][i]); > + I915_WRITE(CUR_WM_TRANS(pipe), new->cursor_trans[pipe]); > + } > + } > + > + dev_priv->wm.skl_hw = *new; > +} > + > +static bool 
skl_update_pipe_wm(struct drm_crtc *crtc, > + struct skl_pipe_wm_parameters *params, > + struct intel_wm_config *config, > + struct skl_ddb_allocation *ddb, /* out */ > + struct skl_pipe_wm *pipe_wm /* out */) > +{ > + struct intel_crtc *intel_crtc = to_intel_crtc(crtc); > + > + skl_compute_wm_pipe_parameters(crtc, params); > + skl_compute_pipe_wm(crtc, ddb, params, pipe_wm); > + > + if (!memcmp(&intel_crtc->wm.skl_active, pipe_wm, sizeof(*pipe_wm))) > + return false; > + > + intel_crtc->wm.skl_active = *pipe_wm; > + return true; > +} > + > +static void skl_update_other_pipe_wm(struct drm_device *dev, > + struct drm_crtc *crtc, > + struct intel_wm_config *config, > + struct skl_wm_values *r) > +{ > + struct intel_crtc *intel_crtc; > + struct intel_crtc *this_crtc = to_intel_crtc(crtc); > + > + /* > + * If the WM update hasn't changed the allocation for this_crtc (the > + * crtc we are currently computing the new WM values for), other > + * enabled crtcs will keep the same allocation and we don't need to > + * recompute anything for them. > + */ > + if (!skl_ddb_allocation_changed(&r->ddb, this_crtc)) > + return; > + > + /* > + * Otherwise, because of this_crtc being freshly enabled/disabled, the > + * other active pipes need new DDB allocation and WM values. > + */ > + list_for_each_entry(intel_crtc, &dev->mode_config.crtc_list, > + base.head) { > + struct skl_pipe_wm_parameters params = {}; > + struct skl_pipe_wm pipe_wm = {}; > + bool wm_changed; > + > + if (this_crtc->pipe == intel_crtc->pipe) > + continue; > + > + if (!intel_crtc->active) > + continue; > + > + wm_changed = skl_update_pipe_wm(&intel_crtc->base, > + &params, config, > + &r->ddb, &pipe_wm); > + > + /* > + * If we end up re-computing the other pipe WM values, it's > + * because it was really needed, so we expect the WM values to > + * be different. 
> + */ > + WARN_ON(!wm_changed); > + > + skl_compute_wm_results(dev, &params, &pipe_wm, r, intel_crtc); > + r->dirty[intel_crtc->pipe] = true; > + } > +} > + > +static void skl_update_wm(struct drm_crtc *crtc) > +{ > + struct intel_crtc *intel_crtc = to_intel_crtc(crtc); > + struct drm_device *dev = crtc->dev; > + struct drm_i915_private *dev_priv = dev->dev_private; > + struct skl_pipe_wm_parameters params = {}; > + struct skl_wm_values *results = &dev_priv->wm.skl_results; > + struct skl_pipe_wm pipe_wm = {}; > + struct intel_wm_config config = {}; > + > + memset(results, 0, sizeof(*results)); > + > + skl_compute_wm_global_parameters(dev, &config); > + > + if (!skl_update_pipe_wm(crtc, &params, &config, > + &results->ddb, &pipe_wm)) > + return; > + > + skl_compute_wm_results(dev, &params, &pipe_wm, results, intel_crtc); > + results->dirty[intel_crtc->pipe] = true; > + > + skl_update_other_pipe_wm(dev, crtc, &config, results); > + skl_write_wm_values(dev_priv, results); > +} > + > +static void > +skl_update_sprite_wm(struct drm_plane *plane, struct drm_crtc *crtc, > + uint32_t sprite_width, uint32_t sprite_height, > + int pixel_size, bool enabled, bool scaled) > +{ > + struct intel_plane *intel_plane = to_intel_plane(plane); > + > + intel_plane->wm.enabled = enabled; > + intel_plane->wm.scaled = scaled; > + intel_plane->wm.horiz_pixels = sprite_width; > + intel_plane->wm.vert_pixels = sprite_height; > + intel_plane->wm.bytes_per_pixel = pixel_size; > + > + skl_update_wm(crtc); > +} > + > static void ilk_update_wm(struct drm_crtc *crtc) > { > struct intel_crtc *intel_crtc = to_intel_crtc(crtc); > @@ -7486,6 +7906,8 @@ void intel_init_pm(struct drm_device *dev) > skl_setup_wm_latency(dev); > > dev_priv->display.init_clock_gating = gen9_init_clock_gating; > + dev_priv->display.update_wm = skl_update_wm; > + dev_priv->display.update_sprite_wm = skl_update_sprite_wm; > } else if (HAS_PCH_SPLIT(dev)) { > ilk_setup_wm_latency(dev); > > -- > 1.8.3.1 -- Ville Syrjälä Intel OTC 
_______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx http://lists.freedesktop.org/mailman/listinfo/intel-gfx