On Fri, May 24, 2013 at 07:00:42PM -0300, Paulo Zanoni wrote: > 2013/5/24 Ville Syrj?l? <ville.syrjala at linux.intel.com>: > > On Fri, May 24, 2013 at 11:59:19AM -0300, Paulo Zanoni wrote: > >> From: Paulo Zanoni <paulo.r.zanoni at intel.com> > >> > >> We were previously calling sandybridge_update_wm on HSW, but the SNB > >> function didn't really match the HSW specification, so we were just > >> writing the wrong values. > >> > >> With this patch, the haswell_update_wm function will set the correct > >> values for the WM_PIPE registers, but it will still keep all the LP > >> watermarks disabled. > >> > >> The patch may look a little bit over-complicated for now, but it's > >> because much of the infrastructure for setting the LP watermarks is > >> already in place, so we won't have too much code churn on the patch > >> that sets the LP watermarks. > >> > >> Signed-off-by: Paulo Zanoni <paulo.r.zanoni at intel.com> > >> --- > >> drivers/gpu/drm/i915/i915_reg.h | 3 + > >> drivers/gpu/drm/i915/intel_pm.c | 340 +++++++++++++++++++++++++++++++++++++--- > >> 2 files changed, 325 insertions(+), 18 deletions(-) > >> > >> diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h > >> index 55caedb..e86606c 100644 > >> --- a/drivers/gpu/drm/i915/i915_reg.h > >> +++ b/drivers/gpu/drm/i915/i915_reg.h > >> @@ -4938,6 +4938,9 @@ > >> #define SFUSE_STRAP_DDIC_DETECTED (1<<1) > >> #define SFUSE_STRAP_DDID_DETECTED (1<<0) > >> > >> +#define WM_MISC 0x45260 > >> +#define WM_MISC_DATA_PARTITION_5_6 (1 << 0) > >> + > >> #define WM_DBG 0x45280 > >> #define WM_DBG_DISALLOW_MULTIPLE_LP (1<<0) > >> #define WM_DBG_DISALLOW_MAXFIFO (1<<1) > >> diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c > >> index 0b61a0e..2ee1d01 100644 > >> --- a/drivers/gpu/drm/i915/intel_pm.c > >> +++ b/drivers/gpu/drm/i915/intel_pm.c > >> @@ -2072,19 +2072,173 @@ static void ivybridge_update_wm(struct drm_device *dev) > >> cursor_wm); > >> } > >> > >> -static void > >> -haswell_update_linetime_wm(struct drm_device *dev, struct drm_crtc *crtc) > >> +static uint32_t hsw_wm_get_pixel_rate(struct drm_device *dev, > >> + struct drm_crtc *crtc) > >> +{ > >> + struct intel_crtc *intel_crtc = to_intel_crtc(crtc); > >> + uint32_t pixel_rate, pfit_size; > >> + > >> + if (intel_crtc->config.pixel_target_clock) > >> + pixel_rate = intel_crtc->config.pixel_target_clock; > >> + else > >> + pixel_rate = intel_crtc->config.adjusted_mode.clock; > >> + > >> + /* We only use IF-ID interlacing. If we ever use PF-ID we'll need to > >> + * adjust the pixel_rate here. */ > >> + > >> + pfit_size = intel_crtc->config.pch_pfit.size; > >> + if (pfit_size) { > >> + uint64_t x, y, crtc_x, crtc_y, hscale, vscale, totscale; > >> + > >> + x = (pfit_size >> 16) & 0xFFFF; > >> + y = pfit_size & 0xFFFF; > >> + crtc_x = intel_crtc->config.adjusted_mode.hdisplay; > >> + crtc_y = intel_crtc->config.adjusted_mode.vdisplay; > >> + > >> + hscale = crtc_x << 16; > >> + vscale = crtc_y << 16; > >> + do_div(hscale, x); > >> + do_div(vscale, y); > >> + hscale = (hscale < (1 << 16)) ? (1 << 16) : hscale; > >> + vscale = (vscale < (1 << 16)) ? (1 << 16) : vscale; > >> + totscale = (hscale * vscale) >> 16; > >> + pixel_rate = (pixel_rate * totscale) >> 16; > > > > No need for fixed point math if you go 64bits, and as stated before > > the scaling ratio is still being miscaclulated due to the use of > > adjusted_mode. > > > > Something like this ought to do it: > > > > in_w = req_mode.hdisplay; > > in_h = req_mode.vdisplay; > > out_w = (pfit_size >> 16) & 0xffff; > > out_h = pfit_size & 0xffff; > > if (in_w <= out_w) > > in_w = out_w; > > if (in_h <= out_h) > > in_h = out_h; > > > > pixel_rate = div_u64((uint64_t) pixel_rate * in_w * in_h, out_w * out_h); > > Ok, I re-checked and you were right. Fixed. Sorry for insisting :( > > > > > >> + } > >> + > >> + return pixel_rate; > >> +} > >> + > >> +static uint32_t hsw_wm_method1(uint32_t pixel_rate, uint8_t bytes_per_pixel, > >> + uint32_t latency) > >> +{ > >> + uint64_t tmp; > >> + uint32_t ret; > >> + > >> + tmp = pixel_rate * bytes_per_pixel * latency; > > > > Would need a cast to make the multiplications actually 64bit. 'ret' is > > also pointless. > > Oops... Fixed. > > > > > >> + ret = DIV_ROUND_UP_ULL(tmp, 64 * 10000) + 2; > >> + > >> + return ret; > >> +} > >> + > >> +static uint32_t hsw_wm_method2(uint32_t pixel_rate, uint32_t pipe_htotal, > >> + uint32_t horiz_pixels, uint8_t bytes_per_pixel, > >> + uint32_t latency) > >> +{ > >> + uint32_t ret; > >> + > >> + ret = DIV_ROUND_UP(pipe_htotal * 1000, pixel_rate); > >> + ret = ((latency / (ret * 10)) + 1) * horiz_pixels * bytes_per_pixel; > > > > w/ 64bit maths this could be: > > > > tmp = (uint64_t) latency * pixel_rate * 100; > > ret = (div_u64(tmp, pipe_htotal) + 1) * horiz_pixels * bytes_per_pixel > > I did the math on a paper and your formula doesn't look correct. For > latency=10 rate=120000 pipe_htotal=2000 horiz_pixels=1500 bpp=4 the > correct value should be 96, but your formula gives me a really huge > value. Yeah looks like I had the *100 at the wrong place. > Besides, I like having the formula match BSpec exactly. And I > can't see how the current code would give us overflows, that's why I > kept it using uint32_t. It doesn't overflow, but it does round up the the line time value, which the bspec formula doesn't do. > >> + ret = DIV_ROUND_UP(ret, 64) + 2; > >> + return ret; > >> +} > >> + > >> +struct hsw_pipe_wm_parameters { > >> + bool active; > >> + bool sprite_enabled; > >> + uint8_t pri_bytes_per_pixel; > >> + uint8_t spr_bytes_per_pixel; > >> + uint8_t cur_bytes_per_pixel; > >> + uint32_t pri_horiz_pixels; > >> + uint32_t spr_horiz_pixels; > >> + uint32_t cur_horiz_pixels; > >> + uint32_t pipe_htotal; > >> + uint32_t pixel_rate; > >> +}; > >> + > >> +struct hsw_wm_values { > >> + uint32_t wm_pipe[3]; > >> + uint32_t wm_lp[3]; > >> + uint32_t wm_lp_spr[3]; > >> + uint32_t wm_linetime[3]; > >> +}; > >> + > >> +enum hsw_data_buf_partitioning { > >> + HSW_DATA_BUF_PART_1_2, > >> + HSW_DATA_BUF_PART_5_6, > >> +}; > >> + > >> +/* Only for WM_PIPE. */ > >> +static uint32_t hsw_compute_pri_wm_pipe(struct hsw_pipe_wm_parameters *params, > >> + uint32_t mem_value) > >> +{ > >> + /* TODO: for now, assume the primary plane is always enabled. */ > >> + if (!params->active) > >> + return 0; > >> + > >> + return hsw_wm_method1(params->pixel_rate, > >> + params->pri_bytes_per_pixel, > >> + mem_value); > >> +} > >> + > >> +/* For both WM_PIPE and WM_LP. */ > >> +static uint32_t hsw_compute_spr_wm(struct hsw_pipe_wm_parameters *params, > >> + uint32_t mem_value) > >> +{ > >> + uint32_t method1, method2; > >> + > >> + if (!params->active || !params->sprite_enabled) > >> + return 0; > >> + > >> + method1 = hsw_wm_method1(params->pixel_rate, > >> + params->spr_bytes_per_pixel, > >> + mem_value); > >> + method2 = hsw_wm_method2(params->pixel_rate, > >> + params->pipe_htotal, > >> + params->spr_horiz_pixels, > >> + params->spr_bytes_per_pixel, > >> + mem_value); > >> + return min(method1, method2); > >> +} > >> + > >> +/* For both WM_PIPE and WM_LP. */ > >> +static uint32_t hsw_compute_cur_wm(struct hsw_pipe_wm_parameters *params, > >> + uint32_t mem_value) > >> +{ > >> + if (!params->active) > >> + return 0; > >> + > >> + return hsw_wm_method2(params->pixel_rate, > >> + params->pipe_htotal, > >> + params->cur_horiz_pixels, > >> + params->cur_bytes_per_pixel, > >> + mem_value); > >> +} > >> + > >> +static uint32_t hsw_compute_wm_pipe(struct drm_i915_private *dev_priv, > >> + uint32_t mem_value, enum pipe pipe, > >> + struct hsw_pipe_wm_parameters *params) > >> +{ > >> + uint32_t pri_val, cur_val, spr_val; > >> + > >> + pri_val = hsw_compute_pri_wm_pipe(params, mem_value); > >> + spr_val = hsw_compute_spr_wm(params, mem_value); > >> + cur_val = hsw_compute_cur_wm(params, mem_value); > >> + > >> + WARN(pri_val > 127, > >> + "Primary WM error, mode not supported for pipe %c\n", > >> + pipe_name(pipe)); > >> + WARN(spr_val > 127, > >> + "Sprite WM error, mode not supported for pipe %c\n", > >> + pipe_name(pipe)); > >> + WARN(cur_val > 63, > >> + "Cursor WM error, mode not supported for pipe %c\n", > >> + pipe_name(pipe)); > >> + > >> + return (pri_val << WM0_PIPE_PLANE_SHIFT) | > >> + (spr_val << WM0_PIPE_SPRITE_SHIFT) | > >> + cur_val; > >> +} > >> + > >> +static uint32_t > >> +hsw_compute_linetime_wm(struct drm_device *dev, struct drm_crtc *crtc) > >> { > >> struct drm_i915_private *dev_priv = dev->dev_private; > >> struct intel_crtc *intel_crtc = to_intel_crtc(crtc); > >> - enum pipe pipe = intel_crtc->pipe; > >> struct drm_display_mode *mode = &intel_crtc->config.adjusted_mode; > >> u32 linetime, ips_linetime; > >> > >> - if (!intel_crtc_active(crtc)) { > >> - I915_WRITE(PIPE_WM_LINETIME(pipe), 0); > >> - return; > >> - } > >> + if (!intel_crtc_active(crtc)) > >> + return 0; > >> > >> /* The WM are computed with base on how long it takes to fill a single > >> * row at the given clock rate, multiplied by 8. > >> @@ -2093,29 +2247,179 @@ haswell_update_linetime_wm(struct drm_device *dev, struct drm_crtc *crtc) > >> ips_linetime = DIV_ROUND_CLOSEST(mode->htotal * 1000 * 8, > >> intel_ddi_get_cdclk_freq(dev_priv)); > >> > >> - I915_WRITE(PIPE_WM_LINETIME(pipe), > >> - PIPE_WM_LINETIME_IPS_LINETIME(ips_linetime) | > >> - PIPE_WM_LINETIME_TIME(linetime)); > >> + return PIPE_WM_LINETIME_IPS_LINETIME(ips_linetime) | > >> + PIPE_WM_LINETIME_TIME(linetime); > >> } > >> > >> -static void haswell_update_wm(struct drm_device *dev) > >> +static void hsw_compute_wm_parameters(struct drm_device *dev, > >> + struct hsw_pipe_wm_parameters *params, > >> + uint32_t *wm) > >> { > >> struct drm_i915_private *dev_priv = dev->dev_private; > >> struct drm_crtc *crtc; > >> + struct drm_plane *plane; > >> + uint64_t sskpd = I915_READ64(MCH_SSKPD); > >> enum pipe pipe; > >> > >> - /* Disable the LP WMs before changine the linetime registers. This is > >> - * just a temporary code that will be replaced soon. */ > >> - I915_WRITE(WM3_LP_ILK, 0); > >> - I915_WRITE(WM2_LP_ILK, 0); > >> - I915_WRITE(WM1_LP_ILK, 0); > >> + if ((sskpd >> 56) & 0xFF) > >> + wm[0] = (sskpd >> 56) & 0xFF; > >> + else > >> + wm[0] = sskpd & 0xF; > >> + wm[1] = ((sskpd >> 4) & 0xFF) * 5; > >> + wm[2] = ((sskpd >> 12) & 0xFF) * 5; > >> + wm[3] = ((sskpd >> 20) & 0x1FF) * 5; > >> + wm[4] = ((sskpd >> 32) & 0x1FF) * 5; > >> + > >> + list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) { > >> + struct intel_crtc *intel_crtc = to_intel_crtc(crtc); > >> + struct hsw_pipe_wm_parameters *p; > >> + > >> + pipe = intel_crtc->pipe; > >> + p = ¶ms[pipe]; > >> + > >> + p->active = intel_crtc_active(crtc); > >> + if (!p->active) > >> + continue; > >> + > >> + p->pipe_htotal = intel_crtc->config.adjusted_mode.htotal; > >> + p->pixel_rate = hsw_wm_get_pixel_rate(dev, crtc); > >> + p->pri_bytes_per_pixel = crtc->fb->bits_per_pixel / 8; > >> + p->cur_bytes_per_pixel = 4; > >> + p->pri_horiz_pixels = intel_crtc->config.adjusted_mode.hdisplay; > >> + p->cur_horiz_pixels = 64; > >> + } > >> + > >> + list_for_each_entry(plane, &dev->mode_config.plane_list, head) { > >> + struct intel_plane *intel_plane = to_intel_plane(plane); > >> + struct hsw_pipe_wm_parameters *p; > >> + > >> + pipe = intel_plane->pipe; > >> + p = ¶ms[pipe]; > >> + > >> + p->sprite_enabled = intel_plane->wm.enable; > >> + p->spr_bytes_per_pixel = intel_plane->wm.bytes_per_pixel; > >> + p->spr_horiz_pixels = intel_plane->wm.horiz_pixels; > >> + } > >> +} > >> + > >> +static void hsw_compute_wm_results(struct drm_device *dev, > >> + struct hsw_pipe_wm_parameters *params, > >> + uint32_t *wm, > >> + struct hsw_wm_values *results) > >> +{ > >> + struct drm_i915_private *dev_priv = dev->dev_private; > >> + struct drm_crtc *crtc; > >> + enum pipe pipe; > >> + > >> + /* No support for LP WMs yet. */ > >> + results->wm_lp[2] = 0; > >> + results->wm_lp[1] = 0; > >> + results->wm_lp[0] = 0; > >> + results->wm_lp_spr[2] = 0; > >> + results->wm_lp_spr[1] = 0; > >> + results->wm_lp_spr[0] = 0; > >> + > >> + for_each_pipe(pipe) > >> + results->wm_pipe[pipe] = hsw_compute_wm_pipe(dev_priv, wm[0], > >> + pipe, > >> + ¶ms[pipe]); > >> > >> for_each_pipe(pipe) { > >> crtc = dev_priv->pipe_to_crtc_mapping[pipe]; > >> - haswell_update_linetime_wm(dev, crtc); > >> + results->wm_linetime[pipe] = hsw_compute_linetime_wm(dev, crtc); > >> } > >> +} > >> + > >> +/* > >> + * The spec says we shouldn't write when we don't need, because every write > >> + * causes WMs to be re-evaluated, expending some power. > >> + */ > >> +static void hsw_write_wm_values(struct drm_i915_private *dev_priv, > >> + struct hsw_wm_values *results, > >> + enum hsw_data_buf_partitioning partitioning) > >> +{ > >> + struct hsw_wm_values previous; > >> + uint32_t val; > >> + enum hsw_data_buf_partitioning prev_partitioning; > >> + > >> + previous.wm_pipe[0] = I915_READ(WM0_PIPEA_ILK); > >> + previous.wm_pipe[1] = I915_READ(WM0_PIPEB_ILK); > >> + previous.wm_pipe[2] = I915_READ(WM0_PIPEC_IVB); > >> + previous.wm_lp[0] = I915_READ(WM1_LP_ILK); > >> + previous.wm_lp[1] = I915_READ(WM2_LP_ILK); > >> + previous.wm_lp[2] = I915_READ(WM3_LP_ILK); > >> + previous.wm_lp_spr[0] = I915_READ(WM1S_LP_ILK); > >> + previous.wm_lp_spr[1] = I915_READ(WM2S_LP_IVB); > >> + previous.wm_lp_spr[2] = I915_READ(WM3S_LP_IVB); > >> + previous.wm_linetime[0] = I915_READ(PIPE_WM_LINETIME(PIPE_A)); > >> + previous.wm_linetime[1] = I915_READ(PIPE_WM_LINETIME(PIPE_B)); > >> + previous.wm_linetime[2] = I915_READ(PIPE_WM_LINETIME(PIPE_C)); > >> + > >> + prev_partitioning = (I915_READ(WM_MISC) & WM_MISC_DATA_PARTITION_5_6) ? > >> + HSW_DATA_BUF_PART_5_6 : HSW_DATA_BUF_PART_1_2; > >> + > >> + if (memcmp(results->wm_pipe, previous.wm_pipe, 3) == 0 && > >> + memcmp(results->wm_lp, previous.wm_lp, 3) == 0 && > >> + memcmp(results->wm_lp_spr, previous.wm_lp_spr, 3) == 0 && > >> + memcmp(results->wm_linetime, previous.wm_linetime, 3) == 0 && > >> + partitioning == prev_partitioning) > >> + return; > >> + > >> + if (previous.wm_lp[2] != 0) > >> + I915_WRITE(WM3_LP_ILK, 0); > >> + if (previous.wm_lp[1] != 0) > >> + I915_WRITE(WM2_LP_ILK, 0); > >> + if (previous.wm_lp[0] != 0) > >> + I915_WRITE(WM1_LP_ILK, 0); > > > > I don't know if this conditional writing makes sense in such a fine > > granularity. We're anyway going to write some of the registeres, so > > maybe it's better to just go ahead and write all of them. It would > > at least make the code look a bit better. > > The documentation says "Do not write the watermark registers when > there is no need to change a value, as every write will cause the > watermarks to be re-evaluated, expending some power.". That comment is quite ambigous. It doesn't really tell you whether there's a significant difference in writing just one register or all of them. > I do recognize > the function looks a little bit ugly, but I think it's worth the cost, > especially since I imagine we're not going to change it too much in > the future. How can you know if it's worth the cost if you haven't measured it? > > In any case you'd at least need to make sure that you disable/re-enable > > the LP1+ watermarks if linetime WMs or DDB partitioning changes, > > regardless of whether the LP1+ watermarks themselves changed. > > We already do this. Oh right. My eyes glaced over when looking at that function, so I misread what it did. > > Thanks again for the review, > Paulo > > > > > >> + > >> + if (previous.wm_pipe[0] != results->wm_pipe[0]) > >> + I915_WRITE(WM0_PIPEA_ILK, results->wm_pipe[0]); > >> + if (previous.wm_pipe[1] != results->wm_pipe[1]) > >> + I915_WRITE(WM0_PIPEB_ILK, results->wm_pipe[1]); > >> + if (previous.wm_pipe[2] != results->wm_pipe[2]) > >> + I915_WRITE(WM0_PIPEC_IVB, results->wm_pipe[2]); > >> + > >> + if (previous.wm_linetime[0] != results->wm_linetime[0]) > >> + I915_WRITE(PIPE_WM_LINETIME(PIPE_A), results->wm_linetime[0]); > >> + if (previous.wm_linetime[1] != results->wm_linetime[1]) > >> + I915_WRITE(PIPE_WM_LINETIME(PIPE_B), results->wm_linetime[1]); > >> + if (previous.wm_linetime[2] != results->wm_linetime[2]) > >> + I915_WRITE(PIPE_WM_LINETIME(PIPE_C), results->wm_linetime[2]); > >> + > >> + if (prev_partitioning != partitioning) { > >> + val = I915_READ(WM_MISC); > >> + if (partitioning == HSW_DATA_BUF_PART_1_2) > >> + val &= ~WM_MISC_DATA_PARTITION_5_6; > >> + else > >> + val |= WM_MISC_DATA_PARTITION_5_6; > >> + I915_WRITE(WM_MISC, val); > >> + } > >> + > >> + if (previous.wm_lp_spr[0] != results->wm_lp_spr[0]) > >> + I915_WRITE(WM1S_LP_ILK, results->wm_lp_spr[0]); > >> + if (previous.wm_lp_spr[1] != results->wm_lp_spr[1]) > >> + I915_WRITE(WM2S_LP_IVB, results->wm_lp_spr[1]); > >> + if (previous.wm_lp_spr[2] != results->wm_lp_spr[2]) > >> + I915_WRITE(WM3S_LP_IVB, results->wm_lp_spr[2]); > >> + > >> + if (results->wm_lp[0] != 0) > >> + I915_WRITE(WM1_LP_ILK, results->wm_lp[0]); > >> + if (results->wm_lp[1] != 0) > >> + I915_WRITE(WM2_LP_ILK, results->wm_lp[1]); > >> + if (results->wm_lp[2] != 0) > >> + I915_WRITE(WM3_LP_ILK, results->wm_lp[2]); > >> +} > >> + > >> +static void haswell_update_wm(struct drm_device *dev) > >> +{ > >> + struct drm_i915_private *dev_priv = dev->dev_private; > >> + struct hsw_pipe_wm_parameters params[3]; > >> + struct hsw_wm_values results; > >> + uint32_t wm[5]; > >> > >> - sandybridge_update_wm(dev); > >> + hsw_compute_wm_parameters(dev, params, wm); > >> + hsw_compute_wm_results(dev, params, wm, &results); > >> + hsw_write_wm_values(dev_priv, &results, HSW_DATA_BUF_PART_1_2); > >> } > >> > >> static void haswell_update_sprite_wm(struct drm_device *dev, int pipe, > >> -- > >> 1.8.1.2 > >> > >> _______________________________________________ > >> Intel-gfx mailing list > >> Intel-gfx at lists.freedesktop.org > >> http://lists.freedesktop.org/mailman/listinfo/intel-gfx > > > > -- > > Ville Syrj?l? > > Intel OTC > > > > -- > Paulo Zanoni -- Ville Syrj?l? Intel OTC