On Tue, 17 Sep 2019, Swati Sharma <swati2.sharma@xxxxxxxxx> wrote: > For icl+, have hw read out to create hw blob of gamma > lut values. icl+ platforms supports multi segmented gamma > mode, add hw lut creation for this mode. > > This will be used to validate gamma programming using dsb > (display state buffer) which is a tgl feature. > > v2: -readout code for multisegmented gamma has to come > up with some intermediate entries that aren't preserved > in hardware (Jani N) > -linear interpolation (Ville) > -moved common code to check gamma_enable to specific funcs, > since icl doesn't support that > > Signed-off-by: Swati Sharma <swati2.sharma@xxxxxxxxx> > --- > drivers/gpu/drm/i915/display/intel_color.c | 243 ++++++++++++++++++++++++++--- > drivers/gpu/drm/i915/i915_reg.h | 7 + > 2 files changed, 230 insertions(+), 20 deletions(-) > > diff --git a/drivers/gpu/drm/i915/display/intel_color.c b/drivers/gpu/drm/i915/display/intel_color.c > index b1f0f7e..0008011 100644 > --- a/drivers/gpu/drm/i915/display/intel_color.c > +++ b/drivers/gpu/drm/i915/display/intel_color.c > @@ -1370,6 +1370,9 @@ static int icl_color_check(struct intel_crtc_state *crtc_state) > > static int i9xx_gamma_precision(const struct intel_crtc_state *crtc_state) > { > + if (!crtc_state->gamma_enable) > + return 0; > + Why are you moving these checks back to the individual functions? > switch (crtc_state->gamma_mode) { > case GAMMA_MODE_MODE_8BIT: > return 8; > @@ -1383,6 +1386,9 @@ static int i9xx_gamma_precision(const struct intel_crtc_state *crtc_state) > > static int ilk_gamma_precision(const struct intel_crtc_state *crtc_state) > { > + if (!crtc_state->gamma_enable) > + return 0; > + > if ((crtc_state->csc_mode & CSC_POSITION_BEFORE_GAMMA) == 0) > return 0; > > @@ -1399,6 +1405,9 @@ static int ilk_gamma_precision(const struct intel_crtc_state *crtc_state) > > static int chv_gamma_precision(const struct intel_crtc_state *crtc_state) > { > + if (!crtc_state->gamma_enable) > + return 0; > + > if (crtc_state->cgm_mode & CGM_PIPE_MODE_GAMMA) > return 10; > else > @@ -1407,6 +1416,9 @@ static int chv_gamma_precision(const struct intel_crtc_state *crtc_state) > > static int glk_gamma_precision(const struct intel_crtc_state *crtc_state) > { > + if (!crtc_state->gamma_enable) > + return 0; > + > switch (crtc_state->gamma_mode) { > case GAMMA_MODE_MODE_8BIT: > return 8; > @@ -1418,21 +1430,39 @@ static int glk_gamma_precision(const struct intel_crtc_state *crtc_state) > } > } > > +static int icl_gamma_precision(const struct intel_crtc_state *crtc_state) > +{ > + if ((crtc_state->gamma_mode & POST_CSC_GAMMA_ENABLE) == 0) > + return 0; > + > + switch (crtc_state->gamma_mode & GAMMA_MODE_MODE_MASK) { > + case GAMMA_MODE_MODE_8BIT: > + return 8; > + case GAMMA_MODE_MODE_10BIT: > + return 10; > + case GAMMA_MODE_MODE_12BIT_MULTI_SEGMENTED: > + return 16; > + default: > + MISSING_CASE(crtc_state->gamma_mode); > + return 0; > + } > + > +} > + > int intel_color_get_gamma_bit_precision(const struct intel_crtc_state *crtc_state) > { > struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); > struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); > > - if (!crtc_state->gamma_enable) > - return 0; > - Why? > if (HAS_GMCH(dev_priv)) { > if (IS_CHERRYVIEW(dev_priv)) > return chv_gamma_precision(crtc_state); > else > return i9xx_gamma_precision(crtc_state); > } else { > - if (IS_CANNONLAKE(dev_priv) || IS_GEMINILAKE(dev_priv)) > + if (INTEL_GEN(dev_priv) >= 11) > + return icl_gamma_precision(crtc_state); > + else if (IS_CANNONLAKE(dev_priv) || IS_GEMINILAKE(dev_priv)) > return glk_gamma_precision(crtc_state); > else if (IS_IRONLAKE(dev_priv)) > return ilk_gamma_precision(crtc_state); > @@ -1463,6 +1493,30 @@ static bool intel_color_lut_entry_equal(struct drm_color_lut *lut1, > return true; > } > > +static bool intel_color_lut_entry_multi_equal(struct drm_color_lut *lut1, > + struct drm_color_lut *lut2, > + int lut_size, u32 err) > +{ > + int i; > + > + for (i = 0; i < 9; i++) { > + if (!err_check(&lut1[i], &lut2[i], err)) > + return false; > + } > + > + for (i = 1; i < 257; i++) { ^ extra space > + if (!err_check(&lut1[i * 8], &lut2[i * 8], err)) > + return false; > + } i == 8 will be checked twice. > + > + for (i = 0; i < 256; i++) { > + if (!err_check(&lut1[i * 8 * 128], &lut2[i * 8 * 128], err)) > + return false; > + } i == 0 will be checked twice. I note that these indices match the programming part, so maybe better to keep them as they are here. No harm done I guess. > + > + return true; > +} > + > bool intel_color_lut_equal(struct drm_property_blob *blob1, > struct drm_property_blob *blob2, > u32 gamma_mode, u32 bit_precision) > @@ -1481,16 +1535,8 @@ bool intel_color_lut_equal(struct drm_property_blob *blob1, > lut_size2 = drm_color_lut_size(blob2); > > /* check sw and hw lut size */ > - switch (gamma_mode) { > - case GAMMA_MODE_MODE_8BIT: > - case GAMMA_MODE_MODE_10BIT: > - if (lut_size1 != lut_size2) > - return false; > - break; > - default: > - MISSING_CASE(gamma_mode); > - return false; > - } > + if (lut_size1 != lut_size2) > + return false; > > lut1 = blob1->data; > lut2 = blob2->data; > @@ -1498,13 +1544,18 @@ bool intel_color_lut_equal(struct drm_property_blob *blob1, > err = 0xffff >> bit_precision; > > /* check sw and hw lut entry to be equal */ > - switch (gamma_mode) { > + switch (gamma_mode & GAMMA_MODE_MODE_MASK) { > case GAMMA_MODE_MODE_8BIT: > case GAMMA_MODE_MODE_10BIT: > if (!intel_color_lut_entry_equal(lut1, lut2, > lut_size2, err)) > return false; > break; > + case GAMMA_MODE_MODE_12BIT_MULTI_SEGMENTED: > + if (!intel_color_lut_entry_multi_equal(lut1, lut2, > + lut_size2, err)) > + return false; > + break; > default: > MISSING_CASE(gamma_mode); > return false; > @@ -1744,6 +1795,157 @@ static void glk_read_luts(struct intel_crtc_state *crtc_state) > crtc_state->base.gamma_lut = glk_read_lut_10(crtc_state, PAL_PREC_INDEX_VALUE(0)); > } > > +static struct drm_color_lut * > +icl_compute_interpolated_gamma_blob(struct drm_color_lut *tmp_lut, > + struct drm_color_lut *lut, u32 lut_size) > +{ I think you should just pass in the the actual lut, and not use a temp lut at all. See my comments below for icl_read_lut_multi_seg() function. > + __u16 a_red, b_red, a_green, b_green, a_blue, b_blue; > + __u16 red_step, green_step, blue_step; u16, not __u16. > + int i, j, k, m, n; > + > + for (i = 0, k = 0; k < 9; i++, k++) { > + lut[i].red = tmp_lut[k].red; > + lut[i].green = tmp_lut[k].green; > + lut[i].blue = tmp_lut[k].blue; > + } With a single lut, you can skip this. > + > + for (k = 9; k < 264; k++) { > + a_red = tmp_lut[k].red; > + b_red = tmp_lut[k + 1].red; > + red_step = (b_red - a_red) / 8; > + > + a_green = tmp_lut[k].green; > + b_green = tmp_lut[k + 1].green; > + green_step = (b_green - a_green) / 8; > + > + a_blue = tmp_lut[k].blue; > + b_blue = tmp_lut[k + 1].blue; > + blue_step = (b_blue - a_blue) / 8; > + > + for (j = 0; j < 8; j++) { > + lut[i].red = lut[i - 1].red + red_step; > + lut[i].green = lut[i - 1].green + green_step; > + lut[i].blue = lut[i - 1].blue + blue_step; > + > + i++; > + } > + } This would be written in a way to only cover the values that need to be interpolated. for (i = 1; i < 257 - 1; i++) { start = i * 8; end = (i + 1) * 8; steps = end - start; for (j = start + 1; j < end; j++) { } } Fill in the gaps. For me I think this is easier to grasp and compare against the readout and write. I find it very hard to check the ranges in the loops. > + > + for (k = 265; k < 521; k++) { > + a_red = tmp_lut[k].red; > + b_red = tmp_lut[k + 1].red; > + red_step = ((b_red - a_red) / 127) / 8; > + > + a_green = tmp_lut[k].green; > + b_green = tmp_lut[k + 1].green; > + green_step = ((b_green - a_green) / 127) / 8; > + > + a_blue = tmp_lut[k].blue; > + b_blue = tmp_lut[k + 1].blue; > + blue_step = ((b_blue - a_blue) / 127) / 8; > + > + for (m = 0; m < 127; m++) { > + for (n = 0; n < 8; n++) { > + lut[i].red = lut[i - 1].red + red_step; > + lut[i].green = lut[i - 1].green + green_step; > + lut[i].blue = lut[i - 1].blue + blue_step; > + > + i++; > + } > + } > + } Similarly: for (i = 0; i < 256 - 1; i++) { start = i * 8 * 128; end = (i + 1) * 8 * 128; steps = end - start; for (j = start + 1; j < end; j++) { } } > + > + return lut; > +} > + > +static struct drm_property_blob * > +icl_read_lut_multi_seg(const struct intel_crtc_state *crtc_state) > +{ > + struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); > + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); > + int lut_size = INTEL_INFO(dev_priv)->color.gamma_lut_size; > + int tmp_lut_size = 522; > + enum pipe pipe = crtc->pipe; > + struct drm_property_blob *blob, *tmp_blob; > + struct drm_color_lut *blob_data, *tmp_blob_data; > + u32 i, val1, val2; > + > + blob = drm_property_create_blob(&dev_priv->drm, > + sizeof(struct drm_color_lut) * lut_size, > + NULL); > + tmp_blob = drm_property_create_blob(&dev_priv->drm, > + sizeof(struct drm_color_lut) * tmp_lut_size, > + NULL); You end up leaking the temporary blob. But I don't think you really need the temporary blob at all. Read on. > + if (IS_ERR(blob) || IS_ERR(tmp_blob)) > + return NULL; > + > + blob_data = blob->data; > + tmp_blob_data = tmp_blob->data; > + > + I915_WRITE(PREC_PAL_MULTI_SEG_INDEX(pipe), PAL_PREC_AUTO_INCREMENT); > + > + for (i = 0; i < 9; i++) { > + val1 = I915_READ(PREC_PAL_MULTI_SEG_DATA(pipe)); > + val2 = I915_READ(PREC_PAL_MULTI_SEG_DATA(pipe)); > + > + tmp_blob_data[i].red = REG_FIELD_GET(PAL_PREC_MULTI_SEG_RED_UDW_MASK, val2) << 6 | > + REG_FIELD_GET(PAL_PREC_MULTI_SEG_RED_LDW_MASK, val1); > + tmp_blob_data[i].green = REG_FIELD_GET(PAL_PREC_MULTI_SEG_GREEN_UDW_MASK, val2) << 6 | > + REG_FIELD_GET(PAL_PREC_MULTI_SEG_GREEN_LDW_MASK, val1); > + tmp_blob_data[i].blue = REG_FIELD_GET(PAL_PREC_MULTI_SEG_BLUE_UDW_MASK, val2) << 6 | > + REG_FIELD_GET(PAL_PREC_MULTI_SEG_BLUE_LDW_MASK, val1); > + } > + > + I915_WRITE(PREC_PAL_INDEX(pipe), PAL_PREC_AUTO_INCREMENT); > + > + for (i = 1; i < 257; i++) { > + val1 = I915_READ(PREC_PAL_DATA(pipe)); > + val2 = I915_READ(PREC_PAL_DATA(pipe)); > + > + tmp_blob_data[i + 8].red = REG_FIELD_GET(PAL_PREC_MULTI_SEG_RED_UDW_MASK, val2) << 6 | > + REG_FIELD_GET(PAL_PREC_MULTI_SEG_RED_LDW_MASK, val1); > + tmp_blob_data[i + 8].green = REG_FIELD_GET(PAL_PREC_MULTI_SEG_GREEN_UDW_MASK, val2) << 6 | > + REG_FIELD_GET(PAL_PREC_MULTI_SEG_GREEN_LDW_MASK, val1); > + tmp_blob_data[i + 8].blue = REG_FIELD_GET(PAL_PREC_MULTI_SEG_BLUE_UDW_MASK, val2) << 6 | > + REG_FIELD_GET(PAL_PREC_MULTI_SEG_BLUE_LDW_MASK, val1); > + } > + > + for (i = 0; i < 256; i++) { > + val1 = I915_READ(PREC_PAL_DATA(pipe)); > + val2 = I915_READ(PREC_PAL_DATA(pipe)); > + > + tmp_blob_data[i + 265].red = REG_FIELD_GET(PAL_PREC_MULTI_SEG_RED_UDW_MASK, val2) << 6 | > + REG_FIELD_GET(PAL_PREC_MULTI_SEG_RED_LDW_MASK, val1); > + tmp_blob_data[i + 265].green = REG_FIELD_GET(PAL_PREC_MULTI_SEG_GREEN_UDW_MASK, val2) << 6 | > + REG_FIELD_GET(PAL_PREC_MULTI_SEG_GREEN_LDW_MASK, val1); > + tmp_blob_data[i + 265].blue = REG_FIELD_GET(PAL_PREC_MULTI_SEG_BLUE_UDW_MASK, val2) << 6 | > + REG_FIELD_GET(PAL_PREC_MULTI_SEG_BLUE_LDW_MASK, val1); > + } > + > + tmp_blob_data[521].red = REG_FIELD_GET(PREC_PAL_GC_MAX_RGB_MASK, > + I915_READ(PREC_PAL_GC_MAX(pipe, 0))); > + tmp_blob_data[521].green = REG_FIELD_GET(PREC_PAL_GC_MAX_RGB_MASK, > + I915_READ(PREC_PAL_GC_MAX(pipe, 1))); > + tmp_blob_data[521].blue = REG_FIELD_GET(PREC_PAL_GC_MAX_RGB_MASK, > + I915_READ(PREC_PAL_GC_MAX(pipe, 1))); In the above, I think you should just read the values directly to the right locations in the actual blob. Then all the indices match the programming side, and it's easier to review. > + > + blob_data = icl_compute_interpolated_gamma_blob(tmp_blob_data, blob_data, lut_size); And then you fill in the gaps in the interpolation function. > + > + return blob; > +} > + > +static void icl_read_luts(struct intel_crtc_state *crtc_state) > +{ > + if ((crtc_state->gamma_mode & GAMMA_MODE_MODE_MASK) == > + GAMMA_MODE_MODE_8BIT) > + crtc_state->base.gamma_lut = i9xx_read_lut_8(crtc_state); > + else if ((crtc_state->gamma_mode & GAMMA_MODE_MODE_MASK) == > + GAMMA_MODE_MODE_12BIT_MULTI_SEGMENTED) > + crtc_state->base.gamma_lut = icl_read_lut_multi_seg(crtc_state); > + else > + crtc_state->base.gamma_lut = glk_read_lut_10(crtc_state, PAL_PREC_INDEX_VALUE(0)); > +} > + > void intel_color_init(struct intel_crtc *crtc) > { > struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); > @@ -1785,16 +1987,17 @@ void intel_color_init(struct intel_crtc *crtc) > else > dev_priv->display.color_commit = ilk_color_commit; > > - if (INTEL_GEN(dev_priv) >= 11) > + if (INTEL_GEN(dev_priv) >= 11) { > dev_priv->display.load_luts = icl_load_luts; > - else if (IS_CANNONLAKE(dev_priv) || IS_GEMINILAKE(dev_priv)) { > + dev_priv->display.read_luts = icl_read_luts; > + } else if (IS_CANNONLAKE(dev_priv) || IS_GEMINILAKE(dev_priv)) { > dev_priv->display.load_luts = glk_load_luts; > dev_priv->display.read_luts = glk_read_luts; > - } else if (INTEL_GEN(dev_priv) >= 8) > + } else if (INTEL_GEN(dev_priv) >= 8) { > dev_priv->display.load_luts = bdw_load_luts; > - else if (INTEL_GEN(dev_priv) >= 7) > + } else if (INTEL_GEN(dev_priv) >= 7) { > dev_priv->display.load_luts = ivb_load_luts; > - else { > + } else { Shouldn't the cleanup be part of patch 1? > dev_priv->display.load_luts = ilk_load_luts; > dev_priv->display.read_luts = ilk_read_luts; > } > diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h > index bf37ece..844dd62 100644 > --- a/drivers/gpu/drm/i915/i915_reg.h > +++ b/drivers/gpu/drm/i915/i915_reg.h > @@ -10378,6 +10378,7 @@ enum skl_power_gate { > > #define PREC_PAL_INDEX(pipe) _MMIO_PIPE(pipe, _PAL_PREC_INDEX_A, _PAL_PREC_INDEX_B) > #define PREC_PAL_DATA(pipe) _MMIO_PIPE(pipe, _PAL_PREC_DATA_A, _PAL_PREC_DATA_B) > +#define PREC_PAL_GC_MAX_RGB_MASK REG_GENMASK(15, 0) > #define PREC_PAL_GC_MAX(pipe, i) _MMIO(_PIPE(pipe, _PAL_PREC_GC_MAX_A, _PAL_PREC_GC_MAX_B) + (i) * 4) > #define PREC_PAL_EXT_GC_MAX(pipe, i) _MMIO(_PIPE(pipe, _PAL_PREC_EXT_GC_MAX_A, _PAL_PREC_EXT_GC_MAX_B) + (i) * 4) > #define PREC_PAL_EXT2_GC_MAX(pipe, i) _MMIO(_PIPE(pipe, _PAL_PREC_EXT2_GC_MAX_A, _PAL_PREC_EXT2_GC_MAX_B) + (i) * 4) > @@ -10401,6 +10402,12 @@ enum skl_power_gate { > > #define _PAL_PREC_MULTI_SEG_DATA_A 0x4A40C > #define _PAL_PREC_MULTI_SEG_DATA_B 0x4AC0C > +#define PAL_PREC_MULTI_SEG_RED_LDW_MASK REG_GENMASK(29, 24) > +#define PAL_PREC_MULTI_SEG_RED_UDW_MASK REG_GENMASK(29, 20) > +#define PAL_PREC_MULTI_SEG_GREEN_LDW_MASK REG_GENMASK(19, 14) > +#define PAL_PREC_MULTI_SEG_GREEN_UDW_MASK REG_GENMASK(19, 10) > +#define PAL_PREC_MULTI_SEG_BLUE_LDW_MASK REG_GENMASK(9, 4) > +#define PAL_PREC_MULTI_SEG_BLUE_UDW_MASK REG_GENMASK(9, 0) > > #define PREC_PAL_MULTI_SEG_INDEX(pipe) _MMIO_PIPE(pipe, \ > _PAL_PREC_MULTI_SEG_INDEX_A, \ -- Jani Nikula, Intel Open Source Graphics Center _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/intel-gfx