>-----Original Message----- >From: Ville Syrjälä [mailto:ville.syrjala@xxxxxxxxxxxxxxx] >Sent: Friday, March 29, 2019 4:17 PM >To: Roper, Matthew D <matthew.d.roper@xxxxxxxxx> >Cc: intel-gfx@xxxxxxxxxxxxxxxxxxxxx; Shankar, Uma <uma.shankar@xxxxxxxxx> >Subject: Re: [PATCH 2/6] drm/i915: Don't use split gamma when we don't have to > >On Thu, Mar 28, 2019 at 05:16:03PM -0700, Matt Roper wrote: >> On Thu, Mar 28, 2019 at 11:05:01PM +0200, Ville Syrjala wrote: >> > From: Ville Syrjälä <ville.syrjala@xxxxxxxxxxxxxxx> >> > >> > Using the split gamma mode when we don't have to has the annoying >> > requirement of loading a linear LUT to the unused half. Instead >> > let's make life simpler by switching to the 10bit gamma mode and >> > duplicating each entry. >> > >> > This also allows us to load the software gamma LUT into the hardware >> > degamma LUT, thus removing some of the buggy configurations we >> > currently allow (YCbCr/limited range RGB >> > + gamma LUT). We do still have other configurations that are >> > also buggy, but those will need more complicated fixes or they just >> > need to be rejected. Sadly GLK doesn't have this flexibility anymore >> > and the degamma and gamma LUTs are very different so no help there. >> > >> > Signed-off-by: Ville Syrjälä <ville.syrjala@xxxxxxxxxxxxxxx> >> > --- >> > drivers/gpu/drm/i915/i915_reg.h | 1 + >> > drivers/gpu/drm/i915/intel_color.c | 159 >> > +++++++++++++++-------------- >> > 2 files changed, 86 insertions(+), 74 deletions(-) >> > >> > diff --git a/drivers/gpu/drm/i915/i915_reg.h >> > b/drivers/gpu/drm/i915/i915_reg.h index c866379a521b..eb7e93354cfe >> > 100644 >> > --- a/drivers/gpu/drm/i915/i915_reg.h >> > +++ b/drivers/gpu/drm/i915/i915_reg.h >> > @@ -10127,6 +10127,7 @@ enum skl_power_gate { >> > #define PAL_PREC_SPLIT_MODE (1 << 31) >> > #define PAL_PREC_AUTO_INCREMENT (1 << 15) >> > #define PAL_PREC_INDEX_VALUE_MASK (0x3ff << 0) >> > +#define PAL_PREC_INDEX_VALUE(x) ((x) << 0) >> > #define _PAL_PREC_DATA_A 0x4A404 >> > #define _PAL_PREC_DATA_B 0x4AC04 >> > #define _PAL_PREC_DATA_C 0x4B404 >> > diff --git a/drivers/gpu/drm/i915/intel_color.c >> > b/drivers/gpu/drm/i915/intel_color.c >> > index d7c38a2bbd8f..ed4bd9bd15f5 100644 >> > --- a/drivers/gpu/drm/i915/intel_color.c >> > +++ b/drivers/gpu/drm/i915/intel_color.c >> > @@ -466,72 +466,32 @@ static void skl_color_commit(const struct >intel_crtc_state *crtc_state) >> > ilk_load_csc_matrix(crtc_state); >> > } >> > >> > -static void bdw_load_degamma_lut(const struct intel_crtc_state >> > *crtc_state) >> > +static void bdw_load_lut_10(struct intel_crtc *crtc, >> > + const struct drm_property_blob *blob, >> > + u32 prec_index, bool duplicate) >> > { >> > - struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); >> > struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); >> > - const struct drm_property_blob *degamma_lut = crtc_state- >>base.degamma_lut; >> > - u32 i, lut_size = INTEL_INFO(dev_priv)->color.degamma_lut_size; >> > + const struct drm_color_lut *lut = blob->data; >> > + int i, lut_size = drm_color_lut_size(blob); >> > enum pipe pipe = crtc->pipe; >> > >> > - I915_WRITE(PREC_PAL_INDEX(pipe), >> > - PAL_PREC_SPLIT_MODE | PAL_PREC_AUTO_INCREMENT); >> > - >> > - if (degamma_lut) { >> > - const struct drm_color_lut *lut = degamma_lut->data; >> > + I915_WRITE(PREC_PAL_INDEX(pipe), prec_index | >> > + PAL_PREC_AUTO_INCREMENT); >> > >> > - for (i = 0; i < lut_size; i++) >> > - I915_WRITE(PREC_PAL_DATA(pipe), ilk_lut_10(&lut[i])); >> > - } else { >> > + /* >> > + * We advertize the split gamma sizes. When not using split >> > + * gamma we just duplicate each entry. >> > + * >> > + * TODO: expose the full LUT to userspace >> >> Any reason not to just do this immediately? Throwing away half the >> table entries if we decide we need split mode doesn't seem any harder >> than duplicating the entries when we decide we don't. The color >> management kerneldoc already explicitly recommends this approach for >> hardware that can support multiple gamma modes, so I don't think we >> need any new ABI to handle it. > >Hmm. I guess that apporach could be doable. It might be a bit annoying for userspace >though if it expects a direct color visual. But at least for X we won't use >degamma/ctm anyway so seems like it should work out just fine. > >> >> > + */ >> > + if (duplicate) { >> > for (i = 0; i < lut_size; i++) { >> > - u32 v = (i * ((1 << 10) - 1)) / (lut_size - 1); >> > - >> > - I915_WRITE(PREC_PAL_DATA(pipe), >> > - (v << 20) | (v << 10) | v); >> > + I915_WRITE(PREC_PAL_DATA(pipe), ilk_lut_10(&lut[i])); >> > + I915_WRITE(PREC_PAL_DATA(pipe), ilk_lut_10(&lut[i])); >> > } >> > - } >> > -} >> > - >> > -static void bdw_load_gamma_lut(const struct intel_crtc_state >> > *crtc_state, u32 offset) -{ >> > - struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); >> > - struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); >> > - const struct drm_property_blob *gamma_lut = crtc_state->base.gamma_lut; >> > - u32 i, lut_size = INTEL_INFO(dev_priv)->color.gamma_lut_size; >> > - enum pipe pipe = crtc->pipe; >> > - >> > - WARN_ON(offset & ~PAL_PREC_INDEX_VALUE_MASK); >> > - >> > - I915_WRITE(PREC_PAL_INDEX(pipe), >> > - (offset ? PAL_PREC_SPLIT_MODE : 0) | >> > - PAL_PREC_AUTO_INCREMENT | >> > - offset); >> > - >> > - if (gamma_lut) { >> > - const struct drm_color_lut *lut = gamma_lut->data; >> > - >> > + } else { >> > for (i = 0; i < lut_size; i++) >> > I915_WRITE(PREC_PAL_DATA(pipe), ilk_lut_10(&lut[i])); >> > - >> > - /* Program the max register to clamp values > 1.0. */ >> > - i = lut_size - 1; >> > - I915_WRITE(PREC_PAL_GC_MAX(pipe, 0), >> > - drm_color_lut_extract(lut[i].red, 16)); >> > - I915_WRITE(PREC_PAL_GC_MAX(pipe, 1), >> > - drm_color_lut_extract(lut[i].green, 16)); >> > - I915_WRITE(PREC_PAL_GC_MAX(pipe, 2), >> > - drm_color_lut_extract(lut[i].blue, 16)); >> > - } else { >> > - for (i = 0; i < lut_size; i++) { >> > - u32 v = (i * ((1 << 10) - 1)) / (lut_size - 1); >> > - >> > - I915_WRITE(PREC_PAL_DATA(pipe), >> > - (v << 20) | (v << 10) | v); >> > - } >> > - >> > - I915_WRITE(PREC_PAL_GC_MAX(pipe, 0), (1 << 16) - 1); >> > - I915_WRITE(PREC_PAL_GC_MAX(pipe, 1), (1 << 16) - 1); >> > - I915_WRITE(PREC_PAL_GC_MAX(pipe, 2), (1 << 16) - 1); >> > } >> > >> > /* >> > @@ -541,18 +501,43 @@ static void bdw_load_gamma_lut(const struct >intel_crtc_state *crtc_state, u32 of >> > I915_WRITE(PREC_PAL_INDEX(pipe), 0); } >> > >> > -/* Loads the palette/gamma unit for the CRTC on Broadwell+. */ >> > -static void broadwell_load_luts(const struct intel_crtc_state >> > *crtc_state) >> > +static void bdw_load_lut_10_max(struct intel_crtc *crtc, >> > + const struct drm_property_blob *blob) >> > { >> > - struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); >> > struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); >> > + const struct drm_color_lut *lut = blob->data; >> > + int i = drm_color_lut_size(blob) - 1; >> > + enum pipe pipe = crtc->pipe; >> > >> > - if (crtc_state_is_legacy_gamma(crtc_state)) { >> > + /* Program the max register to clamp values > 1.0. */ >> > + I915_WRITE(PREC_PAL_GC_MAX(pipe, 0), >> > + drm_color_lut_extract(lut[i].red, 16)); >> > + I915_WRITE(PREC_PAL_GC_MAX(pipe, 1), >> > + drm_color_lut_extract(lut[i].green, 16)); >> > + I915_WRITE(PREC_PAL_GC_MAX(pipe, 2), >> > + drm_color_lut_extract(lut[i].blue, 16)); >> >> Are these the right registers to use? The "Pipe Palette and Gamma" >> page of the bspec indicates we should be using PAL_EXT_GC_MAX (e.g., >> 0x4A420 instead of 0x4A410) for both the 10-bit and split gamma modes. >> I only see PAL_GC_MAX mentioned for the interpolated gamma mode. > >Yeah, these aren't the registers we're looking for. Maybe Uma will send a patch to fix >this? I'm also thinking we should just program these to 1.0. There's also >EXT_GC_MAX2 on some more recent platforms. Yes, sent now to list :). Please review. Regards, Uma Shankar >> >> >> Matt >> >> > +} >> > + >> > +static void bdw_load_luts(const struct intel_crtc_state >> > +*crtc_state) { >> > + struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); >> > + const struct drm_property_blob *gamma_lut = crtc_state->base.gamma_lut; >> > + const struct drm_property_blob *degamma_lut = >> > +crtc_state->base.degamma_lut; >> > + >> > + if (crtc_state->gamma_mode == GAMMA_MODE_MODE_8BIT) { >> > i9xx_load_luts(crtc_state); >> > + } else if (crtc_state->gamma_mode == GAMMA_MODE_MODE_SPLIT) { >> > + bdw_load_lut_10(crtc, degamma_lut, PAL_PREC_SPLIT_MODE | >> > + PAL_PREC_INDEX_VALUE(0), false); >> > + bdw_load_lut_10(crtc, gamma_lut, PAL_PREC_SPLIT_MODE | >> > + PAL_PREC_INDEX_VALUE(512), false); >> > + bdw_load_lut_10_max(crtc, gamma_lut); >> > } else { >> > - bdw_load_degamma_lut(crtc_state); >> > - bdw_load_gamma_lut(crtc_state, >> > - INTEL_INFO(dev_priv)->color.degamma_lut_size); >> > + const struct drm_property_blob *blob = gamma_lut ?: degamma_lut; >> > + >> > + bdw_load_lut_10(crtc, blob, >> > + PAL_PREC_INDEX_VALUE(0), true); >> > + bdw_load_lut_10_max(crtc, blob); >> > } >> > } >> > >> > @@ -624,6 +609,9 @@ static void glk_load_degamma_lut_linear(const >> > struct intel_crtc_state *crtc_stat >> > >> > static void glk_load_luts(const struct intel_crtc_state >> > *crtc_state) { >> > + const struct drm_property_blob *gamma_lut = crtc_state->base.gamma_lut; >> > + struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); >> > + >> > /* >> > * On GLK+ both pipe CSC and degamma LUT are controlled >> > * by csc_enable. Hence for the cases where the CSC is @@ -637,22 >> > +625,28 @@ static void glk_load_luts(const struct intel_crtc_state *crtc_state) >> > else >> > glk_load_degamma_lut_linear(crtc_state); >> > >> > - if (crtc_state_is_legacy_gamma(crtc_state)) >> > + if (crtc_state->gamma_mode == GAMMA_MODE_MODE_8BIT) { >> > i9xx_load_luts(crtc_state); >> > - else >> > - bdw_load_gamma_lut(crtc_state, 0); >> > + } else { >> > + bdw_load_lut_10(crtc, gamma_lut, PAL_PREC_INDEX_VALUE(0), >false); >> > + bdw_load_lut_10_max(crtc, gamma_lut); >> > + } >> > } >> > >> > static void icl_load_luts(const struct intel_crtc_state >> > *crtc_state) { >> > + const struct drm_property_blob *gamma_lut = crtc_state->base.gamma_lut; >> > + struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); >> > + >> > if (crtc_state->base.degamma_lut) >> > glk_load_degamma_lut(crtc_state); >> > >> > - if (crtc_state_is_legacy_gamma(crtc_state)) >> > + if (crtc_state->gamma_mode == GAMMA_MODE_MODE_8BIT) { >> > i9xx_load_luts(crtc_state); >> > - else >> > - /* ToDo: Add support for multi segment gamma LUT */ >> > - bdw_load_gamma_lut(crtc_state, 0); >> > + } else { >> > + bdw_load_lut_10(crtc, gamma_lut, PAL_PREC_INDEX_VALUE(0), >false); >> > + bdw_load_lut_10_max(crtc, gamma_lut); >> > + } >> > } >> > >> > static void cherryview_load_luts(const struct intel_crtc_state >> > *crtc_state) @@ -937,8 +931,25 @@ static u32 bdw_gamma_mode(const struct >intel_crtc_state *crtc_state) >> > if (!crtc_state->gamma_enable || >> > crtc_state_is_legacy_gamma(crtc_state)) >> > return GAMMA_MODE_MODE_8BIT; >> > - else >> > + else if (crtc_state->base.gamma_lut && >> > + crtc_state->base.degamma_lut) >> > return GAMMA_MODE_MODE_SPLIT; >> > + else >> > + return GAMMA_MODE_MODE_10BIT; >> > +} >> > + >> > +static u32 bdw_csc_mode(const struct intel_crtc_state *crtc_state) >> > +{ >> > + /* >> > + * CSC comes after the LUT in degamma, RGB->YCbCr, >> > + * and RGB full->limited range mode. >> > + */ >> > + if (crtc_state->base.degamma_lut || >> > + crtc_state->output_format != INTEL_OUTPUT_FORMAT_RGB || >> > + crtc_state->limited_color_range) >> > + return 0; >> > + >> > + return CSC_POSITION_BEFORE_GAMMA; >> > } >> > >> > static int bdw_color_check(struct intel_crtc_state *crtc_state) @@ >> > -960,7 +971,7 @@ static int bdw_color_check(struct intel_crtc_state >> > *crtc_state) >> > >> > crtc_state->gamma_mode = bdw_gamma_mode(crtc_state); >> > >> > - crtc_state->csc_mode = 0; >> > + crtc_state->csc_mode = bdw_csc_mode(crtc_state); >> > >> > ret = intel_color_add_affected_planes(crtc_state); >> > if (ret) >> > @@ -1094,7 +1105,7 @@ void intel_color_init(struct intel_crtc *crtc) >> > else if (IS_CANNONLAKE(dev_priv) || IS_GEMINILAKE(dev_priv)) >> > dev_priv->display.load_luts = glk_load_luts; >> > else if (INTEL_GEN(dev_priv) >= 9 || IS_BROADWELL(dev_priv)) >> > - dev_priv->display.load_luts = broadwell_load_luts; >> > + dev_priv->display.load_luts = bdw_load_luts; >> > else >> > dev_priv->display.load_luts = i9xx_load_luts; >> > } >> > -- >> > 2.19.2 >> > >> >> -- >> Matt Roper >> Graphics Software Engineer >> IoTG Platform Enabling & Development >> Intel Corporation >> (916) 356-2795 > >-- >Ville Syrjälä >Intel _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/intel-gfx