On Mon, May 06, 2019 at 04:09:33PM +0530, Sharma, Shashank wrote: > Regards > > Shashank > > On 5/3/2019 9:20 PM, Ville Syrjälä wrote: > > On Tue, Apr 30, 2019 at 08:51:08PM +0530, Shashank Sharma wrote: > >> ICL introduces a new gamma correction mode in display engine, called > >> multi-segmented-gamma mode. This mode allows users to program the > >> darker region of the gamma curve with sueprfine precision. An > >> example use case for this is HDR curves (like PQ ST-2084). > >> > >> If we plot a gamma correction curve from value range between 0.0 to 1.0, > >> ICL's multi-segment has 3 different sections: > >> - superfine segment: 9 values, ranges between 0 - 1/(128 * 256) > >> - fine segment: 257 values, ranges between 0 - 1/(128) > >> - corase segment: 257 values, ranges between 0 - 1 > >> > >> This patch: > >> - Changes gamma LUTs size for ICL/GEN11 to 262144 entries (8 * 128 * 256), > >> so that userspace can program with highest precision supported. > >> - Changes default gamma mode (non-legacy) to multi-segmented-gamma mode. > >> - Adds functions to program/detect multi-segment gamma. > >> > >> V2: Addressed review comments from Ville > >> - separate function for superfine and fine segments. > >> - remove enum for segments. > >> - reuse last entry of the LUT as gc_max value. > >> - replace if() ....cond with switch...case in icl_load_luts. > >> - add an entry variable, instead of 'word' > >> > >> Cc: Ville Syrjälä <ville.syrjala@xxxxxxxxxxxxxxx> > >> Cc: Maarten Lankhorst <maarten.lankhorst@xxxxxxxxxxxxxxx> > >> Cc: Daniel Vetter <daniel.vetter@xxxxxxxx> > >> > >> Suggested-by: Ville Syrjälä <ville.syrjala@xxxxxxxxxxxxxxx> > >> Signed-off-by: Shashank Sharma <shashank.sharma@xxxxxxxxx> > >> Signed-off-by: Uma Shankar <uma.shankar@xxxxxxxxx> > >> --- > >> drivers/gpu/drm/i915/i915_pci.c | 3 +- > >> drivers/gpu/drm/i915/intel_color.c | 125 ++++++++++++++++++++++++++++- > >> 2 files changed, 123 insertions(+), 5 deletions(-) > >> > >> diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c > >> index ffa2ee70a03d..83698951760b 100644 > >> --- a/drivers/gpu/drm/i915/i915_pci.c > >> +++ b/drivers/gpu/drm/i915/i915_pci.c > >> @@ -749,7 +749,8 @@ static const struct intel_device_info intel_cannonlake_info = { > >> GEN(11), \ > >> .ddb_size = 2048, \ > >> .has_logical_ring_elsq = 1, \ > >> - .color = { .degamma_lut_size = 33, .gamma_lut_size = 1024 } > >> + .color = { .degamma_lut_size = 33, .gamma_lut_size = 262144 } > > Ugh. Thats one big LUT. But looks correct. > > > >> + > > Bogus newline. > Got it. > >> > >> static const struct intel_device_info intel_icelake_11_info = { > >> GEN11_FEATURES, > >> diff --git a/drivers/gpu/drm/i915/intel_color.c b/drivers/gpu/drm/i915/intel_color.c > >> index 6c341bea514c..49831e8d02fb 100644 > >> --- a/drivers/gpu/drm/i915/intel_color.c > >> +++ b/drivers/gpu/drm/i915/intel_color.c > >> @@ -41,6 +41,9 @@ > >> #define CTM_COEFF_ABS(coeff) ((coeff) & (CTM_COEFF_SIGN - 1)) > >> > >> #define LEGACY_LUT_LENGTH 256 > >> +#define ICL_GAMMA_MULTISEG_LUT_LENGTH (256 * 128 * 8) > >> +#define ICL_GAMMA_SUPERFINE_SEG_LENGTH 9 > >> + > >> /* > >> * Extract the CSC coefficient from a CTM coefficient (in U32.32 fixed point > >> * format). This macro takes the coefficient we want transformed and the > >> @@ -767,6 +770,113 @@ static void glk_load_luts(const struct intel_crtc_state *crtc_state) > >> } > >> } > >> > >> +/* ilk+ "12.4" interpolated format (high 10 bits) */ > >> +static u32 ilk_lut_12p4_ldw(const struct drm_color_lut *color) > >> +{ > >> + return (color->red >> 6) << 20 | (color->green >> 6) << 10 | > >> + (color->blue >> 6); > >> +} > >> + > >> +/* ilk+ "12.4" interpolated format (low 6 bits) */ > >> +static u32 ilk_lut_12p4_udw(const struct drm_color_lut *color) > >> +{ > >> + return (color->red & 0x3f) << 24 | (color->green & 0x3f) << 14 | > >> + (color->blue & 0x3f); > >> +} > >> + > >> +static void > >> +icl_load_gcmax(const struct intel_crtc_state *crtc_state, > >> + const struct drm_color_lut *entry) > > Indentation looks off. Also s/entry/color/ to match the other similarish > > funcs maybe? > Sure. > >> +{ > >> + struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); > >> + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); > >> + enum pipe pipe = crtc->pipe; > >> + > >> + /* Fixme: LUT entries are 16 bit only, so we can prog 0xFFFF max */ > >> + I915_WRITE(PREC_PAL_GC_MAX(pipe, 0), entry->red); > >> + I915_WRITE(PREC_PAL_GC_MAX(pipe, 1), entry->green); > >> + I915_WRITE(PREC_PAL_GC_MAX(pipe, 2), entry->blue); > >> +} > >> + > >> +static void > >> +icl_program_gamma_superfine_segment(const struct intel_crtc_state *crtc_state) > >> +{ > >> + struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); > >> + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); > >> + const struct drm_property_blob *blob = crtc_state->base.gamma_lut; > >> + const struct drm_color_lut *lut = blob->data; > >> + enum pipe pipe = crtc->pipe; > >> + u32 i; > >> + > >> + if (!lut || drm_color_lut_size(blob) < ICL_GAMMA_SUPERFINE_SEG_LENGTH) > >> + return; > > These checks aren't needed. Just dead code. > Will remove this and similars. > >> + > >> + /* > >> + * Every entry in the multi-segment LUT is corresponding to a superfine > >> + * segment step which is 1/(8 * 128 * 256). > >> + * > >> + * Superfine segment has 9 entries, corresponding to values > >> + * 0, 1/(8 * 128 * 256), 2/(8 * 128 * 256) .... 8/(8 * 128 * 256). > >> + */ > >> + I915_WRITE(PREC_PAL_MULTI_SEG_INDEX(pipe), PAL_PREC_AUTO_INCREMENT); > >> + > >> + for (i = 0; i < 9; i++) { > >> + const struct drm_color_lut *entry = &lut[i]; > >> + > >> + I915_WRITE(PREC_PAL_MULTI_SEG_DATA(pipe), > >> + ilk_lut_12p4_udw(entry)); > > ldw should come before udw. > Got it. > >> + I915_WRITE(PREC_PAL_MULTI_SEG_DATA(pipe), > >> + ilk_lut_12p4_ldw(entry)); > >> + } > >> +} > >> + > >> +static void > >> +icl_program_gamma_multi_segment(const struct intel_crtc_state *crtc_state) > >> +{ > >> + struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); > >> + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); > >> + const struct drm_property_blob *blob = crtc_state->base.gamma_lut; > >> + const struct drm_color_lut *lut = blob->data; > >> + const struct drm_color_lut *entry; > > 'entry' declaration can be moved into the loops. > Its being used in multiple loops, IMO still better to move into the loops. We don't want to pass any information between the loops. > also being used for GCMax outside the > loop. Hmm. That might be an arguemnt for keeping it out. But the current gcmax usage looks broken. You're programming the same value into the last PAL_PREC index and GCMAX. > >> + enum pipe pipe = crtc->pipe; > >> + u32 i; > >> + > >> + if (!lut || (drm_color_lut_size(blob) < ICL_GAMMA_MULTISEG_LUT_LENGTH)) > >> + return; > > More checks that aren't needed. > Got it. > >> + > >> + /* > >> + * Every entry in the multi-segment LUT is corresponding to a superfine > >> + * segment step which is 1/(8*128*256). > >> + * > >> + * Fine segment's step is 1/(128 * 256) ie 1/(128 * 256), 2/(128*256) > >> + * ... 256/(128*256). So in order to program fine segment of LUT we > >> + * need to pick every 8'th entry in LUT, and program 256 indexes. > >> + * Fine segment's index 0 is programmed in HW, and it starts from > >> + * index 1. > > The wording here is a bit confusing. I guess the problem is what to call > > things. PAL_PREC_INDEX[0/1] is what we program, but that maps to the point > > seg2[1] with seg2[0] being unused by the hw. Well, the spec says it's > > implicit but IIRC I was told long ago that it's not actually used. > > > > Not sure how to word that in the best way. Maybe something like? > > > > /* > > * Fine segment (seg2) ... > > * > > * PAL_PREC_INDEX[0] and PAL_PREC_INDEX[1] map to seg2[1], > > * with seg2[0] being unused by the hardware. > > */ > > > > Not sure that's any clearer. > Ok, will try to come up with something in similar lines. > >> + */ > >> + I915_WRITE(PREC_PAL_INDEX(pipe), PAL_PREC_AUTO_INCREMENT); > >> + for (i = 1; i < 257; i++) { > >> + entry = &lut[i * 8]; > >> + I915_WRITE(PREC_PAL_DATA(pipe), ilk_lut_12p4_udw(entry)); > >> + I915_WRITE(PREC_PAL_DATA(pipe), ilk_lut_12p4_ldw(entry)); > >> + } > >> + > >> + /* > >> + * Coarse segment's starts from index 0 and it's step is 1/256 ie 0, > >> + * 1/256, 2/256 ...256/256. As per the description of each entry in LUT > >> + * above, we need to pick every 8 * 128 = 1024th entry in LUT, and > >> + * program 256 of those. > >> + */ > > Could make a note here stating that seg3[0] and seg3[1] are also unused > > by the hardware, even though we have to program them to advance the > > index. I don't see it mentioned in the spec, but this one I definitely > > remember confirming from Art way back when. However I never verified > > that on actual hw. We could also consider just programming those two > > entries to 0 and start the actual coarse segment programming from index 2. > > Or we could skip them by reprogramming the index directly. > If they are not being used, does it matter what and if we program into > them ? We can add a note though, mentioning this. It shouldn't matter what we programing into them. But as mentioned I never actually confirmed this on actual hardware. Would be nice to double check that so we don't end up with incorrect comment. > >> + for (i = 0; i < 256; i++) { > >> + entry = &lut[i * 1024]; > > s/1024/8 * 128/ maybe? > > Sure. > > Regards > > Shashank > > > > >> + I915_WRITE(PREC_PAL_DATA(pipe), ilk_lut_12p4_udw(entry)); > >> + I915_WRITE(PREC_PAL_DATA(pipe), ilk_lut_12p4_ldw(entry)); > >> + } > >> + > >> + icl_load_gcmax(crtc_state, entry); > >> + ivb_load_lut_ext_max(crtc); > >> +} > >> + > >> static void icl_load_luts(const struct intel_crtc_state *crtc_state) > >> { > >> const struct drm_property_blob *gamma_lut = crtc_state->base.gamma_lut; > >> @@ -775,10 +885,17 @@ static void icl_load_luts(const struct intel_crtc_state *crtc_state) > >> if (crtc_state->base.degamma_lut) > >> glk_load_degamma_lut(crtc_state); > >> > >> - if ((crtc_state->gamma_mode & GAMMA_MODE_MODE_MASK) == > >> - GAMMA_MODE_MODE_8BIT) { > >> + switch (crtc_state->gamma_mode & GAMMA_MODE_MODE_MASK) { > >> + case GAMMA_MODE_MODE_8BIT: > >> i9xx_load_luts(crtc_state); > >> - } else { > >> + break; > >> + > >> + case GAMMA_MODE_MODE_12BIT_MULTI_SEGMENTED: > >> + icl_program_gamma_superfine_segment(crtc_state); > >> + icl_program_gamma_multi_segment(crtc_state); > >> + break; > >> + > >> + default: > >> bdw_load_lut_10(crtc, gamma_lut, PAL_PREC_INDEX_VALUE(0)); > >> ivb_load_lut_ext_max(crtc); > >> } > >> @@ -1209,7 +1326,7 @@ static u32 icl_gamma_mode(const struct intel_crtc_state *crtc_state) > >> crtc_state_is_legacy_gamma(crtc_state)) > >> gamma_mode |= GAMMA_MODE_MODE_8BIT; > >> else > >> - gamma_mode |= GAMMA_MODE_MODE_10BIT; > >> + gamma_mode |= GAMMA_MODE_MODE_12BIT_MULTI_SEGMENTED; > >> > >> return gamma_mode; > >> } > >> -- > >> 2.17.1 -- Ville Syrjälä Intel _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/intel-gfx