On Fri, Apr 26, 2019 at 11:31:51PM +0530, Shashank Sharma wrote: > ICL introduces a new gamma correction mode in display engine, called > multi-segmented-gamma mode. This mode allows users to program the > darker region of the gamma curve with sueprfine precision. An > example use case for this is HDR curves (like PQ ST-2084). > > If we plot a gamma correction curve from value range between 0.0 to 1.0, > ICL's multi-segment has 3 different sections: > - superfine segment: 9 values, ranges between 0 - 1/(128 * 256) > - fine segment: 257 values, ranges between 0 - 1/(128) > - corase segment: 257 values, ranges between 0 - 1 > > This patch: > - Changes gamma LUTs size for ICL/GEN11 to 262144 entries (8 * 128 * 256), > so that userspace can program with highest precision supported. > - Changes default gamma mode (non-legacy) to multi-segmented-gamma mode. > - Adds functions to program/detect multi-segment gamma. > > Cc: Ville Syrjälä <ville.syrjala@xxxxxxxxxxxxxxx> > Cc: Maarten Lankhorst <maarten.lankhorst@xxxxxxxxxxxxxxx> > Cc: Daniel Vetter <daniel.vetter@xxxxxxxx> > > Suggested-by: Ville Syrjälä <ville.syrjala@xxxxxxxxxxxxxxx> > Signed-off-by: Shashank Sharma <shashank.sharma@xxxxxxxxx> > Signed-off-by: Uma Shankar <uma.shankar@xxxxxxxxx> > --- > drivers/gpu/drm/i915/i915_pci.c | 3 +- > drivers/gpu/drm/i915/intel_color.c | 155 ++++++++++++++++++++++++++++- > 2 files changed, 156 insertions(+), 2 deletions(-) > > diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c > index ffa2ee70a03d..83698951760b 100644 > --- a/drivers/gpu/drm/i915/i915_pci.c > +++ b/drivers/gpu/drm/i915/i915_pci.c > @@ -749,7 +749,8 @@ static const struct intel_device_info intel_cannonlake_info = { > GEN(11), \ > .ddb_size = 2048, \ > .has_logical_ring_elsq = 1, \ > - .color = { .degamma_lut_size = 33, .gamma_lut_size = 1024 } > + .color = { .degamma_lut_size = 33, .gamma_lut_size = 262144 } > + > > static const struct intel_device_info intel_icelake_11_info = { > GEN11_FEATURES, > diff --git a/drivers/gpu/drm/i915/intel_color.c b/drivers/gpu/drm/i915/intel_color.c > index ca341a9e47e6..d1fb79a5d764 100644 > --- a/drivers/gpu/drm/i915/intel_color.c > +++ b/drivers/gpu/drm/i915/intel_color.c > @@ -41,6 +41,7 @@ > #define CTM_COEFF_ABS(coeff) ((coeff) & (CTM_COEFF_SIGN - 1)) > > #define LEGACY_LUT_LENGTH 256 > +#define ICL_MULTISEG_LUT_LENGTH (256 * 128 * 8) > /* > * Extract the CSC coefficient from a CTM coefficient (in U32.32 fixed point > * format). This macro takes the coefficient we want transformed and the > @@ -58,6 +59,12 @@ > > #define ILK_CSC_POSTOFF_LIMITED_RANGE (16 * (1 << 12) / 255) > > +enum icl_ms_gamma_segments { > + ICL_MS_GAMMA_SEG_SUPERFINE, > + ICL_MS_GAMMA_SEG_FINE, > + ICL_MS_GAMMA_SEG_COARSE, > +}; > + > static const u16 ilk_csc_off_zero[3] = {}; > > static const u16 ilk_csc_coeff_identity[9] = { > @@ -767,6 +774,149 @@ static void glk_load_luts(const struct intel_crtc_state *crtc_state) > } > } > > +/* ilk+ "12.4" interpolated format (high 10 bits) */ > +static u32 ilk_lut_12p4_ldw(const struct drm_color_lut *color) > +{ > + return (color->red >> 6) << 20 | (color->green >> 6) << 10 | > + (color->blue >> 6); > +} > + > +/* ilk+ "12.4" interpolated format (low 6 bits) */ > +static u32 ilk_lut_12p4_udw(const struct drm_color_lut *color) > +{ > + return (color->red & 0x3f) << 24 | (color->green & 0x3f) << 14 | > + (color->blue & 0x3f); > +} > + > +static void > +icl_program_gamma_gcmax(const struct intel_crtc_state *crtc_state) > +{ > + struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); > + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); > + enum pipe pipe = crtc->pipe; > + > + /* > + * Program the max register to clamp values > 1.0. > + * ToDo: Extend the ABI to be able to program values > + * from 1.0 > + */ > + I915_WRITE(PREC_PAL_GC_MAX(pipe, 0), (1 << 16)); > + I915_WRITE(PREC_PAL_GC_MAX(pipe, 1), (1 << 16)); > + I915_WRITE(PREC_PAL_GC_MAX(pipe, 2), (1 << 16)); This one I think we want to program based on the provide LUT. It's the last entry that still gets used in interpolation for <1.0 values. Or at least that's the way it works with the 12p4 mode IIRC. I don't actually remember how it goes with the multi segment mode. > + > + /* > + * Program the max register to clamp values > 1.0. > + * ToDo: Extend the ABI to be able to program values > + * from 1.0 to 3.0 > + */ > + I915_WRITE(PREC_PAL_EXT_GC_MAX(pipe, 0), (1 << 16)); > + I915_WRITE(PREC_PAL_EXT_GC_MAX(pipe, 1), (1 << 16)); > + I915_WRITE(PREC_PAL_EXT_GC_MAX(pipe, 2), (1 << 16)); > + > + /* > + * Program the gc max 2 register to clamp values > 1.0. > + * ToDo: Extend the ABI to be able to program values > + * from 3.0 to 7.0 > + */ > + I915_WRITE(PREC_PAL_EXT2_GC_MAX(pipe, 0), (1 << 16)); > + I915_WRITE(PREC_PAL_EXT2_GC_MAX(pipe, 1), (1 << 16)); > + I915_WRITE(PREC_PAL_EXT2_GC_MAX(pipe, 2), (1 << 16)); And these are just ivb_load_lut_10_max(). Might need to just s/10/ext/ in the name since I guess they apply to the 12p4 and multi segmment modes too. > +} > + > +static void > +icl_program_gamma_multi_segment(const struct intel_crtc_state *crtc_state, > + const struct drm_property_blob *blob, > + enum icl_ms_gamma_segments segment) > +{ > + struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); > + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); > + const struct drm_color_lut *lut = blob->data; > + enum pipe pipe = crtc->pipe; > + u32 i, word, seg_size, seg_step, seg_start = 0; > + i915_reg_t index_reg, data_reg; > + > + if (!lut || (drm_color_lut_size(blob) < ICL_MULTISEG_LUT_LENGTH)) { > + DRM_DEBUG_KMS("Not programming gamma segment, invalid input\n"); > + return; > + } > + > + /* > + * Every entry in the multi-segment LUT is corresponding to a superfine > + * segment step which is 1/(8*128*256). > + * > + * Superfine segment has 9 entries, corresponding to values > + * 0, 1/(8 * 128 * 256), 2/(8 * 128 * 256) .... 8/(8 * 128 * 256). > + * > + * Fine segment's step is 1/(128 * 256) ie 1/(128 * 256), 2/(128*256) > + * ... 256/(128*256). So in order to program fine segment of LUT we > + * need to pick every 8'th entry in LUT, and program 256 indexes. > + * Fine segment's index 0 is programmed in HW, and it starts from > + * index 1. > + * > + * Coarse segment's starts from index 0 and it's step is 1/256 ie 0, > + * 1/256, 2/256 ...256/256. As per the description of each entry in LUT > + * above, we need to pick every 8 * 128 = 1024th entry in LUT, and > + * program 256 of those. > + */ > + > + switch (segment) { > + case ICL_MS_GAMMA_SEG_SUPERFINE: > + seg_size = 9; > + seg_step = 1; > + index_reg = PREC_PAL_MULTI_SEG_INDEX(pipe); > + data_reg = PREC_PAL_MULTI_SEG_DATA(pipe); > + break; > + > + case ICL_MS_GAMMA_SEG_FINE: > + seg_start = 1; > + seg_size = 256; > + seg_step = 8; > + index_reg = PREC_PAL_INDEX(pipe); > + data_reg = PREC_PAL_DATA(pipe); > + break; > + > + case ICL_MS_GAMMA_SEG_COARSE: > + seg_size = 256; > + seg_step = 1024; > + index_reg = PREC_PAL_INDEX(pipe); > + data_reg = PREC_PAL_DATA(pipe); > + break; I'm not convinced this this enum is actually helpful. I would probably just try to provide separate functions for the segments. Not sure if we could share the same code for the fine/coarse segments and just pass in the differences as parameters. > + > + default: > + DRM_ERROR("Invalid gamma segment %d to program\n", segment); > + return; > + } > + > + I915_WRITE(index_reg, PAL_PREC_AUTO_INCREMENT); > + > + for (i = seg_start; i < seg_size; i++) { > + /* Even Index */ These comments don't really provide any value. > + word = ilk_lut_12p4_udw(&lut[i * seg_step]); and the 'word' variable isn't particularly useful either. We might want a local varianble for the lut entry like we have in the ivb/bdw functions, if only to avoid repeating the arithmetic. > + I915_WRITE(data_reg, word); > + > + /* Odd index */ > + word = ilk_lut_12p4_ldw(&lut[i * seg_step]); > + I915_WRITE(data_reg, word); > + } > +} > + > +static void > +icl_load_gamma_multi_seg_lut(const struct intel_crtc_state *crtc_state) > +{ > + const struct drm_property_blob *lut_blob = crtc_state->base.gamma_lut; > + > + if (!lut_blob) > + return; > + > + icl_program_gamma_multi_segment(crtc_state, lut_blob, > + ICL_MS_GAMMA_SEG_SUPERFINE); > + icl_program_gamma_multi_segment(crtc_state, lut_blob, > + ICL_MS_GAMMA_SEG_FINE); > + icl_program_gamma_multi_segment(crtc_state, lut_blob, > + ICL_MS_GAMMA_SEG_COARSE); > + icl_program_gamma_gcmax(crtc_state); > +} And this I would just inline into icl_load_luts() just like the rest. > + > static void icl_load_luts(const struct intel_crtc_state *crtc_state) > { > const struct drm_property_blob *gamma_lut = crtc_state->base.gamma_lut; > @@ -776,6 +926,9 @@ static void icl_load_luts(const struct intel_crtc_state *crtc_state) > glk_load_degamma_lut(crtc_state); > > if ((crtc_state->gamma_mode & GAMMA_MODE_MODE_MASK) == > + GAMMA_MODE_MODE_12BIT_MULTI_SEGMENTED) { > + icl_load_gamma_multi_seg_lut(crtc_state); I'd put this last to keep things in consistent order. And probably want to use a switch statement now that we have three options. > + } else if ((crtc_state->gamma_mode & GAMMA_MODE_MODE_MASK) == > GAMMA_MODE_MODE_8BIT) { > i9xx_load_luts(crtc_state); > } else { > @@ -1187,7 +1340,7 @@ static u32 icl_gamma_mode(const struct intel_crtc_state *crtc_state) > crtc_state_is_legacy_gamma(crtc_state)) > gamma_mode |= GAMMA_MODE_MODE_8BIT; > else > - gamma_mode |= GAMMA_MODE_MODE_10BIT; > + gamma_mode |= GAMMA_MODE_MODE_12BIT_MULTI_SEGMENTED; > > return gamma_mode; > } > -- > 2.17.1 -- Ville Syrjälä Intel _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/intel-gfx