Re: [PATCH v2 4/4] drm/i915/icl: Add Multi-segmented gamma support

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Regards

Shashank

On 5/3/2019 9:20 PM, Ville Syrjälä wrote:
On Tue, Apr 30, 2019 at 08:51:08PM +0530, Shashank Sharma wrote:
ICL introduces a new gamma correction mode in display engine, called
multi-segmented-gamma mode. This mode allows users to program the
darker region of the gamma curve with sueprfine precision. An
example use case for this is HDR curves (like PQ ST-2084).

If we plot a gamma correction curve from value range between 0.0 to 1.0,
ICL's multi-segment has 3 different sections:
- superfine segment: 9 values, ranges between 0 - 1/(128 * 256)
- fine segment: 257 values, ranges between 0 - 1/(128)
- corase segment: 257 values, ranges between 0 - 1

This patch:
- Changes gamma LUTs size for ICL/GEN11 to 262144 entries (8 * 128 * 256),
   so that userspace can program with highest precision supported.
- Changes default gamma mode (non-legacy) to multi-segmented-gamma mode.
- Adds functions to program/detect multi-segment gamma.

V2: Addressed review comments from Ville
     - separate function for superfine and fine segments.
     - remove enum for segments.
     - reuse last entry of the LUT as gc_max value.
     - replace if() ....cond with switch...case in icl_load_luts.
     - add an entry variable, instead of 'word'

Cc: Ville Syrjälä <ville.syrjala@xxxxxxxxxxxxxxx>
Cc: Maarten Lankhorst <maarten.lankhorst@xxxxxxxxxxxxxxx>
Cc: Daniel Vetter <daniel.vetter@xxxxxxxx>

Suggested-by: Ville Syrjälä <ville.syrjala@xxxxxxxxxxxxxxx>
Signed-off-by: Shashank Sharma <shashank.sharma@xxxxxxxxx>
Signed-off-by: Uma Shankar <uma.shankar@xxxxxxxxx>
---
  drivers/gpu/drm/i915/i915_pci.c    |   3 +-
  drivers/gpu/drm/i915/intel_color.c | 125 ++++++++++++++++++++++++++++-
  2 files changed, 123 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c
index ffa2ee70a03d..83698951760b 100644
--- a/drivers/gpu/drm/i915/i915_pci.c
+++ b/drivers/gpu/drm/i915/i915_pci.c
@@ -749,7 +749,8 @@ static const struct intel_device_info intel_cannonlake_info = {
  	GEN(11), \
  	.ddb_size = 2048, \
  	.has_logical_ring_elsq = 1, \
-	.color = { .degamma_lut_size = 33, .gamma_lut_size = 1024 }
+	.color = { .degamma_lut_size = 33, .gamma_lut_size = 262144 }
Ugh. Thats one big LUT. But looks correct.

+
Bogus newline.
Got it.
static const struct intel_device_info intel_icelake_11_info = {
  	GEN11_FEATURES,
diff --git a/drivers/gpu/drm/i915/intel_color.c b/drivers/gpu/drm/i915/intel_color.c
index 6c341bea514c..49831e8d02fb 100644
--- a/drivers/gpu/drm/i915/intel_color.c
+++ b/drivers/gpu/drm/i915/intel_color.c
@@ -41,6 +41,9 @@
  #define CTM_COEFF_ABS(coeff)		((coeff) & (CTM_COEFF_SIGN - 1))
#define LEGACY_LUT_LENGTH 256
+#define ICL_GAMMA_MULTISEG_LUT_LENGTH		(256 * 128 * 8)
+#define ICL_GAMMA_SUPERFINE_SEG_LENGTH	9
+
  /*
   * Extract the CSC coefficient from a CTM coefficient (in U32.32 fixed point
   * format). This macro takes the coefficient we want transformed and the
@@ -767,6 +770,113 @@ static void glk_load_luts(const struct intel_crtc_state *crtc_state)
  	}
  }
+/* ilk+ "12.4" interpolated format (high 10 bits) */
+static u32 ilk_lut_12p4_ldw(const struct drm_color_lut *color)
+{
+	return (color->red >> 6) << 20 | (color->green >> 6) << 10 |
+		(color->blue >> 6);
+}
+
+/* ilk+ "12.4" interpolated format (low 6 bits) */
+static u32 ilk_lut_12p4_udw(const struct drm_color_lut *color)
+{
+	return (color->red & 0x3f) << 24 | (color->green & 0x3f) << 14 |
+		(color->blue & 0x3f);
+}
+
+static void
+icl_load_gcmax(const struct intel_crtc_state *crtc_state,
+			const struct drm_color_lut *entry)
Indentation looks off. Also s/entry/color/ to match the other similarish
funcs maybe?
Sure.
+{
+	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
+	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
+	enum pipe pipe = crtc->pipe;
+
+	/* Fixme: LUT entries are 16 bit only, so we can prog 0xFFFF max */
+	I915_WRITE(PREC_PAL_GC_MAX(pipe, 0), entry->red);
+	I915_WRITE(PREC_PAL_GC_MAX(pipe, 1), entry->green);
+	I915_WRITE(PREC_PAL_GC_MAX(pipe, 2), entry->blue);
+}
+
+static void
+icl_program_gamma_superfine_segment(const struct intel_crtc_state *crtc_state)
+{
+	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
+	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
+	const struct drm_property_blob *blob = crtc_state->base.gamma_lut;
+	const struct drm_color_lut *lut = blob->data;
+	enum pipe pipe = crtc->pipe;
+	u32 i;
+
+	if (!lut || drm_color_lut_size(blob) < ICL_GAMMA_SUPERFINE_SEG_LENGTH)
+		return;
These checks aren't needed. Just dead code.
Will remove this and similars.
+
+	/*
+	 * Every entry in the multi-segment LUT is corresponding to a superfine
+	 * segment step which is 1/(8 * 128 * 256).
+	 *
+	 * Superfine segment has 9 entries, corresponding to values
+	 * 0, 1/(8 * 128 * 256), 2/(8 * 128 * 256) .... 8/(8 * 128 * 256).
+	 */
+	I915_WRITE(PREC_PAL_MULTI_SEG_INDEX(pipe), PAL_PREC_AUTO_INCREMENT);
+
+	for (i = 0; i < 9; i++) {
+		const struct drm_color_lut *entry = &lut[i];
+
+		I915_WRITE(PREC_PAL_MULTI_SEG_DATA(pipe),
+			   ilk_lut_12p4_udw(entry));
ldw should come before udw.
Got it.
+		I915_WRITE(PREC_PAL_MULTI_SEG_DATA(pipe),
+			   ilk_lut_12p4_ldw(entry));
+	}
+}
+
+static void
+icl_program_gamma_multi_segment(const struct intel_crtc_state *crtc_state)
+{
+	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
+	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
+	const struct drm_property_blob *blob = crtc_state->base.gamma_lut;
+	const struct drm_color_lut *lut = blob->data;
+	const struct drm_color_lut *entry;
'entry' declaration can be moved into the loops.
Its being used in multiple loops, also being used for GCMax outside the loop.
+	enum pipe pipe = crtc->pipe;
+	u32 i;
+
+	if (!lut || (drm_color_lut_size(blob) < ICL_GAMMA_MULTISEG_LUT_LENGTH))
+		return;
More checks that aren't needed.
Got it.
+
+	/*
+	 * Every entry in the multi-segment LUT is corresponding to a superfine
+	 * segment step which is 1/(8*128*256).
+	 *
+	 * Fine segment's step is 1/(128 * 256) ie 1/(128 * 256),  2/(128*256)
+	 * ... 256/(128*256). So in order to program fine segment of LUT we
+	 * need to pick every 8'th entry in LUT, and program 256 indexes.
+	 * Fine segment's index 0 is programmed in HW, and it starts from
+	 * index 1.
The wording here is a bit confusing. I guess the problem is what to call
things. PAL_PREC_INDEX[0/1] is what we program, but that maps to the point
seg2[1] with seg2[0] being unused by the hw. Well, the spec says it's
implicit but IIRC I was told long ago that it's not actually used.

Not sure how to word that in the best way. Maybe something like?

/*
  * Fine segment (seg2) ...
  *
  * PAL_PREC_INDEX[0] and PAL_PREC_INDEX[1] map to seg2[1],
  * with seg2[0] being unused by the hardware.
  */

Not sure that's any clearer.
Ok, will try to come up with something in similar lines.
+	 */
+	I915_WRITE(PREC_PAL_INDEX(pipe), PAL_PREC_AUTO_INCREMENT);
+	for (i = 1; i < 257; i++) {
+		entry = &lut[i * 8];
+		I915_WRITE(PREC_PAL_DATA(pipe), ilk_lut_12p4_udw(entry));
+		I915_WRITE(PREC_PAL_DATA(pipe), ilk_lut_12p4_ldw(entry));
+	}
+
+	/*
+	 * Coarse segment's starts from index 0 and it's step is 1/256 ie 0,
+	 * 1/256, 2/256 ...256/256. As per the description of each entry in LUT
+	 * above, we need to pick every 8 * 128 = 1024th entry in LUT, and
+	 * program 256 of those.
+	 */
Could make a note here stating that seg3[0] and seg3[1] are also unused
by the hardware, even though we have to program them to advance the
index. I don't see it mentioned in the spec, but this one I definitely
remember confirming from Art way back when. However I never verified
that on actual hw. We could also consider just programming those two
entries to 0 and start the actual coarse segment programming from index 2.
Or we could skip them by reprogramming the index directly.
If they are not being used, does it matter what and if we program into them ? We can add a note though, mentioning this.
+	for (i = 0; i < 256; i++) {
+		entry = &lut[i * 1024];
s/1024/8 * 128/ maybe?

Sure.

Regards

Shashank


+		I915_WRITE(PREC_PAL_DATA(pipe), ilk_lut_12p4_udw(entry));
+		I915_WRITE(PREC_PAL_DATA(pipe), ilk_lut_12p4_ldw(entry));
+	}
+
+	icl_load_gcmax(crtc_state, entry);
+	ivb_load_lut_ext_max(crtc);
+}
+
  static void icl_load_luts(const struct intel_crtc_state *crtc_state)
  {
  	const struct drm_property_blob *gamma_lut = crtc_state->base.gamma_lut;
@@ -775,10 +885,17 @@ static void icl_load_luts(const struct intel_crtc_state *crtc_state)
  	if (crtc_state->base.degamma_lut)
  		glk_load_degamma_lut(crtc_state);
- if ((crtc_state->gamma_mode & GAMMA_MODE_MODE_MASK) ==
-	    GAMMA_MODE_MODE_8BIT) {
+	switch (crtc_state->gamma_mode & GAMMA_MODE_MODE_MASK) {
+	case GAMMA_MODE_MODE_8BIT:
  		i9xx_load_luts(crtc_state);
-	} else {
+		break;
+
+	case GAMMA_MODE_MODE_12BIT_MULTI_SEGMENTED:
+		icl_program_gamma_superfine_segment(crtc_state);
+		icl_program_gamma_multi_segment(crtc_state);
+		break;
+
+	default:
  		bdw_load_lut_10(crtc, gamma_lut, PAL_PREC_INDEX_VALUE(0));
  		ivb_load_lut_ext_max(crtc);
  	}
@@ -1209,7 +1326,7 @@ static u32 icl_gamma_mode(const struct intel_crtc_state *crtc_state)
  	    crtc_state_is_legacy_gamma(crtc_state))
  		gamma_mode |= GAMMA_MODE_MODE_8BIT;
  	else
-		gamma_mode |= GAMMA_MODE_MODE_10BIT;
+		gamma_mode |= GAMMA_MODE_MODE_12BIT_MULTI_SEGMENTED;
return gamma_mode;
  }
--
2.17.1
_______________________________________________
Intel-gfx mailing list
Intel-gfx@xxxxxxxxxxxxxxxxxxxxx
https://lists.freedesktop.org/mailman/listinfo/intel-gfx




[Index of Archives]     [AMD Graphics]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux