Re: [PATCH v6 5/7] gpu: ipu-v3: ipu-ic: Add support for limited range encoding

Philipp Zabel <p.zabel@xxxxxxxxxxxxxx> · Fri, 08 Mar 2019 12:57:05 +0100

On Thu, 2019-03-07 at 15:33 -0800, Steve Longerbeam wrote:
> Add support for the following conversions:
> 
> - YUV full-range to YUV limited-range
> - YUV limited-range to YUV full-range
> - YUV limited-range to RGB full-range
> - RGB full-range to YUV limited-range
> 
> The last two conversions require operating on the YUV full-range
> encoding and inverse encoding coefficients, with the YUV-to-YUV
> limited<->full coefficients. The formula to convert is
> 
> M_c = M_a * M_b
> O_c = M_a * O_b + O_a
> 
> For calculating the RGB full-range to YUV limited-range coefficients:
> 
> [M_a, O_a] = YUV full-range to YUV limited-range coefficients.
> [M_b, O_b] = RGB full-range to YUV full-range coefficients.
> 
> For calculating the YUV limited-range to RGB full-range coefficients:
> 
> [M_a, O_a] = YUV full-range to RGB full-range coefficients.
> [M_b, O_b] = YUV limited-range to YUV full-range coefficients.
> 
> The calculation of [M_c, O_c] is carried out by the function
> transform_coeffs().
> 
> In the future if RGB limited range encoding is required, the same
> function can be used. And cascaded to create all combinations of
> encoding for YUV limited/full range <-> RGB limited/full range,
> passing the output coefficients from one call as the input for the
> next.
> 
> For example, to create YUV full-range to RGB limited-range coefficients:
> 
> [M_a, O_a] = RGB full-range to RGB limited-range coefficients.
> [M_b, O_b] = YUV full-range to RGB full-range coefficients.
> 
> and that output sent as input to create YUV limited-range to RGB
> limited-range coefficients:
> 
> [M_a, O_a] = YUV full-range to RGB limited-range coefficients.
> [M_b, O_b] = YUV limited-range to YUV full-range coefficients.
> 
> Signed-off-by: Steve Longerbeam <slongerbeam@xxxxxxxxx>

I'm not a big fan of this. Wouldn't it be much easier to compute all
necessary task parameter sets offline with high precision, and store the
precomputed sets in the compact representation?

regards
Philipp

> ---
>  drivers/gpu/ipu-v3/ipu-ic.c | 281 +++++++++++++++++++++++++++++++++---
>  1 file changed, 263 insertions(+), 18 deletions(-)
> 
> diff --git a/drivers/gpu/ipu-v3/ipu-ic.c b/drivers/gpu/ipu-v3/ipu-ic.c
> index 1460901af9b5..a7dd85f8d832 100644
> --- a/drivers/gpu/ipu-v3/ipu-ic.c
> +++ b/drivers/gpu/ipu-v3/ipu-ic.c
> @@ -178,10 +178,10 @@ static inline void ipu_ic_write(struct ipu_ic *ic, u32 value, unsigned offset)
>  }
>  
>  struct ic_encode_coeff {
> -	s16 coeff[3][3];	/* signed 9-bit integer coefficients */
> -	s16 offset[3];		/* signed 11+2-bit fixed point offset */
> -	u8 scale:2;		/* scale coefficients * 2^(scale-1) */
> -	bool sat:1;		/* saturate to (16, 235(Y) / 240(U, V)) */
> +	int coeff[3][3];	/* signed 9-bit integer coefficients */
> +	int offset[3];		/* signed 13-bit integer offset */
> +	int scale;		/* scale coefficients * 2^(scale-1) */
> +	bool sat;		/* saturate to (16, 235(Y) / 240(U, V)) */
>  };
>  
>  /*
> @@ -277,6 +277,231 @@ static const struct ic_encode_coeff ic_encode_ycbcr2rgb_709 = {
>  	.scale = 2,
>  };
>  
> +/*
> + * YUV full range to YUV limited range:
> + *
> + * Y_lim  = 0.8588 * Y_full + 16
> + * Cb_lim = 0.8784 * (Cb_full - 128) + 128
> + * Cr_lim = 0.8784 * (Cr_full - 128) + 128
> + */
> +static const struct ic_encode_coeff ic_encode_ycbcr_full2lim = {
> +	.coeff = {
> +		{ 219, 0, 0 },
> +		{ 0, 224, 0 },
> +		{ 0, 0, 224 },
> +	},
> +	.offset = { 64, 62, 62 },
> +	.scale = 1,
> +};
> +
> +/*
> + * YUV limited range to YUV full range:
> + *
> + * Y_full  = 1.1644 * (Y_lim - 16)
> + * Cb_full = 1.1384 * (Cb_lim - 128) + 128
> + * Cr_full = 1.1384 * (Cr_lim - 128) + 128
> + */
> +static const struct ic_encode_coeff ic_encode_ycbcr_lim2full = {
> +	.coeff = {
> +		{ 149, 0, 0 },
> +		{ 0, 145, 0 },
> +		{ 0, 0, 145 },
> +	},
> +	.offset = { -37, -35, -35 },
> +	.scale = 2,
> +};
> +
> +/*
> + * RGB full range to RGB limited range:
> + *
> + * R_lim = 0.8588 * R_full + 16
> + * G_lim = 0.8588 * G_full + 16
> + * B_lim = 0.8588 * B_full + 16
> + */
> +static const struct ic_encode_coeff
> +ic_encode_rgb_full2lim __maybe_unused = {
> +	.coeff = {
> +		{ 220, 0, 0 },
> +		{ 0, 220, 0 },
> +		{ 0, 0, 220 },
> +	},
> +	.offset = { 64, 64, 64 },
> +	.scale = 1,
> +};
> +
> +/*
> + * RGB limited range to RGB full range:
> + *
> + * R_full = 1.1644 * (R_lim - 16)
> + * G_full = 1.1644 * (G_lim - 16)
> + * B_full = 1.1644 * (B_lim - 16)
> + */
> +static const struct ic_encode_coeff
> +ic_encode_rgb_lim2full __maybe_unused = {
> +	.coeff = {
> +		{ 149, 0, 0 },
> +		{ 0, 149, 0 },
> +		{ 0, 0, 149 },
> +	},
> +	.offset = { -37, -37, -37 },
> +	.scale = 2,
> +};
> +
> +/*
> + * Convert a coefficient and scale value in TPMEM register format
> + * to a signed int times 256 (fix the radix point). The TPMEM register
> + * coefficient format is a signed 9-bit value (sign bit at bit 8,
> + * mantissa = coeff * 2 ^ (8 - scale - 1)).
> + */
> +static int coeff_fix(int coeff, int scale)
> +{
> +	if (coeff >= 256)
> +		coeff -= 512;
> +	if (scale == 0)
> +		return DIV_ROUND_CLOSEST(coeff, 2);
> +	return coeff << (scale - 1);
> +}
> +
> +/*
> + * Convert a signed int coefficient times 256 to TPMEM register
> + * format, given a scale value = TPMEM scale - 1.
> + */
> +static int coeff_normalize(int coeff, int scale)
> +{
> +	coeff = DIV_ROUND_CLOSEST(coeff, 1 << scale);
> +	if (coeff < 0)
> +		coeff += 512;
> +	return coeff;
> +}
> +
> +/*
> + * Convert an offset and scale value in TPMEM register format to a
> + * signed int times 256 (fix the radix point). The TPMEM register
> + * offset format is a signed 13-bit value (sign bit at bit 12,
> + * mantissa = offset * 2 ^ (2 - (scale - 1)).
> + */
> +static int offset_fix(int offset, int scale)
> +{
> +	return offset << (8 - (2 - (scale - 1)));
> +}
> +
> +/*
> + * Convert a signed int offset times 256 to TPMEM register
> + * format, given a scale value = TPMEM scale - 1.
> + */
> +static int offset_normalize(int off, int scale)
> +{
> +	return DIV_ROUND_CLOSEST(off, 1 << (8 - (2 - scale)));
> +}
> +
> +/*
> + * Find the scale value that fits the given coefficient within
> + * the 8-bit TPMEM mantissa.
> + */
> +static int get_coeff_scale(int coeff)
> +{
> +	int scale = 0;
> +
> +	while (abs(coeff) >= 256 && scale <= 2) {
> +		coeff = DIV_ROUND_CLOSEST(coeff, 2);
> +		scale++;
> +	}
> +
> +	return scale;
> +}
> +
> +/*
> + * The above defined encoding coefficients all encode between
> + * full-range RGB and full-range YCbCr.
> + *
> + * This function calculates a matrix M_c and offset vector O_c, given
> + * input matrices M_a, M_b and offset vectors O_a, O_b, such that:
> + *
> + * M_c = M_a * M_b
> + * O_c = M_a * O_b + O_a
> + *
> + * This operation will transform the full-range coefficients to
> + * coefficients that encode to or from limited range YCbCr or RGB.
> + *
> + * For example, to transform ic_encode_rgb2ycbcr_601 to encode to
> + * limited-range YCbCr:
> + *
> + * [M_a, O_a] = ic_encode_ycbcr_full2lim
> + * [M_b, O_b] = ic_encode_rgb2ycbcr_601
> + *
> + * To transform the inverse coefficients ic_encode_ycbcr2rgb_601 to
> + * encode from limited-range YCbCr:
> + *
> + * [M_a, O_a] = ic_encode_ycbcr2rgb_601
> + * [M_b, O_b] = ic_encode_ycbcr_lim2full
> + *
> + * The function can also be used to create RGB limited range
> + * coefficients, and cascaded to create all combinations of
> + * encodings between YCbCr limited/full range <-> RGB limited/full
> + * range.
> + */
> +static void transform_coeffs(struct ic_encode_coeff *out,
> +			     const struct ic_encode_coeff *a,
> +			     const struct ic_encode_coeff *b)
> +{
> +	int c_a, c_b, c_out;
> +	int o_a, o_b, o_out;
> +	int outscale = 0;
> +	int i, j, k;
> +
> +	for (i = 0; i < 3; i++) {
> +		o_out = 0;
> +		for (j = 0; j < 3; j++) {
> +			int scale;
> +
> +			/* M_c[i,j] = M_a[i,k] * M_b[k,j] */
> +			c_out = 0;
> +			for (k = 0; k < 3; k++) {
> +				c_a = coeff_fix(a->coeff[i][k], a->scale);
> +				c_b = coeff_fix(b->coeff[k][j], b->scale);
> +				c_out += c_a * c_b;
> +			}
> +
> +			c_out = DIV_ROUND_CLOSEST(c_out, 1 << 8);
> +			out->coeff[i][j] = c_out;
> +
> +			/*
> +			 * get scale for this coefficient and update
> +			 * final output scale.
> +			 */
> +			scale = get_coeff_scale(c_out);
> +			outscale = max(outscale, scale);
> +
> +			/* M_a[i,j] * O_b[j] */
> +			c_a = coeff_fix(a->coeff[i][j], a->scale);
> +			o_b = offset_fix(b->offset[j], b->scale);
> +			o_out += DIV_ROUND_CLOSEST(c_a * o_b, 1 << 8);
> +		}
> +
> +		/* O_c[i] = (M_a * O_b)[i] + O_a[i] */
> +		o_a = offset_fix(a->offset[i], a->scale);
> +		o_out += o_a;
> +
> +		out->offset[i] = o_out;
> +	}
> +
> +	/*
> +	 * normalize output coefficients and offsets to TPMEM
> +	 * register format.
> +	 */
> +	for (i = 0; i < 3; i++) {
> +		for (j = 0; j < 3; j++) {
> +			c_out = out->coeff[i][j];
> +			out->coeff[i][j] = coeff_normalize(c_out, outscale);
> +		}
> +
> +		o_out = out->offset[i];
> +		out->offset[i] = offset_normalize(o_out, outscale);
> +	}
> +
> +	out->scale = outscale + 1;
> +}
> +
>  static int calc_csc_coeffs(struct ipu_ic_priv *priv,
>  			   struct ic_encode_coeff *coeff_out,
>  			   const struct ipu_ic_colorspace *in,
> @@ -290,14 +515,6 @@ static int calc_csc_coeffs(struct ipu_ic_priv *priv,
>  		return -ENOTSUPP;
>  	}
>  
> -	if ((in->cs == IPUV3_COLORSPACE_YUV &&
> -	     in->quant != V4L2_QUANTIZATION_FULL_RANGE) ||
> -	    (out->cs == IPUV3_COLORSPACE_YUV &&
> -	     out->quant != V4L2_QUANTIZATION_FULL_RANGE)) {
> -		dev_err(priv->ipu->dev, "Limited range YUV not supported\n");
> -		return -ENOTSUPP;
> -	}
> -
>  	if ((in->cs == IPUV3_COLORSPACE_RGB &&
>  	     in->quant != V4L2_QUANTIZATION_FULL_RANGE) ||
>  	    (out->cs == IPUV3_COLORSPACE_RGB &&
> @@ -307,7 +524,18 @@ static int calc_csc_coeffs(struct ipu_ic_priv *priv,
>  	}
>  
>  	if (in->cs == out->cs) {
> -		*coeff_out = ic_encode_identity;
> +		if (in->quant == out->quant) {
> +			*coeff_out = ic_encode_identity;
> +		} else if (in->quant == V4L2_QUANTIZATION_FULL_RANGE) {
> +			/* YUV full-range to YUV limited-range */
> +			*coeff_out = ic_encode_ycbcr_full2lim;
> +
> +			/* set saturation bit for YUV limited-range output */
> +			coeff_out->sat = true;
> +		} else {
> +			/* YUV limited-range to YUV full-range */
> +			*coeff_out = ic_encode_ycbcr_lim2full;
> +		}
>  
>  		return 0;
>  	}
> @@ -328,7 +556,24 @@ static int calc_csc_coeffs(struct ipu_ic_priv *priv,
>  		return -ENOTSUPP;
>  	}
>  
> -	*coeff_out = *encode_coeff;
> +	if (in->quant == out->quant) {
> +		/*
> +		 * YUV full-range to RGB full-range, or
> +		 * RGB full-range to YUV full-range.
> +		 */
> +		*coeff_out = *encode_coeff;
> +	} else if (inverse_encode) {
> +		/* YUV limited-range to RGB full-range */
> +		transform_coeffs(coeff_out, encode_coeff,
> +				 &ic_encode_ycbcr_lim2full);
> +	} else {
> +		/* RGB full-range to YUV limited-range */
> +		transform_coeffs(coeff_out, &ic_encode_ycbcr_full2lim,
> +				 encode_coeff);
> +
> +		/* set saturation bit for YUV limited-range output */
> +		coeff_out->sat = true;
> +	}
>  
>  	return 0;
>  }
> @@ -340,9 +585,9 @@ static int init_csc(struct ipu_ic *ic,
>  {
>  	struct ipu_ic_priv *priv = ic->priv;
>  	struct ic_encode_coeff coeff;
> +	const unsigned int (*c)[3];
> +	const unsigned int *a;
>  	u32 __iomem *base;
> -	const u16 (*c)[3];
> -	const u16 *a;
>  	u32 param;
>  	int ret;
>  
> @@ -354,8 +599,8 @@ static int init_csc(struct ipu_ic *ic,
>  		(priv->tpmem_base + ic->reg->tpmem_csc[csc_index]);
>  
>  	/* Cast to unsigned */
> -	c = (const u16 (*)[3])coeff.coeff;
> -	a = (const u16 *)coeff.offset;
> +	c = (const unsigned int (*)[3])coeff.coeff;
> +	a = (const unsigned int *)coeff.offset;
>  
>  	param = ((a[0] & 0x1f) << 27) | ((c[0][0] & 0x1ff) << 18) |
>  		((c[1][1] & 0x1ff) << 9) | (c[2][2] & 0x1ff);