Re: [PATCH v4 3/3] drm/i915/dsc: Add rc_range_parameter calculation for YCbCr420

"Kandpal, Suraj" <suraj.kandpal@xxxxxxxxx> · Mon, 10 Jul 2023 16:18:07 +0000



> 
> On 7/5/2023 10:45 AM, Suraj Kandpal wrote:
> > Some rc_range_parameter calculations were missed for YCbCr420, add
> > them to calculate_rc_param()
> >
> > --v2
> > -take into account the new formula to get bpp_i
> >
> > --v4
> > -Fix range_bpg_offset formula for YCbCr420 bpp <= 16 [Ankit]
> >
> > Cc: Vandita Kulkarni <vandita.kulkarni@xxxxxxxxx>
> > Cc: Ankit Nautiyal <ankit.k.nautiyal@xxxxxxxxx>
> > Cc: Uma Shankar <uma.shankar@xxxxxxxxx>
> > Signed-off-by: Suraj Kandpal <suraj.kandpal@xxxxxxxxx>
> > ---
> >   drivers/gpu/drm/i915/display/intel_vdsc.c | 144 ++++++++++++++++------
> >   1 file changed, 106 insertions(+), 38 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/i915/display/intel_vdsc.c
> > b/drivers/gpu/drm/i915/display/intel_vdsc.c
> > index cfcd463f66bb..8f0dac908e61 100644
> > --- a/drivers/gpu/drm/i915/display/intel_vdsc.c
> > +++ b/drivers/gpu/drm/i915/display/intel_vdsc.c
> > @@ -52,23 +52,36 @@ static bool is_pipe_dsc(struct intel_crtc *crtc, enum
> transcoder cpu_transcoder)
> >   	return true;
> >   }
> >
> > +static void
> > +intel_vdsc_set_min_max_qp(struct drm_dsc_config *vdsc_cfg, int buf,
> > +			  int bpp)
> > +{
> > +	int bpc = vdsc_cfg->bits_per_component;
> > +
> > +	/* Read range_minqp and range_max_qp from qp tables */
> > +	vdsc_cfg->rc_range_params[buf].range_min_qp =
> > +		intel_lookup_range_min_qp(bpc, buf, bpp, vdsc_cfg-
> >native_420);
> > +	vdsc_cfg->rc_range_params[buf].range_max_qp =
> > +		intel_lookup_range_max_qp(bpc, buf, bpp, vdsc_cfg-
> >native_420); }
> > +
> > +/*
> > + * Calculate RC Params using the below two methods:
> > + * 1. DSCParameterValuesVESA V1-2 spreadsheet
> > + * 2. VESA DSC 1.2a DSC Tools Application.
> > + * Note:
> > + * Above two methods use a common formula to derive values for any
> > +combination of DSC
> > + * variables. The formula approach may yield slight differences in
> > +the derived PPS
> > + * parameters from the original parameter sets. These differences are
> > +not consequential
> > + * to the coding performance because all parameter sets have been
> > +shown to produce
> > + * visually lossless quality (provides the same PPS values as
> > + * DSCParameterValuesVESA V1-2 spreadsheet)  */
> 
> 
> As I understand we are using the values of rc parameters from the tables
> given for DSC tools application from C-model for different bits_per_pixel and
> bpcs.
> 
> It would be good to mention the C-model used for these values.
> 
> 
> >   static void
> >   calculate_rc_params(struct drm_dsc_config *vdsc_cfg)
> >   {
> >   	int bpc = vdsc_cfg->bits_per_component;
> >   	int bpp = vdsc_cfg->bits_per_pixel >> 4;
> > -	static const s8 ofs_und6[] = {
> > -		0, -2, -2, -4, -6, -6, -8, -8, -8, -10, -10, -12, -12, -12, -12
> > -	};
> > -	static const s8 ofs_und8[] = {
> > -		2, 0, 0, -2, -4, -6, -8, -8, -8, -10, -10, -10, -12, -12, -12
> > -	};
> > -	static const s8 ofs_und12[] = {
> > -		2, 0, 0, -2, -4, -6, -8, -8, -8, -10, -10, -10, -12, -12, -12
> > -	};
> > -	static const s8 ofs_und15[] = {
> > -		10, 8, 6, 4, 2, 0, -2, -4, -6, -8, -10, -10, -12, -12, -12
> > -	};
> >   	int qp_bpc_modifier = (bpc - 8) * 2;
> >   	u32 res, buf_i, bpp_i;
> >
> > @@ -119,33 +132,88 @@ calculate_rc_params(struct drm_dsc_config
> *vdsc_cfg)
> >   	vdsc_cfg->rc_quant_incr_limit0 = 11 + qp_bpc_modifier;
> >   	vdsc_cfg->rc_quant_incr_limit1 = 11 + qp_bpc_modifier;
> >
> > -	bpp_i  = (2 * (bpp - 6));
> > -	for (buf_i = 0; buf_i < DSC_NUM_BUF_RANGES; buf_i++) {
> > -		u8 range_bpg_offset;
> > -
> > -		/* Read range_minqp and range_max_qp from qp tables */
> > -		vdsc_cfg->rc_range_params[buf_i].range_min_qp =
> > -			intel_lookup_range_min_qp(bpc, buf_i, bpp_i,
> vdsc_cfg->native_420);
> > -		vdsc_cfg->rc_range_params[buf_i].range_max_qp =
> > -			intel_lookup_range_max_qp(bpc, buf_i, bpp_i,
> vdsc_cfg->native_420);
> > -
> > -		/* Calculate range_bpg_offset */
> > -		if (bpp <= 6) {
> > -			range_bpg_offset = ofs_und6[buf_i];
> > -		} else if (bpp <= 8) {
> > -			res = DIV_ROUND_UP(((bpp - 6) * (ofs_und8[buf_i] -
> ofs_und6[buf_i])), 2);
> > -			range_bpg_offset = ofs_und6[buf_i] + res;
> > -		} else if (bpp <= 12) {
> > -			range_bpg_offset = ofs_und8[buf_i];
> > -		} else if (bpp <= 15) {
> > -			res = DIV_ROUND_UP(((bpp - 12) * (ofs_und15[buf_i]
> - ofs_und12[buf_i])), 3);
> > -			range_bpg_offset = ofs_und12[buf_i] + res;
> > -		} else {
> > -			range_bpg_offset = ofs_und15[buf_i];
> > +	if (vdsc_cfg->native_420) {
> > +		static const s8 ofs_und4[] = {
> > +			2, 0, 0, -2, -4, -6, -8, -8, -8, -10, -10, -12, -12, -12, -12
> > +		};
> > +		static const s8 ofs_und5[] = {
> > +			2, 0, 0, -2, -4, -6, -8, -8, -8, -10, -10, -10, -12, -12, -12
> > +		};
> > +		static const s8 ofs_und6[] = {
> > +			2, 0, 0, -2, -4, -6, -8, -8, -8, -10, -10, -10, -12, -12, -12
> > +		};
> > +		static const s8 ofs_und8[] = {
> > +			10, 8, 6, 4, 2, 0, -2, -4, -6, -8, -10, -10, -12, -12, -12
> > +		};
> > +
> > +		bpp_i  = bpp - 8;
> > +		for (buf_i = 0; buf_i < DSC_NUM_BUF_RANGES; buf_i++) {
> > +			u8 range_bpg_offset;
> > +
> > +			intel_vdsc_set_min_max_qp(vdsc_cfg, buf_i, bpp_i);
> > +
> > +			/* Calculate range_bpg_offset */
> > +			if (bpp <= 8) {
> > +				range_bpg_offset = ofs_und4[buf_i];
> > +			} else if (bpp <= 10) {
> > +				res = DIV_ROUND_UP(((bpp - 8) *
> > +						    (ofs_und5[buf_i] -
> ofs_und4[buf_i])), 2);
> > +				range_bpg_offset = ofs_und4[buf_i] + res;
> 
> 
> We can have a macro for this, as essentially we are using linear interpolation
> to get these values for a bpp from the table of nearby bpps.
> 
> But that perhaps would be a separate patch.

Sure I can create a separate patch and float that

Regards,
Suraj Kandpal
> 
> 
> With the suggested changes in note, this is:
> 
> Reviewed-by: Ankit Nautiyal <ankit.k.nautiyal@xxxxxxxxx>
> 
> 
> Regards,
> 
> Ankit
> 
> > +			} else if (bpp <= 12) {
> > +				res = DIV_ROUND_UP(((bpp - 10) *
> > +						    (ofs_und6[buf_i] -
> ofs_und5[buf_i])), 2);
> > +				range_bpg_offset = ofs_und5[buf_i] + res;
> > +			} else if (bpp <= 16) {
> > +				res = DIV_ROUND_UP(((bpp - 12) *
> > +						    (ofs_und8[buf_i] -
> ofs_und6[buf_i])), 4);
> > +				range_bpg_offset = ofs_und6[buf_i] + res;
> > +			} else {
> > +				range_bpg_offset = ofs_und8[buf_i];
> > +			}
> > +
> > +			vdsc_cfg->rc_range_params[buf_i].range_bpg_offset
> =
> > +				range_bpg_offset &
> DSC_RANGE_BPG_OFFSET_MASK;
> > +		}
> > +	} else {
> > +		static const s8 ofs_und6[] = {
> > +			0, -2, -2, -4, -6, -6, -8, -8, -8, -10, -10, -12, -12, -12, -12
> > +		};
> > +		static const s8 ofs_und8[] = {
> > +			2, 0, 0, -2, -4, -6, -8, -8, -8, -10, -10, -10, -12, -12, -12
> > +		};
> > +		static const s8 ofs_und12[] = {
> > +			2, 0, 0, -2, -4, -6, -8, -8, -8, -10, -10, -10, -12, -12, -12
> > +		};
> > +		static const s8 ofs_und15[] = {
> > +			10, 8, 6, 4, 2, 0, -2, -4, -6, -8, -10, -10, -12, -12, -12
> > +		};
> > +
> > +		bpp_i  = (2 * (bpp - 6));
> > +		for (buf_i = 0; buf_i < DSC_NUM_BUF_RANGES; buf_i++) {
> > +			u8 range_bpg_offset;
> > +
> > +			intel_vdsc_set_min_max_qp(vdsc_cfg, buf_i, bpp_i);
> > +
> > +			/* Calculate range_bpg_offset */
> > +			if (bpp <= 6) {
> > +				range_bpg_offset = ofs_und6[buf_i];
> > +			} else if (bpp <= 8) {
> > +				res = DIV_ROUND_UP(((bpp - 6) *
> > +						    (ofs_und8[buf_i] -
> ofs_und6[buf_i])), 2);
> > +				range_bpg_offset = ofs_und6[buf_i] + res;
> > +			} else if (bpp <= 12) {
> > +				range_bpg_offset = ofs_und8[buf_i];
> > +			} else if (bpp <= 15) {
> > +				res = DIV_ROUND_UP(((bpp - 12) *
> > +						    (ofs_und15[buf_i] -
> ofs_und12[buf_i])), 3);
> > +				range_bpg_offset = ofs_und12[buf_i] + res;
> > +			} else {
> > +				range_bpg_offset = ofs_und15[buf_i];
> > +			}
> > +
> > +			vdsc_cfg->rc_range_params[buf_i].range_bpg_offset
> =
> > +				range_bpg_offset &
> DSC_RANGE_BPG_OFFSET_MASK;
> >   		}
> > -
> > -		vdsc_cfg->rc_range_params[buf_i].range_bpg_offset =
> > -			range_bpg_offset &
> DSC_RANGE_BPG_OFFSET_MASK;
> >   	}
> >   }
> >