Re: [PATCH 1/2] drm/i915: Enable scanline read for gen9 dsi

"Shankar, Uma" <uma.shankar@xxxxxxxxx> · Tue, 19 Sep 2017 08:49:21 +0000

>-----Original Message-----
>From: Intel-gfx [mailto:intel-gfx-bounces@xxxxxxxxxxxxxxxxxxxxx] On Behalf Of
>Maarten Lankhorst
>Sent: Tuesday, September 19, 2017 12:56 PM
>To: Ville Syrjälä <ville.syrjala@xxxxxxxxxxxxxxx>
>Cc: intel-gfx@xxxxxxxxxxxxxxxxxxxxx; Srinivas, Vidya <vidya.srinivas@xxxxxxxxx>
>Subject: Re:  [PATCH 1/2] drm/i915: Enable scanline read for gen9 dsi
>
>Op 18-09-17 om 16:24 schreef Ville Syrjälä:
>> On Mon, Sep 18, 2017 at 03:57:38PM +0200, Maarten Lankhorst wrote:
>>> Op 18-09-17 om 15:32 schreef Vidya Srinivas:
>>>> From: Uma Shankar <uma.shankar@xxxxxxxxx>
>>>>
>>>> For gen9 platforms, dsi timings are driven from port instead of pipe
>>>> (unlike ddi). Thus, we can't rely on pipe registers to get the
>>>> timing information. Even scanline register read will not be functional.
>>>> This is causing vblank evasion logic to fail since it relies on
>>>> scanline, causing atomic update failure warnings.
>>>>
>>>> This patch uses pipe framestamp and current timestamp registers to
>>>> calculate scanline. This is an indirect way to get the scanline.
>>>> It helps resolve atomic update failure for gen9 dsi platforms.
>>>>
>>>> v2: Addressed Ville and Daniel's review comments. Updated the
>>>> register MACROs, handled race condition for register reads,
>>>> extracted timings from the hwmode. Removed the dependency on
>>>> crtc->config to get the encoder type.
>>>>
>>>> v3: Made get scanline function generic
>>>>
>>>> Credits-to: Ville Syrjälä <ville.syrjala@xxxxxxxxxxxxxxx>
>>>> Signed-off-by: Uma Shankar <uma.shankar@xxxxxxxxx>
>>>> Signed-off-by: Chandra Konduru <chandra.konduru@xxxxxxxxx>
>>>> Signed-off-by: Vidya Srinivas <vidya.srinivas@xxxxxxxxx>
>>>> ---
>>>>  drivers/gpu/drm/i915/i915_drv.h      |  2 ++
>>>>  drivers/gpu/drm/i915/i915_irq.c      |  7 +++++
>>>>  drivers/gpu/drm/i915/i915_reg.h      | 11 +++++++
>>>>  drivers/gpu/drm/i915/intel_display.c | 60
>>>> ++++++++++++++++++++++++++++++++++++
>>>>  4 files changed, 80 insertions(+)
>>>>
>>>> diff --git a/drivers/gpu/drm/i915/i915_drv.h
>>>> b/drivers/gpu/drm/i915/i915_drv.h index 1cc31a5..d9efe83 100644
>>>> --- a/drivers/gpu/drm/i915/i915_drv.h
>>>> +++ b/drivers/gpu/drm/i915/i915_drv.h
>>>> @@ -4085,6 +4085,8 @@ void intel_sbi_write(struct drm_i915_private
>>>> *dev_priv, u16 reg, u32 value,
>>>>  u32 vlv_flisdsi_read(struct drm_i915_private *dev_priv, u32 reg);
>>>> void vlv_flisdsi_write(struct drm_i915_private *dev_priv, u32 reg,
>>>> u32 val);
>>>>
>>>> +u32 gen9_get_scanline(struct intel_crtc *crtc);
>>>> +
>>>>  /* intel_dpio_phy.c */
>>>>  void bxt_port_to_phy_channel(struct drm_i915_private *dev_priv, enum
>port port,
>>>>  			     enum dpio_phy *phy, enum dpio_channel *ch); diff --
>git
>>>> a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
>>>> index 5d391e6..47668dd 100644
>>>> --- a/drivers/gpu/drm/i915/i915_irq.c
>>>> +++ b/drivers/gpu/drm/i915/i915_irq.c
>>>> @@ -781,6 +781,7 @@ static int __intel_get_crtc_scanline(struct intel_crtc
>*crtc)
>>>>  	struct drm_vblank_crtc *vblank;
>>>>  	enum pipe pipe = crtc->pipe;
>>>>  	int position, vtotal;
>>>> +	struct intel_encoder *encoder;
>>>>
>>>>  	if (!crtc->active)
>>>>  		return -1;
>>>> @@ -792,6 +793,12 @@ static int __intel_get_crtc_scanline(struct intel_crtc
>*crtc)
>>>>  	if (mode->flags & DRM_MODE_FLAG_INTERLACE)
>>>>  		vtotal /= 2;
>>>>
>>>> +	if (IS_BROXTON(dev_priv) || IS_GEMINILAKE(dev_priv)) {
>>>> +		for_each_encoder_on_crtc(crtc->base.dev, &crtc->base,
>encoder)
>>>> +			if (encoder->type == INTEL_OUTPUT_DSI)
>>>> +				return gen9_get_scanline(crtc);
>>> I really think we shouldn't loop over all encoders for something as critical as
>__intel_get_crtc_scanline..
>>>> +	}
>>>> +
>>>>  	if (IS_GEN2(dev_priv))
>>>>  		position = I915_READ_FW(PIPEDSL(pipe)) &
>DSL_LINEMASK_GEN2;
>>>>  	else
>>>> diff --git a/drivers/gpu/drm/i915/i915_reg.h
>>>> b/drivers/gpu/drm/i915/i915_reg.h index 0b03260..85168ee 100644
>>>> --- a/drivers/gpu/drm/i915/i915_reg.h
>>>> +++ b/drivers/gpu/drm/i915/i915_reg.h
>>>> @@ -8802,6 +8802,17 @@ enum skl_power_gate {
>>>>  #define MIPIO_TXESC_CLK_DIV2			_MMIO(0x160008)
>>>>  #define  GLK_TX_ESC_CLK_DIV2_MASK			0x3FF
>>>>
>>>> +/* Gen4+ Timestamp and Pipe Frame time stamp registers */
>>>> +#define GEN4_TIMESTAMP_CTR	_MMIO(MCHBAR_MIRROR_BASE +
>0x2358)
>>>> +#define GEN7_TIMESTAMP_CTR	_MMIO(0x44070)
>>>> +
>>>> +#define _PIPE_FRMTMSTMP_A		0x70048
>>>> +#define _PIPE_FRMTMSTMP_B		0x71048
>>>> +#define _IVB_PIPE_FRMTMSTMP_C	0x72048
>>>> +#define PIPE_FRMTMSTMP(pipe)		\
>>>> +			_MMIO_PIPE3((pipe), _PIPE_FRMTMSTMP_A, \
>>>> +				_PIPE_FRMTMSTMP_B,
>_IVB_PIPE_FRMTMSTMP_C)
>>>> +
>>>>  /* BXT MIPI clock controls */
>>>>  #define BXT_MAX_VAR_OUTPUT_KHZ			39500
>>>>
>>>> diff --git a/drivers/gpu/drm/i915/intel_display.c
>>>> b/drivers/gpu/drm/i915/intel_display.c
>>>> index 0871807..601032f 100644
>>>> --- a/drivers/gpu/drm/i915/intel_display.c
>>>> +++ b/drivers/gpu/drm/i915/intel_display.c
>>>> @@ -10352,6 +10352,66 @@ static bool needs_scaling(const struct
>intel_plane_state *state)
>>>>  	return (src_w != dst_w || src_h != dst_h);  }
>>>>
>>>> +/*
>>>> + * For Gen9 DSI, pipe scanline register will not
>>>> + * work to get the scanline since the timings
>>>> + * are driven from the PORT (unlike DDI encoders).
>>>> + * This function will use Framestamp and current
>>>> + * timestamp registers to calculate the scanline.
>>>> + */
>>>> +u32 gen9_get_scanline(struct intel_crtc *crtc) {
>>>> +	struct drm_device *dev = crtc->base.dev;
>>>> +	struct drm_i915_private *dev_priv = to_i915(dev);
>>>> +	u32 crtc_vblank_start = crtc->base.mode.crtc_vblank_start;
>>>> +	u32 crtc_vtotal = crtc->base.mode.crtc_vtotal;
>>>> +	u32 crtc_htotal = crtc->base.mode.crtc_htotal;
>>>> +	u32 crtc_clock = crtc->base.mode.crtc_clock;
>>>> +	u64 scanline = 0, scan_prev_time, scan_curr_time, scan_post_time;
>>> Aren't all those 0 since they're only set for crtc_state->adjusted_mode, while
>crtc->mode is assigned to crtc_state->mode? You'd probably need to look at dev-
>>vblank[crtc->pipe].hwmode for those, which should have been passed as
>parameter..
>>> With a bit of luck you could even use the derived ns values set in
>drm_calc_timestamping_constants, so you don't have to do the math here
>yourself.
>> IIRC the derived values ended up having quite a bit of rounding error
>> in them. Or maybe that just the pixel duration (which IIRC I nuked
>> already). I would have nuked the line duration too if it wasn't for
>> nouveau using it. So I think I'd rather not expand its use.
>>
>> Not sure we'd actually save anything by using the derived value anyway.
>> We'll still have to do the expensive division at least. Hmm. I guess
>> we could get rid of one multiplication.
>>

We can use the vblank->mode here to get the timings. Let's not use the
derived stuff, due to any rounding dependency. With the current stuff, we are
getting pretty accurate results. Hope this is ok ?

>>>> +	WARN_ON(!crtc_vtotal);
>>>> +	if (!crtc_vtotal)
>>>> +		return scanline;
>>>> +
>>>> +	/* To avoid the race condition where we might cross into the
>>>> +	 * next vblank just between the PIPE_FRMTMSTMP and TIMESTAMP_CTR
>>>> +	 * reads. We make sure we read PIPE_FRMTMSTMP and
>TIMESTAMP_CTR
>>>> +	 * during the same frame.
>>>> +	 */
>>>> +	do {
>>>> +		/*
>>>> +		 * This field provides read back of the display
>>>> +		 * pipe frame time stamp. The time stamp value
>>>> +		 * is sampled at every start of vertical blank.
>>>> +		 */
>>>> +		scan_prev_time = I915_READ_FW(PIPE_FRMTMSTMP(crtc-
>>pipe));
>>>> +
>>>> +		/*
>>>> +		 * The TIMESTAMP_CTR register has the current
>>>> +		 * time stamp value.
>>>> +		 */
>>>> +		scan_curr_time = I915_READ_FW(GEN7_TIMESTAMP_CTR);
>>>> +
>>>> +		scan_post_time = I915_READ_FW(PIPE_FRMTMSTMP(crtc-
>>pipe));
>>>> +	} while (scan_post_time != scan_prev_time);
>>>> +
>>>> +	/*
>>>> +	 * Since the register is 32 bit and the values
>>>> +	 * can overflow and wrap around, making sure
>>>> +	 * current time accounts for the register
>>>> +	 * wrap
>>>> +	 */
>>>> +	if (scan_curr_time < scan_prev_time)
>>>> +		scan_curr_time += 0x100000000;
>>>> +
>>>> +	scanline = div_u64(mul_u64_u32_shr((scan_curr_time -
>>>> +scan_prev_time),
>>> Isn't mul_u64_u32_div exactly what you want here?
>> I think we'd actually want a mul_u32_u32_div(). But that doesn't seem
>> to exist.
>Yeah, couldn't find it either. :(
>
>Why do we even use the macros, can't we simply do the multiplications and
>divide without?
>i915 is only on x86 anyway. :)

mul_u32_u32 handles it differently using x86 assembly instructions (arch/x86/include/asm/div64.h).
I guess it's more precise than normal u32*u32 multiplication.  

Regards,
Uma Shankar

>>>> +					crtc_clock, 0), 1000 * crtc_htotal);
>>>> +	scanline = min(scanline, (u64)(crtc_vtotal - 1));
>>>> +	scanline = (scanline + crtc_vblank_start) % crtc_vtotal;
>>>> +
>>>> +	return scanline;
>>>> +}
>>>> +
>>>>  int intel_plane_atomic_calc_changes(const struct intel_crtc_state
>*old_crtc_state,
>>>>  				    struct drm_crtc_state *crtc_state,
>>>>  				    const struct intel_plane_state
>*old_plane_state,
>>>
>>> _______________________________________________
>>> Intel-gfx mailing list
>>> Intel-gfx@xxxxxxxxxxxxxxxxxxxxx
>>> https://lists.freedesktop.org/mailman/listinfo/intel-gfx
>
>
>_______________________________________________
>Intel-gfx mailing list
>Intel-gfx@xxxxxxxxxxxxxxxxxxxxx
>https://lists.freedesktop.org/mailman/listinfo/intel-gfx
_______________________________________________
Intel-gfx mailing list
Intel-gfx@xxxxxxxxxxxxxxxxxxxxx
https://lists.freedesktop.org/mailman/listinfo/intel-gfx