Re: [PATCH RFC]OMAP3:PM:Dynamic Calculation of SDRC stall latency during DVFS

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



"ext Reddy, Teerth" <teerth@xxxxxx> writes:

> From: Teerth Reddy <teerth@xxxxxx>
>
> Dynamic Calculation of SDRC stall latency during DVFS
>
> This patch calculates the DPLL3 clock stabilization delay dynamically. The SRAM delay is calibrated during boot-up using the gptimers and is then used when calculating the stabilization delay. With the dynamic method, the dependency on the type of cache being used is removed; hence there is no need for the loop-based calculation.
>
> The wait time for L3 clock stabilization is calculated using the formula: 4*REFCLK + 8*CLKOUTX2, which uses the M, N and M2 values read from the registers. Since this formula yields a slightly low value, 2 us is added as a safety buffer.
> This works fine for OMAP3.

I think you should treat the 3630 differently in this patch. The 3630 has a
different formula for calculating the delay needed after setting the M2 divider.

>
> Signed-off-by: Teerth Reddy <teerth@xxxxxx>
> Signed-off-by: Romit Dasgupta <romit@xxxxxx>
> ---
>  arch/arm/mach-omap2/clkt34xx_dpll3m2.c    |   52 ++++++++++++++++++++++++-----
>  arch/arm/mach-omap2/clock34xx.h           |    2 +
>  arch/arm/mach-omap2/clock34xx_data.c      |   11 ++++++
>  arch/arm/mach-omap2/sram34xx.S            |   17 +++++++++
>  arch/arm/plat-omap/dmtimer.c              |    6 +++
>  arch/arm/plat-omap/include/plat/dmtimer.h |    1 +
>  arch/arm/plat-omap/include/plat/sram.h    |    5 +++
>  arch/arm/plat-omap/sram.c                 |   51 ++++++++++++++++++++++++++++
>  8 files changed, 136 insertions(+), 9 deletions(-)
>
> diff --git a/arch/arm/mach-omap2/clkt34xx_dpll3m2.c b/arch/arm/mach-omap2/clkt34xx_dpll3m2.c
> index 8716a01..2e6d774 100644
> --- a/arch/arm/mach-omap2/clkt34xx_dpll3m2.c
> +++ b/arch/arm/mach-omap2/clkt34xx_dpll3m2.c
> @@ -24,13 +24,22 @@
>  #include <plat/clock.h>
>  #include <plat/sram.h>
>  #include <plat/sdrc.h>
> +#include <plat/prcm.h>
>  
>  #include "clock.h"
>  #include "clock34xx.h"
>  #include "sdrc.h"
> +#include "cm.h"
>  
>  #define CYCLES_PER_MHZ			1000000
>  
> +#define	DPLL_M_MASK	0x7ff
> +#define	DPLL_N_MASK	0x7f
> +#define	DPLL_M2_MASK	0x1f
> +#define	SHIFT_DPLL_M	16
> +#define	SHIFT_DPLL_N	8
> +#define	SHIFT_DPLL_M2	27
> +
>  /*
>   * CORE DPLL (DPLL3) M2 divider rate programming functions
>   *
> @@ -55,6 +64,11 @@ int omap3_core_dpll_m2_set_rate(struct clk *clk, unsigned long rate)
>  	struct omap_sdrc_params *sdrc_cs0;
>  	struct omap_sdrc_params *sdrc_cs1;
>  	int ret;
> +	u32 clk_sel_regval;
> +	u32 core_dpll_mul_m, core_dpll_div_n, core_dpll_clkoutdiv_m2;
> +	u32 sys_clk_rate, sdrc_clk_stab;
> +	u32 nr1, nr2, nr, dr;
> +	unsigned int delay_sram;
>  
>  	if (!clk || !rate)
>  		return -EINVAL;
> @@ -78,16 +92,36 @@ int omap3_core_dpll_m2_set_rate(struct clk *clk, unsigned long rate)
>  		unlock_dll = 1;
>  	}
>  
> -	/*
> -	 * XXX This only needs to be done when the CPU frequency changes
> -	 */
> +	clk_sel_regval = cm_read_mod_reg(PLL_MOD, CM_CLKSEL);
> +
> +	/* Get the M, N and M2 values required for getting sdrc clk stab */
> +	core_dpll_mul_m = (clk_sel_regval >> SHIFT_DPLL_M) & DPLL_M_MASK;
> +	core_dpll_div_n = (clk_sel_regval >> SHIFT_DPLL_N) & DPLL_N_MASK;
> +	core_dpll_clkoutdiv_m2 = (clk_sel_regval >> SHIFT_DPLL_M2) &
> +								DPLL_M2_MASK;
> +	sys_clk_rate = clk_get_rate(clk_get(NULL, "osc_sys_ck"));
> +
> +	sys_clk_rate = sys_clk_rate / 1000000;
> +
> +	/* wait time for L3 clk stabilization = 4*REFCLK + 8*CLKOUTX2 */
> +	nr1 = (4 * (core_dpll_div_n + 1) * 2 * core_dpll_clkoutdiv_m2 *
> +							 core_dpll_mul_m);
> +	nr2 = 8 * (core_dpll_div_n + 1);
> +	nr = nr1 + nr2;
> +
> +	dr = 2 * sys_clk_rate * core_dpll_mul_m * core_dpll_clkoutdiv_m2;
> +
> +	sdrc_clk_stab = nr / dr;
> +
> +	/* Adding 2us to sdrc clk stab */
> +	sdrc_clk_stab = sdrc_clk_stab + 2;
> +
> +	delay_sram = delay_sram_val();
> +
> +	/* Calculate the number of MPU cycles to wait for SDRC to stabilize */
>  	_mpurate = arm_fck_p->rate / CYCLES_PER_MHZ;
> -	c = (_mpurate << SDRC_MPURATE_SCALE) >> SDRC_MPURATE_BASE_SHIFT;
> -	c += 1;  /* for safety */
> -	c *= SDRC_MPURATE_LOOPS;
> -	c >>= SDRC_MPURATE_SCALE;
> -	if (c == 0)
> -		c = 1;
> +
> +	c = ((sdrc_clk_stab * _mpurate) / (delay_sram * 2));
>  
>  	pr_debug("clock: changing CORE DPLL rate from %lu to %lu\n", clk->rate,
>  		 validrate);
> diff --git a/arch/arm/mach-omap2/clock34xx.h b/arch/arm/mach-omap2/clock34xx.h index 313efc0..97afe34 100644
> --- a/arch/arm/mach-omap2/clock34xx.h
> +++ b/arch/arm/mach-omap2/clock34xx.h
> @@ -22,4 +22,6 @@ extern const struct clkops clkops_omap3430es2_hsotgusb_wait;  extern const struct clkops clkops_omap3430es2_dss_usbhost_wait;
>  extern const struct clkops omap3_clkops_noncore_dpll_ops;
>  
> +unsigned int delay_sram_val(void);
> +
>  #endif
> diff --git a/arch/arm/mach-omap2/clock34xx_data.c b/arch/arm/mach-omap2/clock34xx_data.c
> index 8728f1f..cf7384b 100644
> --- a/arch/arm/mach-omap2/clock34xx_data.c
> +++ b/arch/arm/mach-omap2/clock34xx_data.c
> @@ -22,6 +22,7 @@
>  
>  #include <plat/control.h>
>  #include <plat/clkdev_omap.h>
> +#include <plat/sram.h>
>  
>  #include "clock.h"
>  #include "clock34xx.h"
> @@ -52,6 +53,8 @@
>  static struct clk dpll1_fck;
>  static struct clk dpll2_fck;
>  
> +unsigned int delay_sram;
> +
>  /* PRM CLOCKS */
>  
>  /* According to timer32k.c, this is a 32768Hz clock, not a 32000Hz clock. */ @@ -3275,5 +3278,13 @@ int __init omap3xxx_clk_init(void)
>  	sdrc_ick_p = clk_get(NULL, "sdrc_ick");
>  	arm_fck_p = clk_get(NULL, "arm_fck");
>  
> +	/* Measure sram delay */
> +	delay_sram = measure_sram_delay(10000);
> +	pr_debug("SRAM delay: %d\n", delay_sram);
>  	return 0;
>  }
> +
> +unsigned int delay_sram_val(void)
> +{
> +	return delay_sram;
> +}
> diff --git a/arch/arm/mach-omap2/sram34xx.S b/arch/arm/mach-omap2/sram34xx.S index de99ba2..bbeef26 100644
> --- a/arch/arm/mach-omap2/sram34xx.S
> +++ b/arch/arm/mach-omap2/sram34xx.S
> @@ -313,3 +313,20 @@ core_m2_mask_val:
>  ENTRY(omap3_sram_configure_core_dpll_sz)
>  	.word	. - omap3_sram_configure_core_dpll
>  
> +ENTRY(__sram_wait_delay)
> +	stmfd	sp!, {r1-r12, lr}	@ store regs to stack
> +	ldr	r2, [r0]
> +
> +loop1:
> +	subs 	r1, r1, #1
> +	bne	loop1
> +
> +	isb
> +	ldr	r3, [r0]
> +	subs	r4, r3, r2
> +
> +	mov 	r0, r4 			@ return value
> +	ldmfd	sp!, {r1-r12, pc}	@ restore regs and return
> +
> +ENTRY(__sram_wait_delay_sz)
> +	.word	. - __sram_wait_delay
> diff --git a/arch/arm/plat-omap/dmtimer.c b/arch/arm/plat-omap/dmtimer.c index 24bf692..d00a44a 100644
> --- a/arch/arm/plat-omap/dmtimer.c
> +++ b/arch/arm/plat-omap/dmtimer.c
> @@ -712,6 +712,12 @@ void omap_dm_timer_write_counter(struct omap_dm_timer *timer, unsigned int value  }  EXPORT_SYMBOL_GPL(omap_dm_timer_write_counter);
>  
> +unsigned int omap_dm_timer_get_phys_base(unsigned int gptimer) {
> +	return dm_timers[gptimer - 1].phys_base; } 
> +EXPORT_SYMBOL_GPL(omap_dm_timer_get_phys_base);
> +
>  int omap_dm_timers_active(void)
>  {
>  	int i;
> diff --git a/arch/arm/plat-omap/include/plat/dmtimer.h b/arch/arm/plat-omap/include/plat/dmtimer.h
> index 20f1054..f75d43e 100644
> --- a/arch/arm/plat-omap/include/plat/dmtimer.h
> +++ b/arch/arm/plat-omap/include/plat/dmtimer.h
> @@ -55,6 +55,7 @@ void omap_dm_timer_free(struct omap_dm_timer *timer);  void omap_dm_timer_enable(struct omap_dm_timer *timer);  void omap_dm_timer_disable(struct omap_dm_timer *timer);
>  
> +unsigned int omap_dm_timer_get_phys_base(unsigned int gptimer);
>  int omap_dm_timer_get_irq(struct omap_dm_timer *timer);
>  
>  u32 omap_dm_timer_modify_idlect_mask(u32 inputmask); diff --git a/arch/arm/plat-omap/include/plat/sram.h b/arch/arm/plat-omap/include/plat/sram.h
> index 16a1b45..3ee366c 100644
> --- a/arch/arm/plat-omap/include/plat/sram.h
> +++ b/arch/arm/plat-omap/include/plat/sram.h
> @@ -69,6 +69,11 @@ extern u32 omap3_sram_configure_core_dpll(
>  			u32 sdrc_actim_ctrl_b_1, u32 sdrc_mr_1);  extern unsigned long omap3_sram_configure_core_dpll_sz;
>  
> +extern unsigned int measure_sram_delay(unsigned int);
> +
> +extern u32 __sram_wait_delay(unsigned int, unsigned int); extern 
> +unsigned long __sram_wait_delay_sz;
> +
>  #ifdef CONFIG_PM
>  extern void omap_push_sram_idle(void);
>  #else
> diff --git a/arch/arm/plat-omap/sram.c b/arch/arm/plat-omap/sram.c index 51f4dfb..e541e8f 100644
> --- a/arch/arm/plat-omap/sram.c
> +++ b/arch/arm/plat-omap/sram.c
> @@ -30,6 +30,9 @@
>  #include <plat/cpu.h>
>  #include <plat/vram.h>
>  
> +#include <linux/clk.h>
> +#include <plat/dmtimer.h>
> +#include <plat/io.h>
>  #include <plat/control.h>
>  
>  #if defined(CONFIG_ARCH_OMAP2) || defined(CONFIG_ARCH_OMAP3) @@ -74,6 +77,9 @@
>  
>  #define ROUND_DOWN(value,boundary)	((value) & (~((boundary)-1)))
>  
> +/* GPT10 TCRR register offset */
> +#define OMAP_TIMER_COUNTER_OFFSET	0x28
> +
>  static unsigned long omap_sram_start;
>  static unsigned long omap_sram_base;
>  static unsigned long omap_sram_size;
> @@ -437,11 +443,56 @@ static inline int omap34xx_sram_init(void)  }  #endif
>  
> +
> +#ifdef CONFIG_ARCH_OMAP3
> +unsigned long (*_omap3_sram_delay)(void * __iomem, unsigned int); 
> +unsigned int  measure_sram_delay(unsigned int loop) {
> +	static struct omap_dm_timer *gpt;
> +	unsigned long flags, diff = 0, gt_rate, mpurate;
> +	unsigned int delay_sram, error_gain;
> +	void * __iomem gpt10_counter_reg;
> +
> +	omap_dm_timer_init();
> +	gpt = omap_dm_timer_request_specific(10);
> +	if (!gpt)
> +		pr_err("Could not get the gptimer\n");
> +	omap_dm_timer_set_source(gpt, OMAP_TIMER_SRC_SYS_CLK);
> +
> +	gpt10_counter_reg =
> +			OMAP2_L4_IO_ADDRESS(omap_dm_timer_get_phys_base(10) +
> +					OMAP_TIMER_COUNTER_OFFSET);
> +
> +	gt_rate = clk_get_rate(omap_dm_timer_get_fclk(gpt));
> +	omap_dm_timer_set_load_start(gpt, 0, 0);
> +
> +	local_irq_save(flags);
> +	diff = _omap3_sram_delay(gpt10_counter_reg, loop);
> +	local_irq_restore(flags);
> +
> +	omap_dm_timer_stop(gpt);
> +	omap_dm_timer_free(gpt);
> +
> +	mpurate = clk_get_rate(clk_get(NULL, "arm_fck"));
> +
> +	/* calculate the sram delay */
> +	delay_sram = (((mpurate / gt_rate) * diff) / (loop * 2));
> +
> +	error_gain = mpurate / gt_rate;
> +	delay_sram = delay_sram + error_gain;
> +
> +	return delay_sram;
> +}
> +#endif
> +
>  int __init omap_sram_init(void)
>  {
>  	omap_detect_sram();
>  	omap_map_sram();
>  
> +	_omap3_sram_delay = omap_sram_push(__sram_wait_delay,
> +						__sram_wait_delay_sz);
> +
>  	if (!(cpu_class_is_omap2()))
>  		omap1_sram_init();
>  	else if (cpu_is_omap242x())
> --
> To unsubscribe from this list: send the line "unsubscribe linux-omap" in
> the body of a message to majordomo@xxxxxxxxxxxxxxx
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

-- 
Jouni Högander
--
To unsubscribe from this list: send the line "unsubscribe linux-omap" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[Index of Archives]     [Linux Arm (vger)]     [ARM Kernel]     [ARM MSM]     [Linux Tegra]     [Linux WPAN Networking]     [Linux Wireless Networking]     [Maemo Users]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite Trails]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux