On Mon, Dec 20, 2010 at 7:43 AM, Santosh Shilimkar <santosh.shilimkar@xxxxxx> wrote: >> -----Original Message----- >> From: linux-omap-owner@xxxxxxxxxxxxxxx [mailto:linux-omap- >> owner@xxxxxxxxxxxxxxx] On Behalf Of Nishanth Menon >> Sent: Sunday, December 19, 2010 4:24 AM >> To: linux-omap; linux-arm >> Cc: Jean Pihet; Kevin; Tony >> Subject: [PATCH v4 1/7] OMAP3: PM: Update clean_l2 to use >> v7_flush_dcache_all >> >> From: Richard Woodruff <r-woodruff2@xxxxxx> >> >> Analysis in the TI kernel with ETM showed that using the cache-mapped flush >> in the kernel instead of the SO-mapped flush drops the cost by 65% (3.39 ms down >> to 1.17 ms) for clean_l2, which is used during sleep sequences. >> Overall: >> - speed up >> - unfortunately there isn't a good alternative flush method today >> - code reduction, less maintenance, and fewer potential bugs in >> unmaintained code >> >> This also fixes the bug with the clean_l2 function usage. >> >> Reported-by: Tony Lindgren <tony@xxxxxxxxxxx> >> >> Cc: Kevin Hilman <khilman@xxxxxxxxxxxxxxxxxxx> >> Cc: Tony Lindgren <tony@xxxxxxxxxxx> >> >> [nm@xxxxxx: ported rkw's proposal to 2.6.37-rc2] >> Signed-off-by: Nishanth Menon <nm@xxxxxx> >> Signed-off-by: Richard Woodruff <r-woodruff2@xxxxxx> >> --- >> (no change in this series, posted for completeness) >> v2: https://patchwork.kernel.org/patch/365222/ >> v1: http://marc.info/?l=linux-omap&m=129013171325210&w=2 >> arch/arm/mach-omap2/sleep34xx.S | 79 > ++++++---------------------------- >> ---- >> 1 files changed, 13 insertions(+), 66 deletions(-) >> >> diff --git a/arch/arm/mach-omap2/sleep34xx.S b/arch/arm/mach- >> omap2/sleep34xx.S >> index 2fb205a..2c20fcf 100644 >> --- a/arch/arm/mach-omap2/sleep34xx.S >> +++ b/arch/arm/mach-omap2/sleep34xx.S >> @@ -520,72 +520,17 @@ clean_caches: >> cmp r9, #1 /* Check whether L2 inval is required or not*/ >> bne skip_l2_inval >> clean_l2: >> - /* read clidr */ >> - mrc p15, 1, r0, c0, c0, 1 >> - /* extract loc from clidr */ >> - ands r3, r0, #0x7000000 >> - /* left 
align loc bit field */ >> - mov r3, r3, lsr #23 >> - /* if loc is 0, then no need to clean */ >> - beq finished >> - /* start clean at cache level 0 */ >> - mov r10, #0 >> -loop1: >> - /* work out 3x current cache level */ >> - add r2, r10, r10, lsr #1 >> - /* extract cache type bits from clidr*/ >> - mov r1, r0, lsr r2 >> - /* mask of the bits for current cache only */ >> - and r1, r1, #7 >> - /* see what cache we have at this level */ >> - cmp r1, #2 >> - /* skip if no cache, or just i-cache */ >> - blt skip >> - /* select current cache level in cssr */ >> - mcr p15, 2, r10, c0, c0, 0 >> - /* isb to sych the new cssr&csidr */ >> - isb >> - /* read the new csidr */ >> - mrc p15, 1, r1, c0, c0, 0 >> - /* extract the length of the cache lines */ >> - and r2, r1, #7 >> - /* add 4 (line length offset) */ >> - add r2, r2, #4 >> - ldr r4, assoc_mask >> - /* find maximum number on the way size */ >> - ands r4, r4, r1, lsr #3 >> - /* find bit position of way size increment */ >> - clz r5, r4 >> - ldr r7, numset_mask >> - /* extract max number of the index size*/ >> - ands r7, r7, r1, lsr #13 >> -loop2: >> - mov r9, r4 >> - /* create working copy of max way size*/ >> -loop3: >> - /* factor way and cache number into r11 */ >> - orr r11, r10, r9, lsl r5 >> - /* factor index number into r11 */ >> - orr r11, r11, r7, lsl r2 >> - /*clean & invalidate by set/way */ >> - mcr p15, 0, r11, c7, c10, 2 >> - /* decrement the way*/ >> - subs r9, r9, #1 >> - bge loop3 >> - /*decrement the index */ >> - subs r7, r7, #1 >> - bge loop2 >> -skip: >> - add r10, r10, #2 >> - /* increment cache number */ >> - cmp r3, r10 >> - bgt loop1 >> -finished: >> - /*swith back to cache level 0 */ >> - mov r10, #0 >> - /* select current cache level in cssr */ >> - mcr p15, 2, r10, c0, c0, 0 >> - isb >> + /* >> + * Jump out to kernel flush routine >> + * - reuse that code is better >> + * - it executes in a cached space so is faster than refetch per- >> block >> + * - should be faster and will change with 
kernel >> + * - 'might' have to copy address, load and jump to it > It would be good to clarify that this is needed to maintain the 'lr' > when the code is executed from SRAM. > Agreed. Some comments have been posted at http://marc.info/?l=linux-omap&m=129016170719489&w=2. >> + */ >> + ldr r1, kernel_flush >> + mov lr, pc >> + bx r1 >> + >> skip_l2_inval: >> /* Data memory barrier and Data sync barrier */ >> mov r1, #0 >> @@ -668,5 +613,7 @@ cache_pred_disable_mask: >> .word 0xFFFFE7FB >> control_stat: >> .word CONTROL_STAT >> +kernel_flush: >> + .word v7_flush_dcache_all >> ENTRY(omap34xx_cpu_suspend_sz) >> .word . - omap34xx_cpu_suspend > > Otherwise, > Acked-by: Santosh Shilimkar <santosh.shilimkar@xxxxxx> Acked-by: Jean Pihet <j-pihet@xxxxxx> > >> -- >> 1.6.3.3 >> >> -- >> To unsubscribe from this list: send the line "unsubscribe linux-omap" in >> the body of a message to majordomo@xxxxxxxxxxxxxxx >> More majordomo info at http://vger.kernel.org/majordomo-info.html > Jean -- To unsubscribe from this list: send the line "unsubscribe linux-omap" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html