On Tue, Jun 13, 2017 at 4:40 PM, Ralf Baechle <ralf@xxxxxxxxxxxxxx> wrote: > On Tue, Jun 06, 2017 at 11:14:41AM +0800, Huacai Chen wrote: > >> For multi-node Loongson-3 (NUMA configuration), r4k_blast_scache() can >> only flush Node-0's scache. So we add r4k_blast_scache_node() by using >> (CAC_BASE | (node_id << 44)) instead of CKSEG0 as the start address. >> >> Cc: stable@xxxxxxxxxxxxxxx >> Signed-off-by: Huacai Chen <chenhc@xxxxxxxxxx> >> --- >> arch/mips/include/asm/r4kcache.h | 26 ++++++++++++++++++++++++++ >> arch/mips/mm/c-r4k.c | 33 ++++++++++++++++++++++++++++++++- >> 2 files changed, 58 insertions(+), 1 deletion(-) >> >> diff --git a/arch/mips/include/asm/r4kcache.h b/arch/mips/include/asm/r4kcache.h >> index 7f12d7e..aa615e3 100644 >> --- a/arch/mips/include/asm/r4kcache.h >> +++ b/arch/mips/include/asm/r4kcache.h >> @@ -747,4 +747,30 @@ __BUILD_BLAST_CACHE_RANGE(s, scache, Hit_Writeback_Inv_SD, , ) >> __BUILD_BLAST_CACHE_RANGE(inv_d, dcache, Hit_Invalidate_D, , ) >> __BUILD_BLAST_CACHE_RANGE(inv_s, scache, Hit_Invalidate_SD, , ) >> >> +#ifdef CONFIG_CPU_LOONGSON3 >> +#define __BUILD_BLAST_CACHE_NODE(pfx, desc, indexop, hitop, lsize) \ >> +static inline void blast_##pfx##cache##lsize##_node(long node) \ >> +{ \ >> + unsigned long start = CAC_BASE | (node << 44); \ >> + unsigned long end = start + current_cpu_data.desc.waysize; \ >> + unsigned long ws_inc = 1UL << current_cpu_data.desc.waybit; \ >> + unsigned long ws_end = current_cpu_data.desc.ways << \ >> + current_cpu_data.desc.waybit; \ >> + unsigned long ws, addr; \ >> + \ >> + __##pfx##flush_prologue \ >> + \ >> + for (ws = 0; ws < ws_end; ws += ws_inc) \ >> + for (addr = start; addr < end; addr += lsize * 32) \ >> + cache##lsize##_unroll32(addr|ws, indexop); \ >> + \ >> + __##pfx##flush_epilogue \ >> +} >> + >> +__BUILD_BLAST_CACHE_NODE(s, scache, Index_Writeback_Inv_SD, Hit_Writeback_Inv_SD, 16) >> +__BUILD_BLAST_CACHE_NODE(s, scache, Index_Writeback_Inv_SD, Hit_Writeback_Inv_SD, 32) >> 
+__BUILD_BLAST_CACHE_NODE(s, scache, Index_Writeback_Inv_SD, Hit_Writeback_Inv_SD, 64) >> +__BUILD_BLAST_CACHE_NODE(s, scache, Index_Writeback_Inv_SD, Hit_Writeback_Inv_SD, 128) >> +#endif > > These all expand to just inline functions which generate no code if they're > unused, so you can drop the #ifdef. > > However a comment explaining why this function is only required for > Loongson 3 would be great! Address space is very specific to cpu-type. I don't know whether other cpus need r4k_blast_scache_node(), and I don't know how to implement r4k_blast_scache_node() for other cpus either (if they really need this). So, I use #ifdefs. > >> + >> #endif /* _ASM_R4KCACHE_H */ >> diff --git a/arch/mips/mm/c-r4k.c b/arch/mips/mm/c-r4k.c >> index 3fe99cb..0a49af0 100644 >> --- a/arch/mips/mm/c-r4k.c >> +++ b/arch/mips/mm/c-r4k.c >> @@ -459,11 +459,29 @@ static void r4k_blast_scache_setup(void) >> r4k_blast_scache = blast_scache128; >> } >> >> +static void (* r4k_blast_scache_node)(long node); >> + >> +static void r4k_blast_scache_node_setup(void) >> +{ >> + unsigned long sc_lsize = cpu_scache_line_size(); >> + >> + r4k_blast_scache_node = (void *)cache_noop; >> +#ifdef CONFIG_CPU_LOONGSON3 >> + if (sc_lsize == 16) >> + r4k_blast_scache_node = blast_scache16_node; >> + else if (sc_lsize == 32) >> + r4k_blast_scache_node = blast_scache32_node; >> + else if (sc_lsize == 64) >> + r4k_blast_scache_node = blast_scache64_node; >> + else if (sc_lsize == 128) >> + r4k_blast_scache_node = blast_scache128_node; >> +#endif > > No #ifdefs please. Instead you can check the CPU type with something like > > if (current_cpu_type() == CPU_LOONGSON3) { > ... > } > > __get_cpu_type() in include/asm/cpu-type.h will then ensure that GCC > knows it can optimize things for the CPU type(s) in use. 
> >> + >> static inline void local_r4k___flush_cache_all(void * args) >> { >> switch (current_cpu_type()) { >> case CPU_LOONGSON2: >> - case CPU_LOONGSON3: >> case CPU_R4000SC: >> case CPU_R4000MC: >> case CPU_R4400SC: >> @@ -480,6 +498,10 @@ static inline void local_r4k___flush_cache_all(void * args) >> r4k_blast_scache(); >> break; >> >> + case CPU_LOONGSON3: >> + r4k_blast_scache_node(get_ebase_cpunum() >> 2); >> + break; >> + >> case CPU_BMIPS5000: >> r4k_blast_scache(); >> __sync(); >> @@ -840,7 +862,11 @@ static void r4k_dma_cache_wback_inv(unsigned long addr, unsigned long size) >> preempt_disable(); >> if (cpu_has_inclusive_pcaches) { >> if (size >= scache_size) >> +#ifndef CONFIG_CPU_LOONGSON3 >> r4k_blast_scache(); >> +#else >> + r4k_blast_scache_node((addr >> 44) & 0xF); >> +#endif > > Ditto. > >> else >> blast_scache_range(addr, addr + size); >> preempt_enable(); >> @@ -873,7 +899,11 @@ static void r4k_dma_cache_inv(unsigned long addr, unsigned long size) >> preempt_disable(); >> if (cpu_has_inclusive_pcaches) { >> if (size >= scache_size) >> +#ifndef CONFIG_CPU_LOONGSON3 >> r4k_blast_scache(); >> +#else >> + r4k_blast_scache_node((addr >> 44) & 0xF); >> +#endif > > Ditto. > >> else { >> /* >> * There is no clearly documented alignment requirement >> @@ -1903,6 +1933,7 @@ void r4k_cache_init(void) >> r4k_blast_scache_page_setup(); >> r4k_blast_scache_page_indexed_setup(); >> r4k_blast_scache_setup(); >> + r4k_blast_scache_node_setup(); >> #ifdef CONFIG_EVA >> r4k_blast_dcache_user_page_setup(); >> r4k_blast_icache_user_page_setup(); > > Ralf >