On Tue, Jun 06, 2017 at 11:14:41AM +0800, Huacai Chen wrote: > For multi-node Loongson-3 (NUMA configuration), r4k_blast_scache() can > only flush Node-0's scache. So we add r4k_blast_scache_node() by using > (CAC_BASE | (node_id << 44)) instead of CKSEG0 as the start address. > > Cc: stable@xxxxxxxxxxxxxxx > Signed-off-by: Huacai Chen <chenhc@xxxxxxxxxx> > --- > arch/mips/include/asm/r4kcache.h | 26 ++++++++++++++++++++++++++ > arch/mips/mm/c-r4k.c | 33 ++++++++++++++++++++++++++++++++- > 2 files changed, 58 insertions(+), 1 deletion(-) > > diff --git a/arch/mips/include/asm/r4kcache.h b/arch/mips/include/asm/r4kcache.h > index 7f12d7e..aa615e3 100644 > --- a/arch/mips/include/asm/r4kcache.h > +++ b/arch/mips/include/asm/r4kcache.h > @@ -747,4 +747,30 @@ __BUILD_BLAST_CACHE_RANGE(s, scache, Hit_Writeback_Inv_SD, , ) > __BUILD_BLAST_CACHE_RANGE(inv_d, dcache, Hit_Invalidate_D, , ) > __BUILD_BLAST_CACHE_RANGE(inv_s, scache, Hit_Invalidate_SD, , ) > > +#ifdef CONFIG_CPU_LOONGSON3 > +#define __BUILD_BLAST_CACHE_NODE(pfx, desc, indexop, hitop, lsize) \ > +static inline void blast_##pfx##cache##lsize##_node(long node) \ > +{ \ > + unsigned long start = CAC_BASE | (node << 44); \ > + unsigned long end = start + current_cpu_data.desc.waysize; \ > + unsigned long ws_inc = 1UL << current_cpu_data.desc.waybit; \ > + unsigned long ws_end = current_cpu_data.desc.ways << \ > + current_cpu_data.desc.waybit; \ > + unsigned long ws, addr; \ > + \ > + __##pfx##flush_prologue \ > + \ > + for (ws = 0; ws < ws_end; ws += ws_inc) \ > + for (addr = start; addr < end; addr += lsize * 32) \ > + cache##lsize##_unroll32(addr|ws, indexop); \ > + \ > + __##pfx##flush_epilogue \ > +} > + > +__BUILD_BLAST_CACHE_NODE(s, scache, Index_Writeback_Inv_SD, Hit_Writeback_Inv_SD, 16) > +__BUILD_BLAST_CACHE_NODE(s, scache, Index_Writeback_Inv_SD, Hit_Writeback_Inv_SD, 32) > +__BUILD_BLAST_CACHE_NODE(s, scache, Index_Writeback_Inv_SD, Hit_Writeback_Inv_SD, 64) > +__BUILD_BLAST_CACHE_NODE(s, scache, 
Index_Writeback_Inv_SD, Hit_Writeback_Inv_SD, 128) > +#endif This all expands to just inline functions which generate no code if they're unused, so you can drop the #ifdef. However, a comment explaining why this function is only required for Loongson 3 would be great! > + > #endif /* _ASM_R4KCACHE_H */ > diff --git a/arch/mips/mm/c-r4k.c b/arch/mips/mm/c-r4k.c > index 3fe99cb..0a49af0 100644 > --- a/arch/mips/mm/c-r4k.c > +++ b/arch/mips/mm/c-r4k.c > @@ -459,11 +459,29 @@ static void r4k_blast_scache_setup(void) > r4k_blast_scache = blast_scache128; > } > > +static void (* r4k_blast_scache_node)(long node); > + > +static void r4k_blast_scache_node_setup(void) > +{ > + unsigned long sc_lsize = cpu_scache_line_size(); > + > + r4k_blast_scache_node = (void *)cache_noop; > +#ifdef CONFIG_CPU_LOONGSON3 > + if (sc_lsize == 16) > + r4k_blast_scache_node = blast_scache16_node; > + else if (sc_lsize == 32) > + r4k_blast_scache_node = blast_scache32_node; > + else if (sc_lsize == 64) > + r4k_blast_scache_node = blast_scache64_node; > + else if (sc_lsize == 128) > + r4k_blast_scache_node = blast_scache128_node; > +#endif No #ifdefs, please. Instead you can check the CPU type with something like if (current_cpu_type() == CPU_LOONGSON3) { ... } __get_cpu_type() in arch/mips/include/asm/cpu-type.h will then ensure that GCC knows it can optimize things for the CPU type(s) in use. 
> + > static inline void local_r4k___flush_cache_all(void * args) > { > switch (current_cpu_type()) { > case CPU_LOONGSON2: > - case CPU_LOONGSON3: > case CPU_R4000SC: > case CPU_R4000MC: > case CPU_R4400SC: > @@ -480,6 +498,10 @@ static inline void local_r4k___flush_cache_all(void * args) > r4k_blast_scache(); > break; > > + case CPU_LOONGSON3: > + r4k_blast_scache_node(get_ebase_cpunum() >> 2); > + break; > + > case CPU_BMIPS5000: > r4k_blast_scache(); > __sync(); > @@ -840,7 +862,11 @@ static void r4k_dma_cache_wback_inv(unsigned long addr, unsigned long size) > preempt_disable(); > if (cpu_has_inclusive_pcaches) { > if (size >= scache_size) > +#ifndef CONFIG_CPU_LOONGSON3 > r4k_blast_scache(); > +#else > + r4k_blast_scache_node((addr >> 44) & 0xF); > +#endif Ditto. > else > blast_scache_range(addr, addr + size); > preempt_enable(); > @@ -873,7 +899,11 @@ static void r4k_dma_cache_inv(unsigned long addr, unsigned long size) > preempt_disable(); > if (cpu_has_inclusive_pcaches) { > if (size >= scache_size) > +#ifndef CONFIG_CPU_LOONGSON3 > r4k_blast_scache(); > +#else > + r4k_blast_scache_node((addr >> 44) & 0xF); > +#endif Ditto. > else { > /* > * There is no clearly documented alignment requirement > @@ -1903,6 +1933,7 @@ void r4k_cache_init(void) > r4k_blast_scache_page_setup(); > r4k_blast_scache_page_indexed_setup(); > r4k_blast_scache_setup(); > + r4k_blast_scache_node_setup(); > #ifdef CONFIG_EVA > r4k_blast_dcache_user_page_setup(); > r4k_blast_icache_user_page_setup(); Ralf