Re: [PATCH V4 2/9] MIPS: c-r4k: Add r4k_blast_scache_node for Loongson-3

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Tue, Jun 13, 2017 at 4:40 PM, Ralf Baechle <ralf@xxxxxxxxxxxxxx> wrote:
> On Tue, Jun 06, 2017 at 11:14:41AM +0800, Huacai Chen wrote:
>
>> For multi-node Loongson-3 (NUMA configuration), r4k_blast_scache() can
>> only flush Node-0's scache. So we add r4k_blast_scache_node() by using
>> (CAC_BASE | (node_id << 44)) instead of CKSEG0 as the start address.
>>
>> Cc: stable@xxxxxxxxxxxxxxx
>> Signed-off-by: Huacai Chen <chenhc@xxxxxxxxxx>
>> ---
>>  arch/mips/include/asm/r4kcache.h | 26 ++++++++++++++++++++++++++
>>  arch/mips/mm/c-r4k.c             | 33 ++++++++++++++++++++++++++++++++-
>>  2 files changed, 58 insertions(+), 1 deletion(-)
>>
>> diff --git a/arch/mips/include/asm/r4kcache.h b/arch/mips/include/asm/r4kcache.h
>> index 7f12d7e..aa615e3 100644
>> --- a/arch/mips/include/asm/r4kcache.h
>> +++ b/arch/mips/include/asm/r4kcache.h
>> @@ -747,4 +747,30 @@ __BUILD_BLAST_CACHE_RANGE(s, scache, Hit_Writeback_Inv_SD, , )
>>  __BUILD_BLAST_CACHE_RANGE(inv_d, dcache, Hit_Invalidate_D, , )
>>  __BUILD_BLAST_CACHE_RANGE(inv_s, scache, Hit_Invalidate_SD, , )
>>
>> +#ifdef CONFIG_CPU_LOONGSON3
>> +#define __BUILD_BLAST_CACHE_NODE(pfx, desc, indexop, hitop, lsize)   \
>> +static inline void blast_##pfx##cache##lsize##_node(long node)               \
>> +{                                                                    \
>> +     unsigned long start = CAC_BASE | (node << 44);                  \
>> +     unsigned long end = start + current_cpu_data.desc.waysize;      \
>> +     unsigned long ws_inc = 1UL << current_cpu_data.desc.waybit;     \
>> +     unsigned long ws_end = current_cpu_data.desc.ways <<            \
>> +                            current_cpu_data.desc.waybit;            \
>> +     unsigned long ws, addr;                                         \
>> +                                                                     \
>> +     __##pfx##flush_prologue                                         \
>> +                                                                     \
>> +     for (ws = 0; ws < ws_end; ws += ws_inc)                         \
>> +             for (addr = start; addr < end; addr += lsize * 32)      \
>> +                     cache##lsize##_unroll32(addr|ws, indexop);      \
>> +                                                                     \
>> +     __##pfx##flush_epilogue                                         \
>> +}
>> +
>> +__BUILD_BLAST_CACHE_NODE(s, scache, Index_Writeback_Inv_SD, Hit_Writeback_Inv_SD, 16)
>> +__BUILD_BLAST_CACHE_NODE(s, scache, Index_Writeback_Inv_SD, Hit_Writeback_Inv_SD, 32)
>> +__BUILD_BLAST_CACHE_NODE(s, scache, Index_Writeback_Inv_SD, Hit_Writeback_Inv_SD, 64)
>> +__BUILD_BLAST_CACHE_NODE(s, scache, Index_Writeback_Inv_SD, Hit_Writeback_Inv_SD, 128)
>> +#endif
>
> This all expands to just inline functions which generate no code if they're
> unused, so you can drop the #ifdef.
>
> However a comment explaining why this function is only required for
> Loongson 3 would be great!
The address space layout is very specific to the CPU type. I don't know
whether other CPUs need r4k_blast_scache_node(), and I don't know how to
implement r4k_blast_scache_node() for other CPUs either (if they really
need this). So, I used #ifdefs.

>
>> +
>>  #endif /* _ASM_R4KCACHE_H */
>> diff --git a/arch/mips/mm/c-r4k.c b/arch/mips/mm/c-r4k.c
>> index 3fe99cb..0a49af0 100644
>> --- a/arch/mips/mm/c-r4k.c
>> +++ b/arch/mips/mm/c-r4k.c
>> @@ -459,11 +459,29 @@ static void r4k_blast_scache_setup(void)
>>               r4k_blast_scache = blast_scache128;
>>  }
>>
>> +static void (* r4k_blast_scache_node)(long node);
>> +
>> +static void r4k_blast_scache_node_setup(void)
>> +{
>> +     unsigned long sc_lsize = cpu_scache_line_size();
>> +
>> +     r4k_blast_scache_node = (void *)cache_noop;
>> +#ifdef CONFIG_CPU_LOONGSON3
>> +     if (sc_lsize == 16)
>> +             r4k_blast_scache_node = blast_scache16_node;
>> +     else if (sc_lsize == 32)
>> +             r4k_blast_scache_node = blast_scache32_node;
>> +     else if (sc_lsize == 64)
>> +             r4k_blast_scache_node = blast_scache64_node;
>> +     else if (sc_lsize == 128)
>> +             r4k_blast_scache_node = blast_scache128_node;
>> +#endif
>
> No #ifdefs please.  Instead you can check the CPU type with something like
>
>         if (current_cpu_type() == CPU_LOONGSON3) {
>                 ...
>         }
>
> __get_cpu_type() in include/asm/cpu-type.h will then ensure that GCC
> knows it can optimize things for the CPU type(s) in use.
>
>> +
>>  static inline void local_r4k___flush_cache_all(void * args)
>>  {
>>       switch (current_cpu_type()) {
>>       case CPU_LOONGSON2:
>> -     case CPU_LOONGSON3:
>>       case CPU_R4000SC:
>>       case CPU_R4000MC:
>>       case CPU_R4400SC:
>> @@ -480,6 +498,10 @@ static inline void local_r4k___flush_cache_all(void * args)
>>               r4k_blast_scache();
>>               break;
>>
>> +     case CPU_LOONGSON3:
>> +             r4k_blast_scache_node(get_ebase_cpunum() >> 2);
>> +             break;
>> +
>>       case CPU_BMIPS5000:
>>               r4k_blast_scache();
>>               __sync();
>> @@ -840,7 +862,11 @@ static void r4k_dma_cache_wback_inv(unsigned long addr, unsigned long size)
>>       preempt_disable();
>>       if (cpu_has_inclusive_pcaches) {
>>               if (size >= scache_size)
>> +#ifndef CONFIG_CPU_LOONGSON3
>>                       r4k_blast_scache();
>> +#else
>> +                     r4k_blast_scache_node((addr >> 44) & 0xF);
>> +#endif
>
> Ditto.
>
>>               else
>>                       blast_scache_range(addr, addr + size);
>>               preempt_enable();
>> @@ -873,7 +899,11 @@ static void r4k_dma_cache_inv(unsigned long addr, unsigned long size)
>>       preempt_disable();
>>       if (cpu_has_inclusive_pcaches) {
>>               if (size >= scache_size)
>> +#ifndef CONFIG_CPU_LOONGSON3
>>                       r4k_blast_scache();
>> +#else
>> +                     r4k_blast_scache_node((addr >> 44) & 0xF);
>> +#endif
>
> Ditto.
>
>>               else {
>>                       /*
>>                        * There is no clearly documented alignment requirement
>> @@ -1903,6 +1933,7 @@ void r4k_cache_init(void)
>>       r4k_blast_scache_page_setup();
>>       r4k_blast_scache_page_indexed_setup();
>>       r4k_blast_scache_setup();
>> +     r4k_blast_scache_node_setup();
>>  #ifdef CONFIG_EVA
>>       r4k_blast_dcache_user_page_setup();
>>       r4k_blast_icache_user_page_setup();
>
>   Ralf
>



[Index of Archives]     [Linux Kernel]     [Kernel Development Newbies]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite Hiking]     [Linux Kernel]     [Linux SCSI]