On Tue, Jan 26, 2016 at 09:26:24PM +0800, Huacai Chen wrote: > New Loongson 3 CPU (since Loongson-3A R2, as opposed to Loongson-3A R1, > Loongson-3B R1 and Loongson-3B R2) has many enhancements, such as FTLB, > L1-VCache, EI/DI/Wait/Prefetch instruction, DSP/DSPv2 ASE, User Local > register, Read-Inhibit/Execute-Inhibit, SFB (Store Fill Buffer), Fast > TLB refill support, etc. > > This patch introduces a config option, CONFIG_LOONGSON3_ENHANCEMENT, to > enable those enhancements which cannot be probed at run time. If you > want a generic kernel to run on all Loongson 3 machines, please say 'N' > here. If you want a high-performance kernel to run on new Loongson 3 > machines only, please say 'Y' here. > > Signed-off-by: Huacai Chen <chenhc@xxxxxxxxxx> > --- > arch/mips/Kconfig | 18 ++++++++++++++++++ > arch/mips/include/asm/hazards.h | 7 ++++--- > arch/mips/include/asm/io.h | 10 +++++----- > arch/mips/include/asm/irqflags.h | 5 +++++ > .../include/asm/mach-loongson64/kernel-entry-init.h | 12 ++++++++++++ > arch/mips/mm/c-r4k.c | 3 +++ > arch/mips/mm/page.c | 9 +++++++++ > 7 files changed, 56 insertions(+), 8 deletions(-) > > diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig > index 15faaf0..e6d6f7b 100644 > --- a/arch/mips/Kconfig > +++ b/arch/mips/Kconfig > @@ -1349,6 +1349,24 @@ config CPU_LOONGSON3 > The Loongson 3 processor implements the MIPS64R2 instruction > set with many extensions. > > +config LOONGSON3_ENHANCEMENT > + bool "New Loongson 3 CPU Enhancements" > + default n no need, n is the default. > + select CPU_MIPSR2 > + select CPU_HAS_PREFETCH > + depends on CPU_LOONGSON3 > + help > + New Loongson 3 CPU (since Loongson-3A R2, as opposed to Loongson-3A > + R1, Loongson-3B R1 and Loongson-3B R2) has many enhancements, such as > + FTLB, L1-VCache, EI/DI/Wait/Prefetch instruction, DSP/DSPv2 ASE, User > + Local register, Read-Inhibit/Execute-Inhibit, SFB (Store Fill Buffer), > + Fast TLB refill support, etc. 
> + > + This option enables those enhancements which cannot be probed at run > + time. If you want a generic kernel to run on all Loongson 3 machines, > + please say 'N' here. If you want a high-performance kernel to run on > + new Loongson 3 machines only, please say 'Y' here. > + > config CPU_LOONGSON2E > bool "Loongson 2E" > depends on SYS_HAS_CPU_LOONGSON2E > diff --git a/arch/mips/include/asm/hazards.h b/arch/mips/include/asm/hazards.h > index 7b99efd..dbb1eb6 100644 > --- a/arch/mips/include/asm/hazards.h > +++ b/arch/mips/include/asm/hazards.h > @@ -22,7 +22,8 @@ > /* > * TLB hazards > */ > -#if defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_MIPSR6) && !defined(CONFIG_CPU_CAVIUM_OCTEON) > +#if (defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_MIPSR6)) && \ > + !defined(CONFIG_CPU_CAVIUM_OCTEON) && !defined(CONFIG_LOONGSON3_ENHANCEMENT) > > /* > * MIPSR2 defines ehb for hazard avoidance > @@ -155,8 +156,8 @@ do { \ > } while (0) > > #elif defined(CONFIG_MIPS_ALCHEMY) || defined(CONFIG_CPU_CAVIUM_OCTEON) || \ > - defined(CONFIG_CPU_LOONGSON2) || defined(CONFIG_CPU_R10000) || \ > - defined(CONFIG_CPU_R5500) || defined(CONFIG_CPU_XLR) > + defined(CONFIG_CPU_LOONGSON2) || defined(CONFIG_LOONGSON3_ENHANCEMENT) || \ > + defined(CONFIG_CPU_R10000) || defined(CONFIG_CPU_R5500) || defined(CONFIG_CPU_XLR) > > /* > * R10000 rocks - all hazards handled in hardware, so this becomes a nobrainer. 
> diff --git a/arch/mips/include/asm/io.h b/arch/mips/include/asm/io.h > index 2b4dc7a..ecabc00 100644 > --- a/arch/mips/include/asm/io.h > +++ b/arch/mips/include/asm/io.h > @@ -304,10 +304,10 @@ static inline void iounmap(const volatile void __iomem *addr) > #undef __IS_KSEG1 > } > > -#ifdef CONFIG_CPU_CAVIUM_OCTEON > -#define war_octeon_io_reorder_wmb() wmb() > +#if defined(CONFIG_CPU_CAVIUM_OCTEON) || defined(CONFIG_LOONGSON3_ENHANCEMENT) > +#define war_io_reorder_wmb() wmb() > #else > -#define war_octeon_io_reorder_wmb() do { } while (0) > +#define war_io_reorder_wmb() do { } while (0) > #endif Doesn't this slow things down when enabled, or is it required due to STFill buffer being enabled or something? > > #define __BUILD_MEMORY_SINGLE(pfx, bwlq, type, irq) \ > @@ -318,7 +318,7 @@ static inline void pfx##write##bwlq(type val, \ > volatile type *__mem; \ > type __val; \ > \ > - war_octeon_io_reorder_wmb(); \ > + war_io_reorder_wmb(); \ > \ > __mem = (void *)__swizzle_addr_##bwlq((unsigned long)(mem)); \ > \ > @@ -387,7 +387,7 @@ static inline void pfx##out##bwlq##p(type val, unsigned long port) \ > volatile type *__addr; \ > type __val; \ > \ > - war_octeon_io_reorder_wmb(); \ > + war_io_reorder_wmb(); \ > \ > __addr = (void *)__swizzle_addr_##bwlq(mips_io_port_base + port); \ > \ > diff --git a/arch/mips/include/asm/irqflags.h b/arch/mips/include/asm/irqflags.h > index 65c351e..12f80b5 100644 > --- a/arch/mips/include/asm/irqflags.h > +++ b/arch/mips/include/asm/irqflags.h > @@ -41,7 +41,12 @@ static inline unsigned long arch_local_irq_save(void) > " .set push \n" > " .set reorder \n" > " .set noat \n" > +#if defined(CONFIG_LOONGSON3_ENHANCEMENT) > + " mfc0 %[flags], $12 \n" > + " di \n" Does this somehow help performance, or is it necessary when STFill buffer is enabled? 
> +#else > " di %[flags] \n" > +#endif > " andi %[flags], 1 \n" > " " __stringify(__irq_disable_hazard) " \n" > " .set pop \n" > diff --git a/arch/mips/include/asm/mach-loongson64/kernel-entry-init.h b/arch/mips/include/asm/mach-loongson64/kernel-entry-init.h > index da83482..8393bc54 100644 > --- a/arch/mips/include/asm/mach-loongson64/kernel-entry-init.h > +++ b/arch/mips/include/asm/mach-loongson64/kernel-entry-init.h > @@ -26,6 +26,12 @@ > mfc0 t0, $5, 1 > or t0, (0x1 << 29) > mtc0 t0, $5, 1 > +#ifdef CONFIG_LOONGSON3_ENHANCEMENT > + /* Enable STFill Buffer */ > + mfc0 t0, $16, 6 > + or t0, 0x100 > + mtc0 t0, $16, 6 > +#endif > _ehb > .set pop > #endif > @@ -46,6 +52,12 @@ > mfc0 t0, $5, 1 > or t0, (0x1 << 29) > mtc0 t0, $5, 1 > +#ifdef CONFIG_LOONGSON3_ENHANCEMENT > + /* Enable STFill Buffer */ > + mfc0 t0, $16, 6 > + or t0, 0x100 > + mtc0 t0, $16, 6 > +#endif What does the STFill buffer do? Given that you can get a portable kernel without this, can this not be done from C code depending on the PRid? > _ehb > .set pop > #endif > diff --git a/arch/mips/mm/c-r4k.c b/arch/mips/mm/c-r4k.c > index 65fb28c..903d8da 100644 > --- a/arch/mips/mm/c-r4k.c > +++ b/arch/mips/mm/c-r4k.c > @@ -1170,6 +1170,9 @@ static void probe_pcache(void) > c->dcache.ways * > c->dcache.linesz; > c->dcache.waybit = 0; > +#ifdef CONFIG_CPU_HAS_PREFETCH > + c->options |= MIPS_CPU_PREFETCH; > +#endif Can't do that based on PRid? Cheers James > break; > > case CPU_CAVIUM_OCTEON3: > diff --git a/arch/mips/mm/page.c b/arch/mips/mm/page.c > index 885d73f..c41953c 100644 > --- a/arch/mips/mm/page.c > +++ b/arch/mips/mm/page.c > @@ -188,6 +188,15 @@ static void set_prefetch_parameters(void) > } > break; > > + case CPU_LOONGSON3: > + /* Loongson-3 only support the Pref_Load/Pref_Store. 
*/ > + pref_bias_clear_store = 128; > + pref_bias_copy_load = 128; > + pref_bias_copy_store = 128; > + pref_src_mode = Pref_Load; > + pref_dst_mode = Pref_Store; > + break; > + > default: > pref_bias_clear_store = 128; > pref_bias_copy_load = 256; > -- > 2.4.6 > > > > >
Attachment:
signature.asc
Description: Digital signature