On Wed, Jan 27, 2016 at 01:02:38PM +0800, Huacai Chen wrote:
> The STFill Buffer is located between the core and the L1 cache; it causes
> memory accesses to go out of order, so writel/outl need a barrier. Loongson 3
> has a bug where di cannot save the irq flags, so we need an mfc0.
Shouldn't it use that even without CONFIG_LOONGSON3_ENHANCEMENT then, so
as not to break the "generic kernel to run on all Loongson 3 machines"?
Cheers
James
>
> On Tue, Jan 26, 2016 at 10:19 PM, James Hogan <james.hogan@xxxxxxxxxx> wrote:
> > On Tue, Jan 26, 2016 at 09:26:24PM +0800, Huacai Chen wrote:
> >> New Loongson 3 CPU (since Loongson-3A R2, as opposed to Loongson-3A R1,
> >> Loongson-3B R1 and Loongson-3B R2) has many enhancements, such as FTLB,
> >> L1-VCache, EI/DI/Wait/Prefetch instruction, DSP/DSPv2 ASE, User Local
> >> register, Read-Inhibit/Execute-Inhibit, SFB (Store Fill Buffer), Fast
> >> TLB refill support, etc.
> >>
> >> This patch introduces a config option, CONFIG_LOONGSON3_ENHANCEMENT, to
> >> enable those enhancements which cannot be probed at run time. If you
> >> want a generic kernel to run on all Loongson 3 machines, please say 'N'
> >> here. If you want a high-performance kernel to run on new Loongson 3
> >> machines only, please say 'Y' here.
> >>
> >> Signed-off-by: Huacai Chen <chenhc@xxxxxxxxxx>
> >> ---
> >> arch/mips/Kconfig | 18 ++++++++++++++++++
> >> arch/mips/include/asm/hazards.h | 7 ++++---
> >> arch/mips/include/asm/io.h | 10 +++++-----
> >> arch/mips/include/asm/irqflags.h | 5 +++++
> >> .../include/asm/mach-loongson64/kernel-entry-init.h | 12 ++++++++++++
> >> arch/mips/mm/c-r4k.c | 3 +++
> >> arch/mips/mm/page.c | 9 +++++++++
> >> 7 files changed, 56 insertions(+), 8 deletions(-)
> >>
> >> diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
> >> index 15faaf0..e6d6f7b 100644
> >> --- a/arch/mips/Kconfig
> >> +++ b/arch/mips/Kconfig
> >> @@ -1349,6 +1349,24 @@ config CPU_LOONGSON3
> >> The Loongson 3 processor implements the MIPS64R2 instruction
> >> set with many extensions.
> >>
> >> +config LOONGSON3_ENHANCEMENT
> >> + bool "New Loongson 3 CPU Enhancements"
> >> + default n
> >
> > no need, n is the default.
> >
> >> + select CPU_MIPSR2
> >> + select CPU_HAS_PREFETCH
> >> + depends on CPU_LOONGSON3
> >> + help
> >> + New Loongson 3 CPU (since Loongson-3A R2, as opposed to Loongson-3A
> >> + R1, Loongson-3B R1 and Loongson-3B R2) has many enhancements, such as
> >> + FTLB, L1-VCache, EI/DI/Wait/Prefetch instruction, DSP/DSPv2 ASE, User
> >> + Local register, Read-Inhibit/Execute-Inhibit, SFB (Store Fill Buffer),
> >> + Fast TLB refill support, etc.
> >> +
> >> + This option enables those enhancements which cannot be probed at run
> >> + time. If you want a generic kernel to run on all Loongson 3 machines,
> >> + please say 'N' here. If you want a high-performance kernel to run on
> >> + new Loongson 3 machines only, please say 'Y' here.
> >> +
> >> config CPU_LOONGSON2E
> >> bool "Loongson 2E"
> >> depends on SYS_HAS_CPU_LOONGSON2E
> >> diff --git a/arch/mips/include/asm/hazards.h b/arch/mips/include/asm/hazards.h
> >> index 7b99efd..dbb1eb6 100644
> >> --- a/arch/mips/include/asm/hazards.h
> >> +++ b/arch/mips/include/asm/hazards.h
> >> @@ -22,7 +22,8 @@
> >> /*
> >> * TLB hazards
> >> */
> >> -#if defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_MIPSR6) && !defined(CONFIG_CPU_CAVIUM_OCTEON)
> >> +#if (defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_MIPSR6)) && \
> >> + !defined(CONFIG_CPU_CAVIUM_OCTEON) && !defined(CONFIG_LOONGSON3_ENHANCEMENT)
> >>
> >> /*
> >> * MIPSR2 defines ehb for hazard avoidance
> >> @@ -155,8 +156,8 @@ do { \
> >> } while (0)
> >>
> >> #elif defined(CONFIG_MIPS_ALCHEMY) || defined(CONFIG_CPU_CAVIUM_OCTEON) || \
> >> - defined(CONFIG_CPU_LOONGSON2) || defined(CONFIG_CPU_R10000) || \
> >> - defined(CONFIG_CPU_R5500) || defined(CONFIG_CPU_XLR)
> >> + defined(CONFIG_CPU_LOONGSON2) || defined(CONFIG_LOONGSON3_ENHANCEMENT) || \
> >> + defined(CONFIG_CPU_R10000) || defined(CONFIG_CPU_R5500) || defined(CONFIG_CPU_XLR)
> >>
> >> /*
> >> * R10000 rocks - all hazards handled in hardware, so this becomes a nobrainer.
> >> diff --git a/arch/mips/include/asm/io.h b/arch/mips/include/asm/io.h
> >> index 2b4dc7a..ecabc00 100644
> >> --- a/arch/mips/include/asm/io.h
> >> +++ b/arch/mips/include/asm/io.h
> >> @@ -304,10 +304,10 @@ static inline void iounmap(const volatile void __iomem *addr)
> >> #undef __IS_KSEG1
> >> }
> >>
> >> -#ifdef CONFIG_CPU_CAVIUM_OCTEON
> >> -#define war_octeon_io_reorder_wmb() wmb()
> >> +#if defined(CONFIG_CPU_CAVIUM_OCTEON) || defined(CONFIG_LOONGSON3_ENHANCEMENT)
> >> +#define war_io_reorder_wmb() wmb()
> >> #else
> >> -#define war_octeon_io_reorder_wmb() do { } while (0)
> >> +#define war_io_reorder_wmb() do { } while (0)
> >> #endif
> >
> > Doesn't this slow things down when enabled, or is it required due to
> > STFill buffer being enabled or something?
> >
> >>
> >> #define __BUILD_MEMORY_SINGLE(pfx, bwlq, type, irq) \
> >> @@ -318,7 +318,7 @@ static inline void pfx##write##bwlq(type val, \
> >> volatile type *__mem; \
> >> type __val; \
> >> \
> >> - war_octeon_io_reorder_wmb(); \
> >> + war_io_reorder_wmb(); \
> >> \
> >> __mem = (void *)__swizzle_addr_##bwlq((unsigned long)(mem)); \
> >> \
> >> @@ -387,7 +387,7 @@ static inline void pfx##out##bwlq##p(type val, unsigned long port) \
> >> volatile type *__addr; \
> >> type __val; \
> >> \
> >> - war_octeon_io_reorder_wmb(); \
> >> + war_io_reorder_wmb(); \
> >> \
> >> __addr = (void *)__swizzle_addr_##bwlq(mips_io_port_base + port); \
> >> \
> >> diff --git a/arch/mips/include/asm/irqflags.h b/arch/mips/include/asm/irqflags.h
> >> index 65c351e..12f80b5 100644
> >> --- a/arch/mips/include/asm/irqflags.h
> >> +++ b/arch/mips/include/asm/irqflags.h
> >> @@ -41,7 +41,12 @@ static inline unsigned long arch_local_irq_save(void)
> >> " .set push \n"
> >> " .set reorder \n"
> >> " .set noat \n"
> >> +#if defined(CONFIG_LOONGSON3_ENHANCEMENT)
> >> + " mfc0 %[flags], $12 \n"
> >> + " di \n"
> >
> > Does this somehow help performance, or is it necessary when STFill
> > buffer is enabled?
> >
> >> +#else
> >> " di %[flags] \n"
> >> +#endif
> >> " andi %[flags], 1 \n"
> >> " " __stringify(__irq_disable_hazard) " \n"
> >> " .set pop \n"
> >> diff --git a/arch/mips/include/asm/mach-loongson64/kernel-entry-init.h b/arch/mips/include/asm/mach-loongson64/kernel-entry-init.h
> >> index da83482..8393bc54 100644
> >> --- a/arch/mips/include/asm/mach-loongson64/kernel-entry-init.h
> >> +++ b/arch/mips/include/asm/mach-loongson64/kernel-entry-init.h
> >> @@ -26,6 +26,12 @@
> >> mfc0 t0, $5, 1
> >> or t0, (0x1 << 29)
> >> mtc0 t0, $5, 1
> >> +#ifdef CONFIG_LOONGSON3_ENHANCEMENT
> >> + /* Enable STFill Buffer */
> >> + mfc0 t0, $16, 6
> >> + or t0, 0x100
> >> + mtc0 t0, $16, 6
> >> +#endif
> >> _ehb
> >> .set pop
> >> #endif
> >> @@ -46,6 +52,12 @@
> >> mfc0 t0, $5, 1
> >> or t0, (0x1 << 29)
> >> mtc0 t0, $5, 1
> >> +#ifdef CONFIG_LOONGSON3_ENHANCEMENT
> >> + /* Enable STFill Buffer */
> >> + mfc0 t0, $16, 6
> >> + or t0, 0x100
> >> + mtc0 t0, $16, 6
> >> +#endif
> >
> > What does the STFill buffer do?
> >
> > Given that you can get a portable kernel without this, can this not be
> > done from C code depending on the PRid?
> >
> >> _ehb
> >> .set pop
> >> #endif
> >> diff --git a/arch/mips/mm/c-r4k.c b/arch/mips/mm/c-r4k.c
> >> index 65fb28c..903d8da 100644
> >> --- a/arch/mips/mm/c-r4k.c
> >> +++ b/arch/mips/mm/c-r4k.c
> >> @@ -1170,6 +1170,9 @@ static void probe_pcache(void)
> >> c->dcache.ways *
> >> c->dcache.linesz;
> >> c->dcache.waybit = 0;
> >> +#ifdef CONFIG_CPU_HAS_PREFETCH
> >> + c->options |= MIPS_CPU_PREFETCH;
> >> +#endif
> >
> > Can't do that based on PRid?
> >
> > Cheers
> > James
> >
> >> break;
> >>
> >> case CPU_CAVIUM_OCTEON3:
> >> diff --git a/arch/mips/mm/page.c b/arch/mips/mm/page.c
> >> index 885d73f..c41953c 100644
> >> --- a/arch/mips/mm/page.c
> >> +++ b/arch/mips/mm/page.c
> >> @@ -188,6 +188,15 @@ static void set_prefetch_parameters(void)
> >> }
> >> break;
> >>
> >> + case CPU_LOONGSON3:
> >> + /* Loongson-3 only supports Pref_Load/Pref_Store. */
> >> + pref_bias_clear_store = 128;
> >> + pref_bias_copy_load = 128;
> >> + pref_bias_copy_store = 128;
> >> + pref_src_mode = Pref_Load;
> >> + pref_dst_mode = Pref_Store;
> >> + break;
> >> +
> >> default:
> >> pref_bias_clear_store = 128;
> >> pref_bias_copy_load = 256;
> >> --
> >> 2.4.6
> >>
> >>
> >>
> >>
> >>
Attachment:
signature.asc
Description: Digital signature
