PA-RISC uses a global spinlock to protect page table updates in the TLB fault handlers. When multiple cores take TLB faults simultaneously, the cache line containing the spinlock becomes a bottleneck. This patch embeds the spinlock in the top-level page directory, so that every process has its own lock: the pgd allocation order is increased by one and the lock is placed at offset PAGE_SIZE << (PGD_ALLOC_ORDER - 1) from the start of the pgd. The kernel page directory (swapper_pg_dir) and the purge_tlb_start/purge_tlb_end macros keep using a global lock, which is renamed to pa_tlb_flush_lock. It improves performance by 30% when doing parallel compilations. (Please test it on 32-bit kernels - I don't have a machine for that.) Signed-off-by: Mikulas Patocka <mpatocka@xxxxxxxxxx> --- arch/parisc/include/asm/pgalloc.h | 1 + arch/parisc/include/asm/pgtable.h | 35 +++++++++++++++++++++++------------ arch/parisc/include/asm/tlbflush.h | 6 +++--- arch/parisc/kernel/cache.c | 2 +- arch/parisc/kernel/entry.S | 8 ++------ 5 files changed, 30 insertions(+), 22 deletions(-) Index: linux-5.1-rc3/arch/parisc/include/asm/pgtable.h =================================================================== --- linux-5.1-rc3.orig/arch/parisc/include/asm/pgtable.h 2019-04-06 11:12:03.000000000 +0200 +++ linux-5.1-rc3/arch/parisc/include/asm/pgtable.h 2019-04-06 11:12:22.000000000 +0200 @@ -17,7 +17,7 @@ #include <asm/processor.h> #include <asm/cache.h> -extern spinlock_t pa_tlb_lock; +static inline spinlock_t *pgd_spinlock(pgd_t *); /* * kern_addr_valid(ADDR) tests if ADDR is pointing to valid kernel @@ -59,11 +59,11 @@ static inline void purge_tlb_entries(str do { \ pte_t old_pte; \ unsigned long flags; \ - spin_lock_irqsave(&pa_tlb_lock, flags); \ + spin_lock_irqsave(pgd_spinlock((mm)->pgd), flags);\ old_pte = *ptep; \ set_pte(ptep, pteval); \ purge_tlb_entries(mm, addr); \ - spin_unlock_irqrestore(&pa_tlb_lock, flags); \ + spin_unlock_irqrestore(pgd_spinlock((mm)->pgd), flags);\ } while (0) #endif /* !__ASSEMBLY__ */ @@ -88,10 +88,10 @@ static inline void purge_tlb_entries(str #if CONFIG_PGTABLE_LEVELS == 3 #define PGD_ORDER 1 /* Number of pages per pgd */ #define PMD_ORDER 1 /* Number of pages per pmd */ -#define PGD_ALLOC_ORDER 2 /* first pgd contains pmd */ +#define 
PGD_ALLOC_ORDER (2 + 1) /* first pgd contains pmd */ #else #define PGD_ORDER 1 /* Number of pages per pgd */ -#define PGD_ALLOC_ORDER PGD_ORDER +#define PGD_ALLOC_ORDER (PGD_ORDER + 1) #endif /* Definitions for 3rd level (we use PLD here for Page Lower directory @@ -459,6 +459,17 @@ extern void update_mmu_cache(struct vm_a #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) }) #define __swp_entry_to_pte(x) ((pte_t) { (x).val }) + +static inline spinlock_t *pgd_spinlock(pgd_t *pgd) +{ + extern spinlock_t pa_tlb_flush_lock; + + if (unlikely(pgd == swapper_pg_dir)) + return &pa_tlb_flush_lock; + return (spinlock_t *)((char *)pgd + (PAGE_SIZE << (PGD_ALLOC_ORDER - 1))); +} + + static inline int ptep_test_and_clear_young(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep) { pte_t pte; @@ -467,15 +478,15 @@ static inline int ptep_test_and_clear_yo if (!pte_young(*ptep)) return 0; - spin_lock_irqsave(&pa_tlb_lock, flags); + spin_lock_irqsave(pgd_spinlock(vma->vm_mm->pgd), flags); pte = *ptep; if (!pte_young(pte)) { - spin_unlock_irqrestore(&pa_tlb_lock, flags); + spin_unlock_irqrestore(pgd_spinlock(vma->vm_mm->pgd), flags); return 0; } set_pte(ptep, pte_mkold(pte)); purge_tlb_entries(vma->vm_mm, addr); - spin_unlock_irqrestore(&pa_tlb_lock, flags); + spin_unlock_irqrestore(pgd_spinlock(vma->vm_mm->pgd), flags); return 1; } @@ -485,11 +496,11 @@ static inline pte_t ptep_get_and_clear(s pte_t old_pte; unsigned long flags; - spin_lock_irqsave(&pa_tlb_lock, flags); + spin_lock_irqsave(pgd_spinlock(mm->pgd), flags); old_pte = *ptep; set_pte(ptep, __pte(0)); purge_tlb_entries(mm, addr); - spin_unlock_irqrestore(&pa_tlb_lock, flags); + spin_unlock_irqrestore(pgd_spinlock(mm->pgd), flags); return old_pte; } @@ -497,10 +508,10 @@ static inline pte_t ptep_get_and_clear(s static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { unsigned long flags; - spin_lock_irqsave(&pa_tlb_lock, flags); + 
spin_lock_irqsave(pgd_spinlock(mm->pgd), flags); set_pte(ptep, pte_wrprotect(*ptep)); purge_tlb_entries(mm, addr); - spin_unlock_irqrestore(&pa_tlb_lock, flags); + spin_unlock_irqrestore(pgd_spinlock(mm->pgd), flags); } #define pte_same(A,B) (pte_val(A) == pte_val(B)) Index: linux-5.1-rc3/arch/parisc/kernel/cache.c =================================================================== --- linux-5.1-rc3.orig/arch/parisc/kernel/cache.c 2019-04-06 11:12:03.000000000 +0200 +++ linux-5.1-rc3/arch/parisc/kernel/cache.c 2019-04-06 11:12:03.000000000 +0200 @@ -45,7 +45,7 @@ void flush_icache_page_asm(unsigned long * by software. We put a spinlock around all TLB flushes to * ensure this. */ -DEFINE_SPINLOCK(pa_tlb_lock); +DEFINE_SPINLOCK(pa_tlb_flush_lock); struct pdc_cache_info cache_info __read_mostly; #ifndef CONFIG_PA20 Index: linux-5.1-rc3/arch/parisc/include/asm/tlbflush.h =================================================================== --- linux-5.1-rc3.orig/arch/parisc/include/asm/tlbflush.h 2019-04-06 11:12:03.000000000 +0200 +++ linux-5.1-rc3/arch/parisc/include/asm/tlbflush.h 2019-04-06 11:12:03.000000000 +0200 @@ -18,10 +18,10 @@ * It is also used to ensure PTE updates are atomic and consistent * with the TLB. 
*/ -extern spinlock_t pa_tlb_lock; +extern spinlock_t pa_tlb_flush_lock; -#define purge_tlb_start(flags) spin_lock_irqsave(&pa_tlb_lock, flags) -#define purge_tlb_end(flags) spin_unlock_irqrestore(&pa_tlb_lock, flags) +#define purge_tlb_start(flags) spin_lock_irqsave(&pa_tlb_flush_lock, flags) +#define purge_tlb_end(flags) spin_unlock_irqrestore(&pa_tlb_flush_lock, flags) extern void flush_tlb_all(void); extern void flush_tlb_all_local(void *); Index: linux-5.1-rc3/arch/parisc/kernel/entry.S =================================================================== --- linux-5.1-rc3.orig/arch/parisc/kernel/entry.S 2019-04-06 11:12:03.000000000 +0200 +++ linux-5.1-rc3/arch/parisc/kernel/entry.S 2019-04-06 11:12:22.000000000 +0200 @@ -50,12 +50,8 @@ .import pa_tlb_lock,data .macro load_pa_tlb_lock reg -#if __PA_LDCW_ALIGNMENT > 4 - load32 PA(pa_tlb_lock) + __PA_LDCW_ALIGNMENT-1, \reg - depi 0,31,__PA_LDCW_ALIGN_ORDER, \reg -#else - load32 PA(pa_tlb_lock), \reg -#endif + mfctl %cr25,\reg + addil L%(PAGE_SIZE << (PGD_ALLOC_ORDER - 1)),\reg .endm /* space_to_prot macro creates a prot id from a space id */ Index: linux-5.1-rc3/arch/parisc/include/asm/pgalloc.h =================================================================== --- linux-5.1-rc3.orig/arch/parisc/include/asm/pgalloc.h 2019-04-06 11:12:03.000000000 +0200 +++ linux-5.1-rc3/arch/parisc/include/asm/pgalloc.h 2019-04-06 11:12:03.000000000 +0200 @@ -41,6 +41,7 @@ static inline pgd_t *pgd_alloc(struct mm __pgd_val_set(*pgd, PxD_FLAG_ATTACHED); #endif } + spin_lock_init(pgd_spinlock(actual_pgd)); return actual_pgd; }