[PATCH] parisc: Add alternative coding when running UP

This patch adds the code needed to patch a running SMP kernel at
runtime in order to improve performance when it runs on a single CPU.

The current implementation offers two patching variants:
- Assembler statements which are not needed on UP, e.g. the pa_tlb_lock
  locking sequences, are overwritten with NOPs.
- Some pdtlb and pitlb instructions are patched to become pdtlb,l and
  pitlb,l, which flush only the CPU-local TLB entries instead of
  broadcasting the flush to the other CPUs in the system and thus may
  improve performance (see the usage sketch below).
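
For illustration, here is a minimal usage sketch of the C-level
annotation. It is not part of the patch: purge_dtlb_entry() is a
hypothetical wrapper (equivalent to the pdtlb_alt() macro added to
asm/cache.h), while ALTERNATIVE() and INSN_PxTLB are the macros
introduced in asm/alternative.h below.

#include <asm/alternative.h>

/*
 * The asm emits "pdtlb 0(%sr1,reg)" followed by a local label.
 * ALTERNATIVE() then appends an .altinstructions entry whose two
 * self-relative offsets both point at the pdtlb instruction, tagged
 * with INSN_PxTLB, so that apply_alternatives_all() can later rewrite
 * that single instruction into its local-only ",l" form when only one
 * CPU is online.
 */
static inline void purge_dtlb_entry(unsigned long addr)
{
	asm volatile("pdtlb 0(%%sr1,%0)" ALTERNATIVE(INSN_PxTLB)
		     : : "r" (addr));
}

The assembly-level variant works the same way: the 98:/99: labels in
entry.S and pacache.S bracket the instructions to be replaced, and an
ALTERNATIVE(98b, 99b, nop) entry records that range so it can be
overwritten with NOPs on UP.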

Signed-off-by: Helge Deller <deller@xxxxxx>

diff --git a/arch/parisc/include/asm/alternative.h b/arch/parisc/include/asm/alternative.h
new file mode 100644
index 000000000000..b8632b0fe55e
--- /dev/null
+++ b/arch/parisc/include/asm/alternative.h
@@ -0,0 +1,39 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ASM_PARISC_ALTERNATIVE_H
+#define __ASM_PARISC_ALTERNATIVE_H
+
+#define INSN_PxTLB   0x01	 /* pdtlb, pitlb */
+
+#ifndef __ASSEMBLY__
+
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/stddef.h>
+#include <linux/stringify.h>
+
+struct alt_instr {
+	s32 from_addr;		/* self-relative offset to start of original insns */
+	s32 to_addr;		/* self-relative offset to end of original insns */
+	u32 replacement;	/* replacement instruction or code */
+};
+
+void set_kernel_text_rw(int enable_read_write);
+// int __init apply_alternatives_all(void);
+
+/* Alternative SMP implementation. */
+#define ALTERNATIVE(replacement) 		"!0:"	\
+	".section .altinstructions,\"aw\"	!"	\
+	".word (0b-4-.), (0b-4-.), " __stringify(replacement) "	!"	\
+	".previous"
+
+#else
+
+#define ALTERNATIVE(from_addr, to_addr, ...)	\
+	.section .altinstructions,"aw"	!	\
+	.word (from_addr - .), (to_addr - .) !	\
+	__VA_ARGS__			!	\
+	.previous
+
+#endif  /*  __ASSEMBLY__  */
+
+#endif /* __ASM_PARISC_ALTERNATIVE_H */
diff --git a/arch/parisc/include/asm/cache.h b/arch/parisc/include/asm/cache.h
index 150b7f30ea90..ef2057802215 100644
--- a/arch/parisc/include/asm/cache.h
+++ b/arch/parisc/include/asm/cache.h
@@ -43,6 +43,8 @@ void parisc_setup_cache_timing(void);
 
 #define pdtlb(addr)         asm volatile("pdtlb 0(%%sr1,%0)" : : "r" (addr));
 #define pitlb(addr)         asm volatile("pitlb 0(%%sr1,%0)" : : "r" (addr));
+#define pdtlb_alt(addr)	asm volatile("pdtlb 0(%%sr1,%0)" ALTERNATIVE(INSN_PxTLB) : : "r" (addr));
+#define pitlb_alt(addr)	asm volatile("pitlb 0(%%sr1,%0)" ALTERNATIVE(INSN_PxTLB) : : "r" (addr));
 #define pdtlb_kernel(addr)  asm volatile("pdtlb 0(%0)" : : "r" (addr));
 
 #endif /* ! __ASSEMBLY__ */
diff --git a/arch/parisc/kernel/cache.c b/arch/parisc/kernel/cache.c
index 4209b74ce63c..576aff095ec8 100644
--- a/arch/parisc/kernel/cache.c
+++ b/arch/parisc/kernel/cache.c
@@ -28,6 +28,7 @@
 #include <asm/processor.h>
 #include <asm/sections.h>
 #include <asm/shmparam.h>
+#include <asm/alternative.h>
 
 int split_tlb __read_mostly;
 int dcache_stride __read_mostly;
@@ -483,7 +484,7 @@ int __flush_tlb_range(unsigned long sid, unsigned long start,
 		while (start < end) {
 			purge_tlb_start(flags);
 			mtsp(sid, 1);
-			pdtlb(start);
+			pdtlb_alt(start);
 			purge_tlb_end(flags);
 			start += PAGE_SIZE;
 		}
@@ -494,8 +495,8 @@ int __flush_tlb_range(unsigned long sid, unsigned long start,
 	while (start < end) {
 		purge_tlb_start(flags);
 		mtsp(sid, 1);
-		pdtlb(start);
-		pitlb(start);
+		pdtlb_alt(start);
+		pitlb_alt(start);
 		purge_tlb_end(flags);
 		start += PAGE_SIZE;
 	}
diff --git a/arch/parisc/kernel/entry.S b/arch/parisc/kernel/entry.S
index 0d662f0e7b70..9cd6eda4d2df 100644
--- a/arch/parisc/kernel/entry.S
+++ b/arch/parisc/kernel/entry.S
@@ -38,6 +38,7 @@
 #include <asm/ldcw.h>
 #include <asm/traps.h>
 #include <asm/thread_info.h>
+#include <asm/alternative.h>
 
 #include <linux/linkage.h>
 
@@ -464,7 +465,7 @@
 	/* Acquire pa_tlb_lock lock and check page is present. */
 	.macro		tlb_lock	spc,ptp,pte,tmp,tmp1,fault
 #ifdef CONFIG_SMP
-	cmpib,COND(=),n	0,\spc,2f
+98:	cmpib,COND(=),n	0,\spc,2f
 	load_pa_tlb_lock \tmp
 1:	LDCW		0(\tmp),\tmp1
 	cmpib,COND(=)	0,\tmp1,1b
@@ -473,6 +474,7 @@
 	bb,<,n		\pte,_PAGE_PRESENT_BIT,3f
 	b		\fault
 	stw,ma		\spc,0(\tmp)
+99:	ALTERNATIVE(98b, 99b, nop)
 #endif
 2:	LDREG		0(\ptp),\pte
 	bb,>=,n		\pte,_PAGE_PRESENT_BIT,\fault
@@ -482,15 +484,17 @@
 	/* Release pa_tlb_lock lock without reloading lock address. */
 	.macro		tlb_unlock0	spc,tmp
 #ifdef CONFIG_SMP
-	or,COND(=)	%r0,\spc,%r0
+98:	or,COND(=)	%r0,\spc,%r0
 	stw,ma		\spc,0(\tmp)
+99:	ALTERNATIVE(98b, 99b, nop)
 #endif
 	.endm
 
 	/* Release pa_tlb_lock lock. */
 	.macro		tlb_unlock1	spc,tmp
 #ifdef CONFIG_SMP
-	load_pa_tlb_lock \tmp
+98:	load_pa_tlb_lock \tmp
+99:	ALTERNATIVE(98b, 99b, nop)
 	tlb_unlock0	\spc,\tmp
 #endif
 	.endm
diff --git a/arch/parisc/kernel/pacache.S b/arch/parisc/kernel/pacache.S
index f33bf2d306d6..1d689502da90 100644
--- a/arch/parisc/kernel/pacache.S
+++ b/arch/parisc/kernel/pacache.S
@@ -37,6 +37,7 @@
 #include <asm/pgtable.h>
 #include <asm/cache.h>
 #include <asm/ldcw.h>
+#include <asm/alternative.h>
 #include <linux/linkage.h>
 #include <linux/init.h>
 
@@ -312,6 +313,7 @@ ENDPROC_CFI(flush_data_cache_local)
 
 	.macro	tlb_lock	la,flags,tmp
 #ifdef CONFIG_SMP
+98:
 #if __PA_LDCW_ALIGNMENT > 4
 	load32		pa_tlb_lock + __PA_LDCW_ALIGNMENT-1, \la
 	depi		0,31,__PA_LDCW_ALIGN_ORDER, \la
@@ -326,15 +328,17 @@ ENDPROC_CFI(flush_data_cache_local)
 	nop
 	b,n		2b
 3:
+99:	ALTERNATIVE(98b, 99b, nop)
 #endif
 	.endm
 
 	.macro	tlb_unlock	la,flags,tmp
 #ifdef CONFIG_SMP
-	ldi		1,\tmp
+98:	ldi		1,\tmp
 	sync
 	stw		\tmp,0(\la)
 	mtsm		\flags
+99:	ALTERNATIVE(98b, 99b, nop)
 #endif
 	.endm
 
@@ -596,9 +600,11 @@ ENTRY_CFI(copy_user_page_asm)
 	pdtlb,l		%r0(%r29)
 #else
 	tlb_lock	%r20,%r21,%r22
-	pdtlb		%r0(%r28)
-	pdtlb		%r0(%r29)
+0:	pdtlb		%r0(%r28)
+1:	pdtlb		%r0(%r29)
 	tlb_unlock	%r20,%r21,%r22
+	ALTERNATIVE(0b, 0b, .word INSN_PxTLB)
+	ALTERNATIVE(1b, 1b, .word INSN_PxTLB)
 #endif
 
 #ifdef CONFIG_64BIT
@@ -736,8 +742,9 @@ ENTRY_CFI(clear_user_page_asm)
 	pdtlb,l		%r0(%r28)
 #else
 	tlb_lock	%r20,%r21,%r22
-	pdtlb		%r0(%r28)
+0:	pdtlb		%r0(%r28)
 	tlb_unlock	%r20,%r21,%r22
+	ALTERNATIVE(0b, 0b, .word INSN_PxTLB)
 #endif
 
 #ifdef CONFIG_64BIT
@@ -813,8 +820,9 @@ ENTRY_CFI(flush_dcache_page_asm)
 	pdtlb,l		%r0(%r28)
 #else
 	tlb_lock	%r20,%r21,%r22
-	pdtlb		%r0(%r28)
+0:	pdtlb		%r0(%r28)
 	tlb_unlock	%r20,%r21,%r22
+	ALTERNATIVE(0b, 0b, .word INSN_PxTLB)
 #endif
 
 	ldil		L%dcache_stride, %r1
@@ -877,9 +885,11 @@ ENTRY_CFI(flush_icache_page_asm)
 	pitlb,l         %r0(%sr4,%r28)
 #else
 	tlb_lock        %r20,%r21,%r22
-	pdtlb		%r0(%r28)
-	pitlb           %r0(%sr4,%r28)
+0:	pdtlb		%r0(%r28)
+1:	pitlb           %r0(%sr4,%r28)
 	tlb_unlock      %r20,%r21,%r22
+	ALTERNATIVE(0b, 0b, .word INSN_PxTLB)
+	ALTERNATIVE(1b, 1b, .word INSN_PxTLB)
 #endif
 
 	ldil		L%icache_stride, %r1
diff --git a/arch/parisc/kernel/setup.c b/arch/parisc/kernel/setup.c
index 4e87c35c22b7..0acb49bbbc6d 100644
--- a/arch/parisc/kernel/setup.c
+++ b/arch/parisc/kernel/setup.c
@@ -40,6 +40,7 @@
 #include <linux/sched/clock.h>
 #include <linux/start_kernel.h>
 
+#include <asm/alternative.h>
 #include <asm/processor.h>
 #include <asm/sections.h>
 #include <asm/pdc.h>
@@ -305,6 +306,51 @@ static int __init parisc_init_resources(void)
 	return 0;
 }
 
+static int __init apply_alternatives_all(void)
+{
+	extern struct alt_instr __start___alt_table[];
+	extern struct alt_instr __stop___alt_table[];
+	struct alt_instr *entry;
+	int *from, *to;
+	int ret = 0, replacement;
+
+	/* patch only when a single CPU is online */
+	if (num_online_cpus() > 1)
+		return 0;
+
+	pr_info("Patching kernel for non-SMP usage.\n");
+
+	set_kernel_text_rw(1);
+
+	entry = __start___alt_table;
+	while (entry < __stop___alt_table) {
+		from = (int *)((unsigned long)&entry->from_addr + entry->from_addr);
+		to = (int *)((unsigned long)&entry->to_addr + entry->to_addr);
+		pr_info("Replace from %px to %px with 0x%x\n", from, to, entry->replacement);
+
+		replacement = entry->replacement;
+
+		/* Want to replace pdtlb by a pdtlb,l instruction? */
+		if (replacement == INSN_PxTLB) {
+			replacement = *from;
+			if (boot_cpu_data.cpu_type >= pcxu) /* PA 2.0 or newer? */
+				replacement |= (1 << 10); /* set the ,l (local) bit */
+		}
+
+		local_irq_disable();
+		do {
+			*from++ = replacement;
+		} while (from < to);
+		local_irq_enable();
+
+		entry++;
+	}
+
+	set_kernel_text_rw(0);
+
+	return ret;
+}
+
 extern void gsc_init(void);
 extern void processor_init(void);
 extern void ccio_init(void);
@@ -346,6 +392,7 @@ static int __init parisc_init(void)
 			boot_cpu_data.cpu_hz / 1000000,
 			boot_cpu_data.cpu_hz % 1000000	);
 
+	apply_alternatives_all();
 	parisc_setup_cache_timing();
 
 	/* These are in a non-obvious order, will fix when we have an iotree */
diff --git a/arch/parisc/kernel/vmlinux.lds.S b/arch/parisc/kernel/vmlinux.lds.S
index da2e31190efa..a49810f10f6c 100644
--- a/arch/parisc/kernel/vmlinux.lds.S
+++ b/arch/parisc/kernel/vmlinux.lds.S
@@ -25,6 +25,14 @@
 #include <asm/page.h>
 #include <asm/asm-offsets.h>
 #include <asm/thread_info.h>
+
+#define ALTERNATIVE_TABLE(align)					\
+	. = ALIGN(align);						\
+	__alt_table : AT(ADDR(__alt_table) - LOAD_OFFSET) {		\
+		__start___alt_table = .;				\
+		KEEP(*(.altinstructions))				\
+		__stop___alt_table = .;					\
+	}
 	
 /* ld script to make hppa Linux kernel */
 #ifndef CONFIG_64BIT
@@ -61,6 +69,7 @@ SECTIONS
 		EXIT_DATA
 	}
 	PERCPU_SECTION(8)
+	ALTERNATIVE_TABLE(4)
 	. = ALIGN(HUGEPAGE_SIZE);
 	__init_end = .;
 	/* freed after init ends here */
diff --git a/arch/parisc/mm/init.c b/arch/parisc/mm/init.c
index 74842d28a7a1..ff80ffdd09c7 100644
--- a/arch/parisc/mm/init.c
+++ b/arch/parisc/mm/init.c
@@ -515,6 +512,21 @@ static void __init map_pages(unsigned long start_vaddr,
 	}
 }
 
+void __init set_kernel_text_rw(int enable_read_write)
+{
+	unsigned long start = (unsigned long)_stext;
+	unsigned long end   = (unsigned long)_etext;
+
+	map_pages(start, __pa(start), end - start,
+		PAGE_KERNEL_RWX, enable_read_write ? 1 : 0);
+
+	/* force the kernel to see the new TLB entries */
+	__flush_tlb_range(0, start, end);
+
+	/* flush stale instructions from the I-cache */
+	flush_icache_range(start, end);
+}
+
 void __ref free_initmem(void)
 {
 	unsigned long init_begin = (unsigned long)__init_begin;


