Re: [PATCH] parisc: Add alternative coding when running UP

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On 13.10.2018 23:52, John David Anglin wrote:
> On 2018-10-12 4:49 PM, Helge Deller wrote:
>> This patch adds the necessary code to patch a running SMP kernel
>> at runtime to improve performance when running on a single CPU.
>>
>> The current implementation offers two patching variants:
>> - Unwanted assembler statements like locking functions are overwritten
>>    with NOPs
>> - Some pdtlb and pitlb instructions are patched to become pdtlb,l and
>>    pitlb,l which only flushes the CPU-local tlb entries instead of
>>    broadcasting the flush to other CPUs in the system and thus may
>>    improve performance.
>>
>> Signed-off-by: Helge Deller <deller@xxxxxx>
>>
>> diff --git a/arch/parisc/include/asm/alternative.h b/arch/parisc/include/asm/alternative.h
>> new file mode 100644
>> index 000000000000..b8632b0fe55e
>> --- /dev/null
>> +++ b/arch/parisc/include/asm/alternative.h
>> @@ -0,0 +1,39 @@
>> +/* SPDX-License-Identifier: GPL-2.0 */
>> +#ifndef __ASM_PARISC_ALTERNATIVE_H
>> +#define __ASM_PARISC_ALTERNATIVE_H
>> +
>> +#define INSN_PxTLB   0x01     /* pdtlb, pitlb */
>> +
>> +#ifndef __ASSEMBLY__
>> +
>> +#include <linux/init.h>
>> +#include <linux/types.h>
>> +#include <linux/stddef.h>
>> +#include <linux/stringify.h>
>> +
>> +struct alt_instr {
>> +        u32 from_addr;        /* offset to original instructions */
>> +    u32 to_addr;        /* end of original instructions */
>> +    u32 replacement;    /* replacement instruction or code */
>> +};
>> +
>> +void set_kernel_text_rw(int enable_read_write);
>> +// int __init apply_alternatives_all(void);
>> +
>> +/* Alternative SMP implementation. */
>> +#define ALTERNATIVE(replacement)         "!0:"    \
>> +    ".section .altinstructions,\"aw\"    !"    \
>> +    ".word (0b-4-.), (0b-4-.), " __stringify(replacement) "    !"    \
>> +    ".previous"
>> +
>> +#else
>> +
>> +#define ALTERNATIVE(from_addr, to_addr, ... )    \
>> +    .section .altinstructions,"aw"    !    \
>> +    .word (from_addr - .), (to_addr - .) !    \
>> +    __VA_ARGS__            !    \
>> +    .previous
>> +
>> +#endif  /*  __ASSEMBLY__  */
>> +
>> +#endif /* __ASM_PARISC_ALTERNATIVE_H */
>> diff --git a/arch/parisc/include/asm/cache.h b/arch/parisc/include/asm/cache.h
>> index 150b7f30ea90..ef2057802215 100644
>> --- a/arch/parisc/include/asm/cache.h
>> +++ b/arch/parisc/include/asm/cache.h
>> @@ -43,6 +43,8 @@ void parisc_setup_cache_timing(void);
>>     #define pdtlb(addr)         asm volatile("pdtlb 0(%%sr1,%0)" : : "r" (addr));
>>   #define pitlb(addr)         asm volatile("pitlb 0(%%sr1,%0)" : : "r" (addr));
>> +#define pdtlb_alt(addr)    asm volatile("pdtlb 0(%%sr1,%0)" ALTERNATIVE(INSN_PxTLB) : : "r" (addr));
>> +#define pitlb_alt(addr)    asm volatile("pitlb 0(%%sr1,%0)" ALTERNATIVE(INSN_PxTLB) : : "r" (addr));
>>   #define pdtlb_kernel(addr)  asm volatile("pdtlb 0(%0)" : : "r" (addr));
>>     #endif /* ! __ASSEMBLY__ */
>> diff --git a/arch/parisc/kernel/cache.c b/arch/parisc/kernel/cache.c
>> index 4209b74ce63c..576aff095ec8 100644
>> --- a/arch/parisc/kernel/cache.c
>> +++ b/arch/parisc/kernel/cache.c
>> @@ -28,6 +28,7 @@
>>   #include <asm/processor.h>
>>   #include <asm/sections.h>
>>   #include <asm/shmparam.h>
>> +#include <asm/alternative.h>
>>     int split_tlb __read_mostly;
>>   int dcache_stride __read_mostly;
>> @@ -483,7 +484,7 @@ int __flush_tlb_range(unsigned long sid, unsigned long start,
>>           while (start < end) {
>>               purge_tlb_start(flags);
>>               mtsp(sid, 1);
>> -            pdtlb(start);
>> +            pdtlb_alt(start);
>>               purge_tlb_end(flags);
>>               start += PAGE_SIZE;
>>           }
>> @@ -494,8 +495,8 @@ int __flush_tlb_range(unsigned long sid, unsigned long start,
>>       while (start < end) {
>>           purge_tlb_start(flags);
>>           mtsp(sid, 1);
>> -        pdtlb(start);
>> -        pitlb(start);
>> +        pdtlb_alt(start);
>> +        pitlb_alt(start);
>>           purge_tlb_end(flags);
>>           start += PAGE_SIZE;
>>       }
>> diff --git a/arch/parisc/kernel/entry.S b/arch/parisc/kernel/entry.S
>> index 0d662f0e7b70..9cd6eda4d2df 100644
>> --- a/arch/parisc/kernel/entry.S
>> +++ b/arch/parisc/kernel/entry.S
>> @@ -38,6 +38,7 @@
>>   #include <asm/ldcw.h>
>>   #include <asm/traps.h>
>>   #include <asm/thread_info.h>
>> +#include <asm/alternative.h>
>>     #include <linux/linkage.h>
>>   @@ -464,7 +465,7 @@
>>       /* Acquire pa_tlb_lock lock and check page is present. */
>>       .macro        tlb_lock    spc,ptp,pte,tmp,tmp1,fault
>>   #ifdef CONFIG_SMP
>> -    cmpib,COND(=),n    0,\spc,2f
>> +98:    cmpib,COND(=),n    0,\spc,2f
>>       load_pa_tlb_lock \tmp
>>   1:    LDCW        0(\tmp),\tmp1
>>       cmpib,COND(=)    0,\tmp1,1b
>> @@ -473,6 +474,7 @@
>>       bb,<,n        \pte,_PAGE_PRESENT_BIT,3f
>>       b        \fault
>>       stw,ma        \spc,0(\tmp)
>> +99:    ALTERNATIVE(98b, 99b, nop)
>>   #endif
>>   2:    LDREG        0(\ptp),\pte
>>       bb,>=,n        \pte,_PAGE_PRESENT_BIT,\fault
>> @@ -482,15 +484,17 @@
>>       /* Release pa_tlb_lock lock without reloading lock address. */
>>       .macro        tlb_unlock0    spc,tmp
>>   #ifdef CONFIG_SMP
>> -    or,COND(=)    %r0,\spc,%r0
>> +98:    or,COND(=)    %r0,\spc,%r0
>>       stw,ma        \spc,0(\tmp)
>> +99:    ALTERNATIVE(98b, 99b, nop)
>>   #endif
>>       .endm
>>         /* Release pa_tlb_lock lock. */
>>       .macro        tlb_unlock1    spc,tmp
>>   #ifdef CONFIG_SMP
>> -    load_pa_tlb_lock \tmp
>> +98:    load_pa_tlb_lock \tmp
>> +99:    ALTERNATIVE(98b, 99b, nop)
>>       tlb_unlock0    \spc,\tmp
>>   #endif
>>       .endm

> I'm not sure that optimizing the TLB handler is safe.  There is no guarantee that a TLB exception
> won't occur when updating the handler.

At this stage in the boot process, we only have enabled the ITC timer
and done a system inventory. No drivers are running which could trigger
some kind of IRQ or similiar.
Additionally, we have already read the instruction at the target address (which
triggered a TLB exception) before we start patching the code.
I think it's quite safe at this stage.
 
> Possibly, the best way is to generate both up and smp exception tables and switch to the up table
> when there is only one CPU online.

Those are macros being used in-place in the (mis-)handlers.
I think it's quite hard to get it right (and most likely 
unneccesary as explained above).


>> diff --git a/arch/parisc/kernel/pacache.S b/arch/parisc/kernel/pacache.S
>> index f33bf2d306d6..1d689502da90 100644
>> --- a/arch/parisc/kernel/pacache.S
>> +++ b/arch/parisc/kernel/pacache.S
>> @@ -37,6 +37,7 @@
>>   #include <asm/pgtable.h>
>>   #include <asm/cache.h>
>>   #include <asm/ldcw.h>
>> +#include <asm/alternative.h>
>>   #include <linux/linkage.h>
>>   #include <linux/init.h>
>>   @@ -312,6 +313,7 @@ ENDPROC_CFI(flush_data_cache_local)
>>         .macro    tlb_lock    la,flags,tmp
>>   #ifdef CONFIG_SMP
>> +98:
>>   #if __PA_LDCW_ALIGNMENT > 4
>>       load32        pa_tlb_lock + __PA_LDCW_ALIGNMENT-1, \la
>>       depi        0,31,__PA_LDCW_ALIGN_ORDER, \la
>> @@ -326,15 +328,17 @@ ENDPROC_CFI(flush_data_cache_local)
>>       nop
>>       b,n        2b
>>   3:
>> +99:    ALTERNATIVE(98b, 99b, nop)
>>   #endif
>>       .endm
>>         .macro    tlb_unlock    la,flags,tmp
>>   #ifdef CONFIG_SMP
>> -    ldi        1,\tmp
>> +98:    ldi        1,\tmp
>>       sync
>>       stw        \tmp,0(\la)
>>       mtsm        \flags
>> +99:    ALTERNATIVE(98b, 99b, nop)
>>   #endif
>>       .endm
>>   @@ -596,9 +600,11 @@ ENTRY_CFI(copy_user_page_asm)
>>       pdtlb,l        %r0(%r29)
>>   #else
>>       tlb_lock    %r20,%r21,%r22
>> -    pdtlb        %r0(%r28)
>> -    pdtlb        %r0(%r29)
>> +0:    pdtlb        %r0(%r28)
>> +1:    pdtlb        %r0(%r29)
>>       tlb_unlock    %r20,%r21,%r22
>> +    ALTERNATIVE(0b, 0b, .word INSN_PxTLB)
>> +    ALTERNATIVE(1b, 1b, .word INSN_PxTLB)
>>   #endif
>>     #ifdef CONFIG_64BIT
>> @@ -736,8 +742,9 @@ ENTRY_CFI(clear_user_page_asm)
>>       pdtlb,l        %r0(%r28)
>>   #else
>>       tlb_lock    %r20,%r21,%r22
>> -    pdtlb        %r0(%r28)
>> +0:    pdtlb        %r0(%r28)
>>       tlb_unlock    %r20,%r21,%r22
>> +    ALTERNATIVE(0b, 0b, .word INSN_PxTLB)
>>   #endif
>>     #ifdef CONFIG_64BIT
>> @@ -813,8 +820,9 @@ ENTRY_CFI(flush_dcache_page_asm)
>>       pdtlb,l        %r0(%r28)
>>   #else
>>       tlb_lock    %r20,%r21,%r22
>> -    pdtlb        %r0(%r28)
>> +0:    pdtlb        %r0(%r28)
>>       tlb_unlock    %r20,%r21,%r22
>> +    ALTERNATIVE(0b, 0b, .word INSN_PxTLB)
>>   #endif
>>         ldil        L%dcache_stride, %r1
>> @@ -877,9 +885,11 @@ ENTRY_CFI(flush_icache_page_asm)
>>       pitlb,l         %r0(%sr4,%r28)
>>   #else
>>       tlb_lock        %r20,%r21,%r22
>> -    pdtlb        %r0(%r28)
>> -    pitlb           %r0(%sr4,%r28)
>> +0:    pdtlb        %r0(%r28)
>> +1:    pitlb           %r0(%sr4,%r28)
>>       tlb_unlock      %r20,%r21,%r22
>> +    ALTERNATIVE(0b, 0b, .word INSN_PxTLB)
>> +    ALTERNATIVE(1b, 1b, .word INSN_PxTLB)
>>   #endif
>>         ldil        L%icache_stride, %r1
>> diff --git a/arch/parisc/kernel/setup.c b/arch/parisc/kernel/setup.c
>> index 4e87c35c22b7..0acb49bbbc6d 100644
>> --- a/arch/parisc/kernel/setup.c
>> +++ b/arch/parisc/kernel/setup.c
>> @@ -40,6 +40,7 @@
>>   #include <linux/sched/clock.h>
>>   #include <linux/start_kernel.h>
>>   +#include <asm/alternative.h>
>>   #include <asm/processor.h>
>>   #include <asm/sections.h>
>>   #include <asm/pdc.h>
>> @@ -305,6 +306,51 @@ static int __init parisc_init_resources(void)
>>       return 0;
>>   }
>>   +static int __init apply_alternatives_all(void)
>> +{
>> +    extern struct alt_instr __start___alt_table[];
>> +    extern struct alt_instr __stop___alt_table;
>> +    struct alt_instr *entry;
>> +    int *from, *to;
>> +    int ret = 0, replacement;
>> +
>> +    /* replace only when not running SMP CPUs */
>> +    if (num_online_cpus() > 1)
>> +        return 0;
>> +
>> +    pr_info("Patching kernel for non-SMP usage.\n");
>> +
>> +    set_kernel_text_rw(1);
>> +
>> +    entry = &__start___alt_table[0];
>> +    while (entry < &__stop___alt_table) {
>> +        from = (int *)((unsigned long)&entry->from_addr + entry->from_addr);
>> +        to = (int *)((unsigned long)&entry->to_addr + entry->to_addr);
>> +        pr_info("Replace from %px to %px with 0x%x\n", from, to, entry->replacement);
>> +
>> +        replacement = entry->replacement;
>> +
>> +        /* Want to replace pdtlb by a pdtlb,l instruction? */
>> +        if (replacement == INSN_PxTLB) {
>> +            replacement = *from;
>> +            if (boot_cpu_data.cpu_type >= pcxu) /* >= pa2.0 ? */
>> +                replacement |= (1 << 10); /* set el bit */
>> +        }
>> +
>> +        local_irq_disable();
> Disabling interrupts doesn't protect against TLB faults.  The only way is to go to real mode.

Yep. Discussed above.
In the new patch I simple removed those irq functions.

>> +        do {
>> +            *from++ = replacement;
>> +        } while (from < to);
>> +        local_irq_enable();
>> +
>> +        entry++;
>> +    }
>> +
>> +    set_kernel_text_rw(0);
>> +
>> +    return ret;
>> +}
>> +
>>   extern void gsc_init(void);
>>   extern void processor_init(void);
>>   extern void ccio_init(void);
>> @@ -346,6 +392,7 @@ static int __init parisc_init(void)
>>               boot_cpu_data.cpu_hz / 1000000,
>>               boot_cpu_data.cpu_hz % 1000000    );
>>   +    apply_alternatives_all();
>>       parisc_setup_cache_timing();
>>         /* These are in a non-obvious order, will fix when we have an iotree */
>> diff --git a/arch/parisc/kernel/vmlinux.lds.S b/arch/parisc/kernel/vmlinux.lds.S
>> index da2e31190efa..a49810f10f6c 100644
>> --- a/arch/parisc/kernel/vmlinux.lds.S
>> +++ b/arch/parisc/kernel/vmlinux.lds.S
>> @@ -25,6 +25,14 @@
>>   #include <asm/page.h>
>>   #include <asm/asm-offsets.h>
>>   #include <asm/thread_info.h>
>> +
>> +#define ALTERNATIVE_TABLE(align)                    \
>> +    . = ALIGN(align);                        \
>> +    __alt_table : AT(ADDR(__alt_table) - LOAD_OFFSET) {        \
>> +        __start___alt_table = .;                \
>> +        KEEP(*(.altinstructions))                \
>> +        __stop___alt_table = .;                    \
>> +    }
>>      
>>   /* ld script to make hppa Linux kernel */
>>   #ifndef CONFIG_64BIT
>> @@ -61,6 +69,7 @@ SECTIONS
>>           EXIT_DATA
>>       }
>>       PERCPU_SECTION(8)
>> +    ALTERNATIVE_TABLE(4)
>>       . = ALIGN(HUGEPAGE_SIZE);
>>       __init_end = .;
>>       /* freed after init ends here */
>> diff --git a/arch/parisc/mm/init.c b/arch/parisc/mm/init.c
>> index 74842d28a7a1..ff80ffdd09c7 100644
>> --- a/arch/parisc/mm/init.c
>> +++ b/arch/parisc/mm/init.c
>> @@ -515,6 +512,21 @@ static void __init map_pages(unsigned long start_vaddr,
>>       }
>>   }
>>   +void __init set_kernel_text_rw(int enable_read_write)
>> +{
>> +    unsigned long start = (unsigned long)_stext;
>> +    unsigned long end   = (unsigned long)_etext;
>> +
>> +    map_pages(start, __pa(start), end-start,
>> +        PAGE_KERNEL_RWX, enable_read_write ? 1:0);
>> +
>> +    /* force the kernel to see the new TLB entries */
>> +    __flush_tlb_range(0, start, end);
>> +
>> +    /* dump old cached instructions */
>> +    flush_icache_range(start, end);
> I think the data cache needs to be flushed as well.  See programming note on page 7-151 of the PA 2.0 arch.

True.
See definition of flush_kernel_dcache_range_asm() in 
arch/parisc/include/asm/cacheflush.h
It flushes the d-cache before the i-cache already.

Helge



[Index of Archives]     [Linux SoC]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux