Similar with x86, when VM is detected, revert to a simple test-and-set lock to avoid the horrors of queue preemption. Tested on 3C5000 Dual-way machine with 32 cores and 2 numa nodes, test case is kcbench on kernel mainline 6.10, the detailed command is "kcbench --src /root/src/linux" Performance on host machine kernel compile time performance impact Original 150.29 seconds With patch 150.19 seconds almost no impact Performance on virtual machine: 1. 1 VM with 32 vCPUs and 2 numa node, numa node pinned kernel compile time performance impact Original 170.87 seconds With patch 171.73 seconds almost no impact 2. 2 VMs, each VM with 32 vCPUs and 2 numa node, numa node pinned kernel compile time performance impact Original 2362.04 seconds With patch 354.73 seconds +565% Signed-off-by: Bibo Mao <maobibo@xxxxxxxxxxx> --- arch/loongarch/include/asm/Kbuild | 1 - arch/loongarch/include/asm/paravirt.h | 3 ++ arch/loongarch/include/asm/qspinlock.h | 41 ++++++++++++++++++++++++++ arch/loongarch/kernel/paravirt.c | 9 ++++++ arch/loongarch/kernel/setup.c | 5 ++++ arch/loongarch/kernel/smp.c | 2 ++ 6 files changed, 60 insertions(+), 1 deletion(-) create mode 100644 arch/loongarch/include/asm/qspinlock.h diff --git a/arch/loongarch/include/asm/Kbuild b/arch/loongarch/include/asm/Kbuild index 2bb3676429c0..4635b755b2b4 100644 --- a/arch/loongarch/include/asm/Kbuild +++ b/arch/loongarch/include/asm/Kbuild @@ -6,7 +6,6 @@ generic-y += mcs_spinlock.h generic-y += parport.h generic-y += early_ioremap.h generic-y += qrwlock.h -generic-y += qspinlock.h generic-y += user.h generic-y += ioctl.h generic-y += statfs.h diff --git a/arch/loongarch/include/asm/paravirt.h b/arch/loongarch/include/asm/paravirt.h index dddec49671ae..dcc2b46d31fe 100644 --- a/arch/loongarch/include/asm/paravirt.h +++ b/arch/loongarch/include/asm/paravirt.h @@ -19,6 +19,7 @@ static inline u64 paravirt_steal_clock(int cpu) int __init pv_ipi_init(void); int __init pv_time_init(void); +void __init pv_spinlock_init(void); #else @@ -31,5 +32,7 @@ static inline int pv_time_init(void) { return 0; } + +static inline void pv_spinlock_init(void) { } #endif // CONFIG_PARAVIRT #endif diff --git a/arch/loongarch/include/asm/qspinlock.h b/arch/loongarch/include/asm/qspinlock.h new file mode 100644 index 000000000000..b2d53b8c6679 --- /dev/null +++ b/arch/loongarch/include/asm/qspinlock.h @@ -0,0 +1,41 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_LOONGARCH_QSPINLOCK_H +#define _ASM_LOONGARCH_QSPINLOCK_H + +#include <linux/jump_label.h> +#include <asm/cpu-features.h> + +#ifdef CONFIG_PARAVIRT + +DECLARE_STATIC_KEY_TRUE(virt_spin_lock_key); + +#define virt_spin_lock virt_spin_lock +static inline bool virt_spin_lock(struct qspinlock *lock) +{ + int val; + + if (static_branch_likely(&virt_spin_lock_key)) + return false; + + /* + * On hypervisors without PARAVIRT_SPINLOCKS support we fall + * back to a Test-and-Set spinlock, because fair locks have + * horrible lock 'holder' preemption issues. + */ + +__retry: + val = atomic_read(&lock->val); + + if (val || !atomic_try_cmpxchg(&lock->val, &val, _Q_LOCKED_VAL)) { + cpu_relax(); + goto __retry; + } + + return true; +} + +#endif /* CONFIG_PARAVIRT */ + +#include <asm-generic/qspinlock.h> + +#endif // _ASM_LOONGARCH_QSPINLOCK_H diff --git a/arch/loongarch/kernel/paravirt.c b/arch/loongarch/kernel/paravirt.c index 9c9b75b76f62..49ebc54bdbcb 100644 --- a/arch/loongarch/kernel/paravirt.c +++ b/arch/loongarch/kernel/paravirt.c @@ -9,6 +9,7 @@ #include <linux/static_call.h> #include <asm/paravirt.h> +DEFINE_STATIC_KEY_TRUE(virt_spin_lock_key); static int has_steal_clock; struct static_key paravirt_steal_enabled; struct static_key paravirt_steal_rq_enabled; @@ -300,3 +301,11 @@ int __init pv_time_init(void) return 0; } + +void __init pv_spinlock_init(void) +{ + if (!cpu_has_hypervisor) + return; + + static_branch_disable(&virt_spin_lock_key); +} diff --git a/arch/loongarch/kernel/setup.c b/arch/loongarch/kernel/setup.c index 0f0740f0be27..70a670efe3cf 100644 --- a/arch/loongarch/kernel/setup.c +++ b/arch/loongarch/kernel/setup.c @@ -599,6 +599,11 @@ void __init setup_arch(char **cmdline_p) parse_early_param(); reserve_initrd_mem(); + /* + * Initialise the static keys early as they may be enabled by the + * cpufeature code and early parameters. + */ + jump_label_init(); platform_init(); arch_mem_init(cmdline_p); diff --git a/arch/loongarch/kernel/smp.c b/arch/loongarch/kernel/smp.c index ca405ab86aae..f499bff1050b 100644 --- a/arch/loongarch/kernel/smp.c +++ b/arch/loongarch/kernel/smp.c @@ -509,6 +509,8 @@ void smp_prepare_boot_cpu(void) rr_node = next_node_in(rr_node, node_online_map); } } + + pv_spinlock_init(); } /* called from main before smp_init() */ base-commit: 94ede2a3e9135764736221c080ac7c0ad993dc2d -- 2.39.3