On Tue, Jun 04, 2019 at 07:21:13PM -0400, Alex Kogan wrote:

> Trying to resume this work, I am looking for concrete steps required
> to integrate CNA with the paravirt patching.
>
> Looking at alternative_instructions(), I wonder if I need to add
> another call, something like apply_numa() similar to apply_paravirt(),
> and do the patch work there. Or perhaps I should “just" initialize
> the pv_ops structure with the corresponding
> numa_queued_spinlock_slowpath() in paravirt.c?

Yeah, just initialize the pv_ops.lock.* thingies to contain the numa
variant before apply_paravirt() happens (a sketch of this follows the
diff below).

> Also, the paravirt code is under arch/x86, while CNA is generic (not
> x86-specific). Do you still want to see CNA-related patching residing
> under arch/x86?
>
> We still need a config option (something like NUMA_AWARE_SPINLOCKS) to
> enable CNA patching under this config only, correct?

There is the static_call() stuff that could be generic; I posted a new
version of that today (x86 only for now, but IIRC there's arm64 patches
for that around somewhere too).

  https://lkml.kernel.org/r/20190605130753.327195108@xxxxxxxxxxxxx

Which would allow something a little like this:

diff --git a/arch/x86/include/asm/qspinlock.h b/arch/x86/include/asm/qspinlock.h
index bd5ac6cc37db..01feaf912bd7 100644
--- a/arch/x86/include/asm/qspinlock.h
+++ b/arch/x86/include/asm/qspinlock.h
@@ -63,29 +63,7 @@ static inline bool vcpu_is_preempted(long cpu)
 #endif

 #ifdef CONFIG_PARAVIRT
-DECLARE_STATIC_KEY_TRUE(virt_spin_lock_key);
-
 void native_pv_lock_init(void) __init;
-
-#define virt_spin_lock virt_spin_lock
-static inline bool virt_spin_lock(struct qspinlock *lock)
-{
-        if (!static_branch_likely(&virt_spin_lock_key))
-                return false;
-
-        /*
-         * On hypervisors without PARAVIRT_SPINLOCKS support we fall
-         * back to a Test-and-Set spinlock, because fair locks have
-         * horrible lock 'holder' preemption issues.
-         */
-
-        do {
-                while (atomic_read(&lock->val) != 0)
-                        cpu_relax();
-        } while (atomic_cmpxchg(&lock->val, 0, _Q_LOCKED_VAL) != 0);
-
-        return true;
-}
 #else
 static inline void native_pv_lock_init(void)
 {
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index 5169b8cc35bb..78be9e474e94 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -531,7 +531,7 @@ static void __init kvm_smp_prepare_cpus(unsigned int max_cpus)
 {
         native_smp_prepare_cpus(max_cpus);
         if (kvm_para_has_hint(KVM_HINTS_REALTIME))
-                static_branch_disable(&virt_spin_lock_key);
+                static_call_update(queued_spin_lock_slowpath, __queued_spin_lock_slowpath);
 }

 static void __init kvm_smp_prepare_boot_cpu(void)
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index 98039d7fb998..ae6d15f84867 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -105,12 +105,10 @@ static unsigned paravirt_patch_jmp(void *insn_buff, const void *target,
 }
 #endif

-DEFINE_STATIC_KEY_TRUE(virt_spin_lock_key);
-
 void __init native_pv_lock_init(void)
 {
-        if (!boot_cpu_has(X86_FEATURE_HYPERVISOR))
-                static_branch_disable(&virt_spin_lock_key);
+        if (boot_cpu_has(X86_FEATURE_HYPERVISOR))
+                static_call_update(queued_spin_lock_slowpath, __tas_spin_lock_slowpath);
 }

 unsigned paravirt_patch_default(u8 type, void *insn_buff,
diff --git a/arch/x86/xen/spinlock.c b/arch/x86/xen/spinlock.c
index 3776122c87cc..86808127b6e6 100644
--- a/arch/x86/xen/spinlock.c
+++ b/arch/x86/xen/spinlock.c
@@ -70,7 +70,7 @@ void xen_init_lock_cpu(int cpu)

         if (!xen_pvspin) {
                 if (cpu == 0)
-                        static_branch_disable(&virt_spin_lock_key);
+                        static_call_update(queued_spin_lock_slowpath, __queued_spin_lock_slowpath);
                 return;
         }

diff --git a/include/asm-generic/qspinlock.h b/include/asm-generic/qspinlock.h
index fde943d180e0..8ca4dd9db931 100644
--- a/include/asm-generic/qspinlock.h
+++ b/include/asm-generic/qspinlock.h
@@ -65,7 +65,9 @@ static __always_inline int queued_spin_trylock(struct qspinlock *lock)
         return likely(atomic_try_cmpxchg_acquire(&lock->val, &val, _Q_LOCKED_VAL));
 }

-extern void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val);
+extern void __queued_spin_lock_slowpath(struct qspinlock *lock, u32 val);
+
+DECLARE_STATIC_CALL(queued_spin_lock_slowpath, __queued_spin_lock_slowpath);

 /**
  * queued_spin_lock - acquire a queued spinlock
@@ -78,7 +80,7 @@ static __always_inline void queued_spin_lock(struct qspinlock *lock)
         if (likely(atomic_try_cmpxchg_acquire(&lock->val, &val, _Q_LOCKED_VAL)))
                 return;

-        queued_spin_lock_slowpath(lock, val);
+        static_call(queued_spin_lock_slowpath, lock, val);
 }

 #ifndef queued_spin_unlock
@@ -95,13 +97,6 @@ static __always_inline void queued_spin_unlock(struct qspinlock *lock)
 }
 #endif

-#ifndef virt_spin_lock
-static __always_inline bool virt_spin_lock(struct qspinlock *lock)
-{
-        return false;
-}
-#endif
-
 /*
  * Remapping spinlock architecture specific functions to the corresponding
  * queued spinlock functions.
diff --git a/kernel/locking/qspinlock.c b/kernel/locking/qspinlock.c
index 2473f10c6956..0e9e61637d56 100644
--- a/kernel/locking/qspinlock.c
+++ b/kernel/locking/qspinlock.c
@@ -290,6 +290,20 @@ static __always_inline u32 __pv_wait_head_or_lock(struct qspinlock *lock,

 #endif /* _GEN_PV_LOCK_SLOWPATH */

+void __tas_spin_lock_slowpath(struct qspinlock *lock, u32 val)
+{
+        /*
+         * On hypervisors without PARAVIRT_SPINLOCKS support we fall
+         * back to a Test-and-Set spinlock, because fair locks have
+         * horrible lock 'holder' preemption issues.
+         */
+
+        do {
+                while (atomic_read(&lock->val) != 0)
+                        cpu_relax();
+        } while (atomic_cmpxchg(&lock->val, 0, _Q_LOCKED_VAL) != 0);
+}
+
 /**
  * queued_spin_lock_slowpath - acquire the queued spinlock
  * @lock: Pointer to queued spinlock structure
@@ -311,7 +325,7 @@ static __always_inline u32 __pv_wait_head_or_lock(struct qspinlock *lock,
  * contended : (*,x,y) +--> (*,0,0) ---> (*,0,1) -'  :
  * queue     :         ^--'                          :
  */
-void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val)
+void __queued_spin_lock_slowpath(struct qspinlock *lock, u32 val)
 {
         struct mcs_spinlock *prev, *next, *node;
         u32 old, tail;
@@ -322,9 +336,6 @@ void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val)
         if (pv_enabled())
                 goto pv_queue;

-        if (virt_spin_lock(lock))
-                return;
-
         /*
          * Wait for in-progress pending->locked hand-overs with a bounded
          * number of spins so that we guarantee forward progress.
@@ -558,7 +569,9 @@ void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val)
          */
         __this_cpu_dec(qnodes[0].mcs.count);
 }
-EXPORT_SYMBOL(queued_spin_lock_slowpath);
+EXPORT_SYMBOL(__queued_spin_lock_slowpath);
+
+DEFINE_STATIC_CALL(queued_spin_lock_slowpath, __queued_spin_lock_slowpath);

 /*
  * Generate the paravirt code for queued_spin_unlock_slowpath().
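
For the pv_ops.lock.* route above, here is a minimal sketch of what that
early initialization could look like. It is only an illustration under the
assumptions of this thread: cna_configure_spin_lock_slowpath() is an
invented hook name, numa_queued_spinlock_slowpath() is the CNA entry point
as spelled in Alex's mail, and CONFIG_NUMA_AWARE_SPINLOCKS is the proposed
(not yet existing) config option. Whatever calls the hook has to do so
before alternative_instructions() runs apply_paravirt():

#include <linux/init.h>
#include <linux/types.h>
#include <asm/paravirt.h>

struct qspinlock;

#ifdef CONFIG_NUMA_AWARE_SPINLOCKS
/* CNA slowpath as named in Alex's mail; not an existing upstream symbol. */
extern void numa_queued_spinlock_slowpath(struct qspinlock *lock, u32 val);

/*
 * Sketch only: point pv_ops.lock at the NUMA-aware slowpath.  Must be
 * called before alternative_instructions()/apply_paravirt(), so that
 * anything dispatching through pv_ops.lock.queued_spin_lock_slowpath
 * picks up the CNA variant instead of the native one.
 */
void __init cna_configure_spin_lock_slowpath(void)
{
        pv_ops.lock.queued_spin_lock_slowpath = numa_queued_spinlock_slowpath;
}
#endif /* CONFIG_NUMA_AWARE_SPINLOCKS */

With the static_call() conversion from the diff above, the generic
equivalent would be a static_call_update(queued_spin_lock_slowpath,
numa_queued_spinlock_slowpath) from the same early-boot spot, which would
keep the CNA selection out of the x86-only pv_ops table.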