Prohibit moving certain threads (e.g., in irq and nmi contexts) to the secondary queue. Those prioritized threads will always stay in the primary queue, and so will have a shorter wait time for the lock. Signed-off-by: Alex Kogan <alex.kogan@xxxxxxxxxx> Reviewed-by: Steve Sistare <steven.sistare@xxxxxxxxxx> Reviewed-by: Waiman Long <longman@xxxxxxxxxx> --- kernel/locking/qspinlock_cna.h | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/kernel/locking/qspinlock_cna.h b/kernel/locking/qspinlock_cna.h index 0b991c340fb1..ffc5c3301f0f 100644 --- a/kernel/locking/qspinlock_cna.h +++ b/kernel/locking/qspinlock_cna.h @@ -6,6 +6,7 @@ #include <linux/topology.h> #include <linux/sched/clock.h> #include <linux/moduleparam.h> +#include <linux/sched/rt.h> /* * Implement a NUMA-aware version of MCS (aka CNA, or compact NUMA-aware lock). @@ -37,7 +38,8 @@ * running on the same NUMA node. If it is not, that waiter is detached from the * main queue and moved into the tail of the secondary queue. This way, we * gradually filter the primary queue, leaving only waiters running on the same - * preferred NUMA node. + * preferred NUMA node. Note that certain priortized waiters (e.g., in + * irq and nmi contexts) are excluded from being moved to the secondary queue. * * We change the NUMA node preference after a waiter at the head of the * secondary queue spins for a certain amount of time (1ms, by default). @@ -53,6 +55,8 @@ #define FLUSH_SECONDARY_QUEUE 1 +#define CNA_PRIORITY_NODE 0xffff + struct cna_node { struct mcs_spinlock mcs; u16 numa_node; @@ -111,9 +115,10 @@ static int __init cna_init_nodes(void) static __always_inline void cna_init_node(struct mcs_spinlock *node) { + bool priority = !in_task() || irqs_disabled() || rt_task(current); struct cna_node *cn = (struct cna_node *)node; - cn->numa_node = cn->real_numa_node; + cn->numa_node = priority ? CNA_PRIORITY_NODE : cn->real_numa_node; cn->start_time = 0; } @@ -252,7 +257,7 @@ static int cna_order_queue(struct mcs_spinlock *node) numa_node = cn->numa_node; next_numa_node = ((struct cna_node *)next)->numa_node; - if (next_numa_node != numa_node) { + if (next_numa_node != numa_node && next_numa_node != CNA_PRIORITY_NODE) { struct mcs_spinlock *nnext = READ_ONCE(next->next); if (nnext) @@ -272,6 +277,13 @@ static __always_inline u32 cna_wait_head_or_lock(struct qspinlock *lock, struct cna_node *cn = (struct cna_node *)node; if (!cn->start_time || !intra_node_threshold_reached(cn)) { + /* + * We are at the head of the wait queue, no need to use + * the fake NUMA node ID. + */ + if (cn->numa_node == CNA_PRIORITY_NODE) + cn->numa_node = cn->real_numa_node; + /* * Try and put the time otherwise spent spin waiting on * _Q_LOCKED_PENDING_MASK to use by sorting our lists. -- 2.24.3 (Apple Git-128)