Some setups, notably NOHZ_FULL CPUs, are too busy to handle the per-cpu
drain work queued by __lru_add_drain_all(). So introduce a new mechanism
to remotely drain the per-cpu lists. This is made possible by a more
constraining locking scheme, which is disabled by default and can be
enabled through the 'remote_pcpu_cache_access' static key. Regular users
shouldn't see any functional or performance change. Upcoming patches
will make use of the key.

Based on previous work by Thomas Gleixner, Anna-Maria Gleixner, and
Sebastian Andrzej Siewior[1].

[1] https://patchwork.kernel.org/project/linux-mm/cover/20190424111208.24459-1-bigeasy@xxxxxxxxxxxxx/

Signed-off-by: Nicolas Saenz Julienne <nsaenzju@xxxxxxxxxx>
---
 mm/swap.c | 32 +++++++++++++++++++++-----------
 1 file changed, 21 insertions(+), 11 deletions(-)

diff --git a/mm/swap.c b/mm/swap.c
index bcf73bd563a6..59e96a2520d5 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -915,19 +915,29 @@ inline void __lru_add_drain_all(bool force_all_cpus)
 	WRITE_ONCE(lru_drain_gen, lru_drain_gen + 1);
 	smp_mb();
 
-	cpumask_clear(&has_work);
-	for_each_online_cpu(cpu) {
-		struct work_struct *work = &per_cpu(lru_add_drain_work, cpu);
-
-		if (force_all_cpus || lru_cpu_needs_drain(cpu)) {
-			INIT_WORK(work, lru_add_drain_per_cpu);
-			queue_work_on(cpu, mm_percpu_wq, work);
-			__cpumask_set_cpu(cpu, &has_work);
+	if (static_branch_unlikely(&remote_pcpu_cache_access)) {
+		for_each_online_cpu(cpu) {
+			if (force_all_cpus || lru_cpu_needs_drain(cpu)) {
+				lru_cache_lock_cpu(&lru_pvecs.locks, cpu);
+				lru_add_drain_cpu(cpu);
+				lru_cache_unlock_cpu(&lru_pvecs.locks, cpu);
+			}
+		}
+	} else {
+		cpumask_clear(&has_work);
+		for_each_online_cpu(cpu) {
+			struct work_struct *work = &per_cpu(lru_add_drain_work, cpu);
+
+			if (force_all_cpus || lru_cpu_needs_drain(cpu)) {
+				INIT_WORK(work, lru_add_drain_per_cpu);
+				queue_work_on(cpu, mm_percpu_wq, work);
+				__cpumask_set_cpu(cpu, &has_work);
+			}
 		}
-	}
 
-	for_each_cpu(cpu, &has_work)
-		flush_work(&per_cpu(lru_add_drain_work, cpu));
+		for_each_cpu(cpu, &has_work)
+			flush_work(&per_cpu(lru_add_drain_work, cpu));
+	}
 
 done:
 	mutex_unlock(&lock);
-- 
2.31.1
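
Note for context (not part of the patch): the new branch above relies on
lru_cache_lock_cpu()/lru_cache_unlock_cpu() and the 'lru_pvecs.locks'
member, which this series introduces in another patch. Below is a
minimal sketch of how such helpers could look, assuming a hypothetical
'struct lru_cache_locks' that pairs the existing local_lock_t with a
spinlock_t -- the names and layout here are illustrative, not the
series' actual definitions:

#include <linux/local_lock.h>
#include <linux/spinlock.h>
#include <linux/jump_label.h>
#include <linux/percpu.h>

/* Disabled by default; NOHZ_FULL setups would enable it. */
DEFINE_STATIC_KEY_FALSE(remote_pcpu_cache_access);

/* Assumed layout: each per-cpu pagevec set carries one of these. */
struct lru_cache_locks {
	local_lock_t	local;	/* default: pagevec only touched by its CPU */
	spinlock_t	spin;	/* stricter: permits remote access */
};

static inline void lru_cache_lock_cpu(struct lru_cache_locks __percpu *locks,
				      int cpu)
{
	if (static_branch_unlikely(&remote_pcpu_cache_access))
		/* Any CPU may take @cpu's lock, enabling remote draining. */
		spin_lock(per_cpu_ptr(&locks->spin, cpu));
	else
		/* Only valid when the caller is running on @cpu. */
		local_lock(&locks->local);
}

static inline void lru_cache_unlock_cpu(struct lru_cache_locks __percpu *locks,
					int cpu)
{
	if (static_branch_unlikely(&remote_pcpu_cache_access))
		spin_unlock(per_cpu_ptr(&locks->spin, cpu));
	else
		local_unlock(&locks->local);
}

With the key disabled, the per-cpu pagevecs keep their cheap local_lock
(essentially preempt_disable() on non-PREEMPT_RT kernels), which is why
regular users shouldn't notice any change; enabling the key trades that
for a spinlock any CPU can take, letting __lru_add_drain_all() drain a
busy NOHZ_FULL CPU's pagevecs without queueing work on it.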