It is not necessary to queue work item to run refresh_vm_stats on a remote CPU if that CPU has no dirty stats and no per-CPU allocations for remote nodes. This fixes sosreport hang (which uses vmstat_refresh) with spinning SCHED_FIFO process. Signed-off-by: Marcelo Tosatti <mtosatti@xxxxxxxxxx> Index: linux-2.6/mm/vmstat.c =================================================================== --- linux-2.6.orig/mm/vmstat.c +++ linux-2.6/mm/vmstat.c @@ -1917,6 +1917,31 @@ static const struct seq_operations vmsta #ifdef CONFIG_SMP #ifdef CONFIG_PROC_FS +static bool need_drain_remote_zones(int cpu) +{ +#ifdef CONFIG_NUMA + struct zone *zone; + + for_each_populated_zone(zone) { + struct per_cpu_pages *pcp; + + pcp = per_cpu_ptr(zone->per_cpu_pageset, cpu); + if (!pcp->count) + continue; + + if (!pcp->expire) + continue; + + if (zone_to_nid(zone) == cpu_to_node(cpu)) + continue; + + return true; + } +#endif + + return false; +} + static void refresh_vm_stats(struct work_struct *work) { refresh_cpu_vm_stats(true); @@ -1926,8 +1951,12 @@ int vmstat_refresh(struct ctl_table *tab void *buffer, size_t *lenp, loff_t *ppos) { long val; - int err; - int i; + int i, cpu; + struct work_struct __percpu *works; + + works = alloc_percpu(struct work_struct); + if (!works) + return -ENOMEM; /* * The regular update, every sysctl_stat_interval, may come later @@ -1941,9 +1970,21 @@ int vmstat_refresh(struct ctl_table *tab * transiently negative values, report an error here if any of * the stats is negative, so we know to go looking for imbalance. */ - err = schedule_on_each_cpu(refresh_vm_stats); - if (err) - return err; + cpus_read_lock(); + for_each_online_cpu(cpu) { + struct work_struct *work = per_cpu_ptr(works, cpu); + struct vmstat_dirty *vms = per_cpu_ptr(&vmstat_dirty_pcpu, cpu); + + INIT_WORK(work, refresh_vm_stats); + + if (vms->dirty || need_drain_remote_zones(cpu)) + schedule_work_on(cpu, work); + } + for_each_online_cpu(cpu) + flush_work(per_cpu_ptr(works, cpu)); + cpus_read_unlock(); + free_percpu(works); + for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++) { /* * Skip checking stats known to go negative occasionally.