Draining of pages from the local pcp for a remote zone should not be necessary, since once the system is low on memory (or compaction on a zone is in effect), drain_all_pages should be called freeing any unused pcps. For reference, the original commit which introduces remote node draining is 4037d452202e34214e8a939fa5621b2b3bbb45b7. Acked-by: David Hildenbrand <david@xxxxxxxxxx> Signed-off-by: Marcelo Tosatti <mtosatti@xxxxxxxxxx> Index: linux-vmstat-remote/include/linux/mmzone.h =================================================================== --- linux-vmstat-remote.orig/include/linux/mmzone.h +++ linux-vmstat-remote/include/linux/mmzone.h @@ -679,9 +679,6 @@ struct per_cpu_pages { int high; /* high watermark, emptying needed */ int batch; /* chunk size for buddy add/remove */ short free_factor; /* batch scaling factor during free */ -#ifdef CONFIG_NUMA - short expire; /* When 0, remote pagesets are drained */ -#endif /* Lists of pages, one per migrate type stored on the pcp-lists */ struct list_head lists[NR_PCP_LISTS]; Index: linux-vmstat-remote/mm/vmstat.c =================================================================== --- linux-vmstat-remote.orig/mm/vmstat.c +++ linux-vmstat-remote/mm/vmstat.c @@ -803,20 +803,16 @@ static int fold_diff(int *zone_diff, int * * The function returns the number of global counters updated. */ -static int refresh_cpu_vm_stats(bool do_pagesets) +static int refresh_cpu_vm_stats(void) { struct pglist_data *pgdat; struct zone *zone; int i; int global_zone_diff[NR_VM_ZONE_STAT_ITEMS] = { 0, }; int global_node_diff[NR_VM_NODE_STAT_ITEMS] = { 0, }; - int changes = 0; for_each_populated_zone(zone) { struct per_cpu_zonestat __percpu *pzstats = zone->per_cpu_zonestats; -#ifdef CONFIG_NUMA - struct per_cpu_pages __percpu *pcp = zone->per_cpu_pageset; -#endif for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++) { int v; @@ -826,44 +822,8 @@ static int refresh_cpu_vm_stats(bool do_ atomic_long_add(v, &zone->vm_stat[i]); global_zone_diff[i] += v; -#ifdef CONFIG_NUMA - /* 3 seconds idle till flush */ - __this_cpu_write(pcp->expire, 3); -#endif } } -#ifdef CONFIG_NUMA - - if (do_pagesets) { - cond_resched(); - /* - * Deal with draining the remote pageset of this - * processor - * - * Check if there are pages remaining in this pageset - * if not then there is nothing to expire. - */ - if (!__this_cpu_read(pcp->expire) || - !__this_cpu_read(pcp->count)) - continue; - - /* - * We never drain zones local to this processor. - */ - if (zone_to_nid(zone) == numa_node_id()) { - __this_cpu_write(pcp->expire, 0); - continue; - } - - if (__this_cpu_dec_return(pcp->expire)) - continue; - - if (__this_cpu_read(pcp->count)) { - drain_zone_pages(zone, this_cpu_ptr(pcp)); - changes++; - } - } -#endif } for_each_online_pgdat(pgdat) { @@ -880,8 +840,7 @@ static int refresh_cpu_vm_stats(bool do_ } } - changes += fold_diff(global_zone_diff, global_node_diff); - return changes; + return fold_diff(global_zone_diff, global_node_diff); } /* @@ -1867,7 +1826,7 @@ int sysctl_stat_interval __read_mostly = #ifdef CONFIG_PROC_FS static void refresh_vm_stats(struct work_struct *work) { - refresh_cpu_vm_stats(true); + refresh_cpu_vm_stats(); } int vmstat_refresh(struct ctl_table *table, int write, @@ -1877,6 +1836,8 @@ int vmstat_refresh(struct ctl_table *tab int err; int i; + drain_all_pages(NULL); + /* * The regular update, every sysctl_stat_interval, may come later * than expected: leaving a significant amount in per_cpu buckets. @@ -1931,7 +1892,7 @@ int vmstat_refresh(struct ctl_table *tab static void vmstat_update(struct work_struct *w) { - if (refresh_cpu_vm_stats(true)) { + if (refresh_cpu_vm_stats()) { /* * Counters were updated so we expect more updates * to occur in the future. Keep on running the @@ -1994,7 +1955,7 @@ void quiet_vmstat(void) * it would be too expensive from this path. * vmstat_shepherd will take care about that for us. */ - refresh_cpu_vm_stats(false); + refresh_cpu_vm_stats(); } /* Index: linux-vmstat-remote/mm/page_alloc.c =================================================================== --- linux-vmstat-remote.orig/mm/page_alloc.c +++ linux-vmstat-remote/mm/page_alloc.c @@ -3176,26 +3176,6 @@ static int rmqueue_bulk(struct zone *zon return allocated; } -#ifdef CONFIG_NUMA -/* - * Called from the vmstat counter updater to drain pagesets of this - * currently executing processor on remote nodes after they have - * expired. - */ -void drain_zone_pages(struct zone *zone, struct per_cpu_pages *pcp) -{ - int to_drain, batch; - - batch = READ_ONCE(pcp->batch); - to_drain = min(pcp->count, batch); - if (to_drain > 0) { - spin_lock(&pcp->lock); - free_pcppages_bulk(zone, to_drain, pcp, 0); - spin_unlock(&pcp->lock); - } -} -#endif - /* * Drain pcplists of the indicated processor and zone. */