From: John Hubbard <jhubbard@xxxxxxxxxx>

Add five new /proc/vmstat items, to provide some visibility into what
get_user_pages() and put_user_page() are doing.

After booting and running fio (https://github.com/axboe/fio) a few times
on an NVMe device, as a way to get lots of get_user_pages_fast() calls,
the counters look like this:

  $ cat /proc/vmstat | grep gup
  nr_gup_slow_pages_requested 21319
  nr_gup_fast_pages_requested 11533792
  nr_gup_fast_page_backoffs 0
  nr_gup_page_count_overflows 0
  nr_gup_pages_returned 11555104

Interpretation of the above:

  Total gup requests (slow + fast): 11555111
  Total put_user_page calls:        11555104

This shows 7 more calls to get_user_pages*() than to put_user_page().
That may, or may not, represent a problem worth investigating.

Normally, those last two numbers should be equal, but a couple of things
may cause them to differ:

1) Inherent race condition in reading /proc/vmstat values.

2) Bugs at any of the get_user_pages*() call sites. Those sites need to
   match their get_user_pages*() calls with put_user_page() calls.

Signed-off-by: John Hubbard <jhubbard@xxxxxxxxxx>
---
 include/linux/mmzone.h |  5 +++++
 mm/gup.c               | 20 ++++++++++++++++++++
 mm/swap.c              |  1 +
 mm/vmstat.c            |  5 +++++
 4 files changed, 31 insertions(+)

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 842f9189537b..f20c14958a2b 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -183,6 +183,11 @@ enum node_stat_item {
 	NR_DIRTIED,		/* page dirtyings since bootup */
 	NR_WRITTEN,		/* page writings since bootup */
 	NR_KERNEL_MISC_RECLAIMABLE,	/* reclaimable non-slab kernel pages */
+	NR_GUP_SLOW_PAGES_REQUESTED,	/* via: get_user_pages() */
+	NR_GUP_FAST_PAGES_REQUESTED,	/* via: get_user_pages_fast() */
+	NR_GUP_FAST_PAGE_BACKOFFS,	/* gup_fast() lost to page_mkclean() */
+	NR_GUP_PAGE_COUNT_OVERFLOWS,	/* gup count overflowed: gup() failed */
+	NR_GUP_PAGES_RETURNED,		/* via: put_user_page() */
 	NR_VM_NODE_STAT_ITEMS
 };
 
diff --git a/mm/gup.c b/mm/gup.c
index 3291da342f9c..848ee7899831 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -37,6 +37,8 @@ int get_gup_pin_page(struct page *page)
 	page = compound_head(page);
 
 	if (page_ref_count(page) >= (UINT_MAX - GUP_PIN_COUNTING_BIAS)) {
+		mod_node_page_state(page_pgdat(page),
+				    NR_GUP_PAGE_COUNT_OVERFLOWS, 1);
 		WARN_ONCE(1, "get_user_pages pin count overflowed");
 		return -EOVERFLOW;
 	}
@@ -184,6 +186,8 @@ static struct page *follow_page_pte(struct vm_area_struct *vma,
 			page = ERR_PTR(ret);
 			goto out;
 		}
+		mod_node_page_state(page_pgdat(page),
+				    NR_GUP_SLOW_PAGES_REQUESTED, 1);
 	}
 	if (flags & FOLL_TOUCH) {
 		if ((flags & FOLL_WRITE) &&
@@ -527,6 +531,8 @@ static int get_gate_page(struct mm_struct *mm, unsigned long address,
 		ret = get_gup_pin_page(*page);
 		if (ret)
 			goto unmap;
+
+		mod_node_page_state(page_pgdat(*page), NR_GUP_SLOW_PAGES_REQUESTED, 1);
 out:
 	ret = 0;
 unmap:
@@ -1461,7 +1467,12 @@ static int gup_pte_range(pmd_t pmd, unsigned long addr, unsigned long end,
 		if (!page_cache_gup_pin_speculative(head))
 			goto pte_unmap;
 
+		mod_node_page_state(page_pgdat(head),
+				    NR_GUP_FAST_PAGES_REQUESTED, 1);
+
 		if (unlikely(pte_val(pte) != pte_val(*ptep))) {
+			mod_node_page_state(page_pgdat(head),
+					    NR_GUP_FAST_PAGE_BACKOFFS, 1);
 			put_user_page(head);
 			goto pte_unmap;
 		}
@@ -1522,6 +1533,9 @@ static int __gup_device_huge(unsigned long pfn, unsigned long addr,
 			return 0;
 		}
 
+		mod_node_page_state(page_pgdat(page),
+				    NR_GUP_FAST_PAGES_REQUESTED, 1);
+
 		(*nr)++;
 		pfn++;
 	} while (addr += PAGE_SIZE, addr != end);
@@ -1607,6 +1621,8 @@ static int gup_huge_pmd(pmd_t orig, pmd_t *pmdp, unsigned long addr,
 		return 0;
 	}
 
+	mod_node_page_state(page_pgdat(head), NR_GUP_FAST_PAGES_REQUESTED, 1);
+
 	if (unlikely(pmd_val(orig) != pmd_val(*pmdp))) {
 		*nr -= refs;
 		put_user_page(head);
@@ -1644,6 +1660,8 @@ static int gup_huge_pud(pud_t orig, pud_t *pudp, unsigned long addr,
 		return 0;
 	}
 
+	mod_node_page_state(page_pgdat(head), NR_GUP_FAST_PAGES_REQUESTED, 1);
+
 	if (unlikely(pud_val(orig) != pud_val(*pudp))) {
 		*nr -= refs;
 		put_user_page(head);
@@ -1680,6 +1698,8 @@ static int gup_huge_pgd(pgd_t orig, pgd_t *pgdp, unsigned long addr,
 		return 0;
 	}
 
+	mod_node_page_state(page_pgdat(head), NR_GUP_FAST_PAGES_REQUESTED, 1);
+
 	if (unlikely(pgd_val(orig) != pgd_val(*pgdp))) {
 		*nr -= refs;
 		put_user_page(head);
diff --git a/mm/swap.c b/mm/swap.c
index 39b0ddd35933..49e192f242d4 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -150,6 +150,7 @@ void put_user_page(struct page *page)
 	VM_BUG_ON_PAGE(page_ref_count(page) < GUP_PIN_COUNTING_BIAS, page);
 
+	mod_node_page_state(page_pgdat(page), NR_GUP_PAGES_RETURNED, 1);
 	page_ref_sub(page, GUP_PIN_COUNTING_BIAS);
 }
 EXPORT_SYMBOL(put_user_page);
diff --git a/mm/vmstat.c b/mm/vmstat.c
index 83b30edc2f7f..18a1a4a2dd29 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -1164,6 +1164,11 @@ const char * const vmstat_text[] = {
 	"nr_dirtied",
 	"nr_written",
 	"nr_kernel_misc_reclaimable",
+	"nr_gup_slow_pages_requested",
+	"nr_gup_fast_pages_requested",
+	"nr_gup_fast_page_backoffs",
+	"nr_gup_page_count_overflows",
+	"nr_gup_pages_returned",
 
 	/* enum writeback_stat_item counters */
 	"nr_dirty_threshold",
-- 
2.20.1
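
For illustration only (not part of the patch): the pairing that item 2) in the
commit message refers to looks roughly like the sketch below. The function name
my_driver_do_dma() is made up, the error handling is deliberately minimal, and
the get_user_pages_fast() prototype shown (with an int "write" argument) is the
one from this kernel generation, so treat this as a sketch of the pairing rather
than reference code:

#include <linux/mm.h>

/*
 * Illustrative sketch: pin some user pages, use them, then release the pins.
 */
static int my_driver_do_dma(unsigned long uaddr, int nr_pages,
			    struct page **pages)
{
	int i, pinned;

	/* The pins taken here show up in nr_gup_fast_pages_requested. */
	pinned = get_user_pages_fast(uaddr, nr_pages, 1, pages);
	if (pinned <= 0)
		return pinned ? pinned : -EFAULT;

	/* ... set up the DMA and wait for it to complete ... */

	/* The pins dropped here show up in nr_gup_pages_returned. */
	for (i = 0; i < pinned; i++)
		put_user_page(pages[i]);

	return 0;
}

A call site written this way keeps the "requested" and "returned" counters in
balance; a site that drops pages with put_page() instead, or forgets to release
them on an error path, shows up as a growing gap between the two totals.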