From: John Hubbard <jhubbard@xxxxxxxxxx>

Add five new /proc/vmstat items, to provide some visibility into what
get_user_pages() and put_user_page() are doing.

After booting and running fio (https://github.com/axboe/fio) a few times
on an NVMe device, as a way to get lots of get_user_pages_fast() calls,
the counters look like this:

  $ cat /proc/vmstat | grep gup
  nr_gup_slow_pages_requested 21319
  nr_gup_fast_pages_requested 11533792
  nr_gup_fast_page_backoffs 0
  nr_gup_page_count_overflows 0
  nr_gup_pages_returned 11555104

Interpretation of the above:

  Total gup requests (slow + fast): 11555111
  Total put_user_page calls:        11555104

This shows 7 more calls to get_user_pages*() than to put_user_page().
That may, or may not, represent a problem worth investigating.

Normally, those last two numbers should be equal, but a couple of things
may cause them to differ:

1) Inherent race condition in reading /proc/vmstat values.

2) Bugs at any of the get_user_pages*() call sites. Those sites need to
   match their get_user_pages*() calls with put_user_page() calls.

Signed-off-by: John Hubbard <jhubbard@xxxxxxxxxx>
---
 include/linux/mmzone.h |  5 +++++
 mm/gup.c               | 20 ++++++++++++++++++++
 mm/swap.c              |  1 +
 mm/vmstat.c            |  5 +++++
 4 files changed, 31 insertions(+)

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 842f9189537b..f20c14958a2b 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -183,6 +183,11 @@ enum node_stat_item {
 	NR_DIRTIED,		/* page dirtyings since bootup */
 	NR_WRITTEN,		/* page writings since bootup */
 	NR_KERNEL_MISC_RECLAIMABLE,	/* reclaimable non-slab kernel pages */
+	NR_GUP_SLOW_PAGES_REQUESTED,	/* via: get_user_pages() */
+	NR_GUP_FAST_PAGES_REQUESTED,	/* via: get_user_pages_fast() */
+	NR_GUP_FAST_PAGE_BACKOFFS,	/* gup_fast() lost to page_mkclean() */
+	NR_GUP_PAGE_COUNT_OVERFLOWS,	/* gup count overflowed: gup() failed */
+	NR_GUP_PAGES_RETURNED,		/* via: put_user_page() */
 	NR_VM_NODE_STAT_ITEMS
 };
 
diff --git a/mm/gup.c b/mm/gup.c
index 3291da342f9c..848ee7899831 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -37,6 +37,8 @@ int get_gup_pin_page(struct page *page)
 	page = compound_head(page);
 
 	if (page_ref_count(page) >= (UINT_MAX - GUP_PIN_COUNTING_BIAS)) {
+		mod_node_page_state(page_pgdat(page),
+				    NR_GUP_PAGE_COUNT_OVERFLOWS, 1);
 		WARN_ONCE(1, "get_user_pages pin count overflowed");
 		return -EOVERFLOW;
 	}
@@ -184,6 +186,8 @@ static struct page *follow_page_pte(struct vm_area_struct *vma,
 			page = ERR_PTR(ret);
 			goto out;
 		}
+		mod_node_page_state(page_pgdat(page),
+				    NR_GUP_SLOW_PAGES_REQUESTED, 1);
 	}
 	if (flags & FOLL_TOUCH) {
 		if ((flags & FOLL_WRITE) &&
@@ -527,6 +531,8 @@ static int get_gate_page(struct mm_struct *mm, unsigned long address,
 		ret = get_gup_pin_page(*page);
 		if (ret)
 			goto unmap;
+
+		mod_node_page_state(page_pgdat(*page), NR_GUP_SLOW_PAGES_REQUESTED, 1);
 out:
 	ret = 0;
 unmap:
@@ -1461,7 +1467,12 @@ static int gup_pte_range(pmd_t pmd, unsigned long addr, unsigned long end,
 		if (!page_cache_gup_pin_speculative(head))
 			goto pte_unmap;
 
+		mod_node_page_state(page_pgdat(head),
+				    NR_GUP_FAST_PAGES_REQUESTED, 1);
+
 		if (unlikely(pte_val(pte) != pte_val(*ptep))) {
+			mod_node_page_state(page_pgdat(head),
+					    NR_GUP_FAST_PAGE_BACKOFFS, 1);
 			put_user_page(head);
 			goto pte_unmap;
 		}
@@ -1522,6 +1533,9 @@ static int __gup_device_huge(unsigned long pfn, unsigned long addr,
 			return 0;
 		}
 
+		mod_node_page_state(page_pgdat(page),
+				    NR_GUP_FAST_PAGES_REQUESTED, 1);
+
 		(*nr)++;
 		pfn++;
 	} while (addr += PAGE_SIZE, addr != end);
@@ -1607,6 +1621,8 @@ static int gup_huge_pmd(pmd_t orig, pmd_t *pmdp, unsigned long addr,
 		return 0;
 	}
 
+	mod_node_page_state(page_pgdat(head), NR_GUP_FAST_PAGES_REQUESTED, 1);
+
 	if (unlikely(pmd_val(orig) != pmd_val(*pmdp))) {
 		*nr -= refs;
 		put_user_page(head);
@@ -1644,6 +1660,8 @@ static int gup_huge_pud(pud_t orig, pud_t *pudp, unsigned long addr,
 		return 0;
 	}
 
+	mod_node_page_state(page_pgdat(head), NR_GUP_FAST_PAGES_REQUESTED, 1);
+
 	if (unlikely(pud_val(orig) != pud_val(*pudp))) {
 		*nr -= refs;
 		put_user_page(head);
@@ -1680,6 +1698,8 @@ static int gup_huge_pgd(pgd_t orig, pgd_t *pgdp, unsigned long addr,
 		return 0;
 	}
 
+	mod_node_page_state(page_pgdat(head), NR_GUP_FAST_PAGES_REQUESTED, 1);
+
 	if (unlikely(pgd_val(orig) != pgd_val(*pgdp))) {
 		*nr -= refs;
 		put_user_page(head);
diff --git a/mm/swap.c b/mm/swap.c
index 39b0ddd35933..49e192f242d4 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -150,6 +150,7 @@ void put_user_page(struct page *page)
 	VM_BUG_ON_PAGE(page_ref_count(page) < GUP_PIN_COUNTING_BIAS, page);
 
+	mod_node_page_state(page_pgdat(page), NR_GUP_PAGES_RETURNED, 1);
 	page_ref_sub(page, GUP_PIN_COUNTING_BIAS);
 }
 EXPORT_SYMBOL(put_user_page);
diff --git a/mm/vmstat.c b/mm/vmstat.c
index 83b30edc2f7f..18a1a4a2dd29 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -1164,6 +1164,11 @@ const char * const vmstat_text[] = {
 	"nr_dirtied",
 	"nr_written",
 	"nr_kernel_misc_reclaimable",
+	"nr_gup_slow_pages_requested",
+	"nr_gup_fast_pages_requested",
+	"nr_gup_fast_page_backoffs",
+	"nr_gup_page_count_overflows",
+	"nr_gup_pages_returned",
 
 	/* enum writeback_stat_item counters */
 	"nr_dirty_threshold",
-- 
2.20.1
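
For illustration only (not part of the patch): the pairing that item 2) in the
commit message refers to looks roughly like the sketch below. The function name
my_driver_do_dma() is made up, the error handling is deliberately minimal, and
the get_user_pages_fast() prototype shown (with an int "write" argument) is the
one from this kernel generation, so treat this as a sketch of the pairing rather
than reference code:

#include <linux/mm.h>

/*
 * Illustrative sketch: pin some user pages, use them, then release the pins.
 */
static int my_driver_do_dma(unsigned long uaddr, int nr_pages,
			    struct page **pages)
{
	int i, pinned;

	/* The pins taken here show up in nr_gup_fast_pages_requested. */
	pinned = get_user_pages_fast(uaddr, nr_pages, 1, pages);
	if (pinned <= 0)
		return pinned ? pinned : -EFAULT;

	/* ... set up the DMA and wait for it to complete ... */

	/* The pins dropped here show up in nr_gup_pages_returned. */
	for (i = 0; i < pinned; i++)
		put_user_page(pages[i]);

	return 0;
}

A call site written this way keeps the "requested" and "returned" counters in
balance; a site that drops pages with put_page() instead, or forgets to release
them on an error path, shows up as a growing gap between the two totals.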