On Mon, Apr 01, 2013 at 09:26:05PM +0800, Shaohua Li wrote:
>In page reclaim, a huge page is split and split_huge_page() adds the tail
>pages to the LRU list. Since we are reclaiming the huge page, it is better
>to reclaim all of its subpages instead of just the head page. This patch
>adds the split tail pages to the shrink page list so the tail pages can be
>reclaimed soon.
>
>Running a swap workload before this patch:
>thp_fault_alloc 3492
>thp_fault_fallback 608
>thp_collapse_alloc 6
>thp_collapse_alloc_failed 0
>thp_split 916
>
>With this patch:
>thp_fault_alloc 4085
>thp_fault_fallback 16
>thp_collapse_alloc 90
>thp_collapse_alloc_failed 0
>thp_split 1272
>
>Fallback allocation is reduced a lot.
>
>Signed-off-by: Shaohua Li <shli@xxxxxxxxxxxx>

Nice! Reviewed-by: Wanpeng Li <liwanp@xxxxxxxxxxxxxxxxxx>

>---
> include/linux/huge_mm.h | 11 ++++++++++-
> include/linux/swap.h    |  4 ++--
> mm/huge_memory.c        | 14 ++++++++------
> mm/swap.c               | 11 ++++++++---
> mm/swap_state.c         |  4 ++--
> mm/vmscan.c             |  2 +-
> 6 files changed, 31 insertions(+), 15 deletions(-)
>
>Index: linux/include/linux/huge_mm.h
>===================================================================
>--- linux.orig/include/linux/huge_mm.h	2013-04-01 20:16:23.822120955 +0800
>+++ linux/include/linux/huge_mm.h	2013-04-01 20:18:22.668627309 +0800
>@@ -99,7 +99,11 @@ extern int copy_pte_range(struct mm_stru
> extern int handle_pte_fault(struct mm_struct *mm,
> 			struct vm_area_struct *vma, unsigned long address,
> 			pte_t *pte, pmd_t *pmd, unsigned int flags);
>-extern int split_huge_page(struct page *page);
>+extern int split_huge_page_to_list(struct page *page, struct list_head *list);
>+static inline int split_huge_page(struct page *page)
>+{
>+	return split_huge_page_to_list(page, NULL);
>+}
> extern void __split_huge_page_pmd(struct vm_area_struct *vma,
> 			unsigned long address, pmd_t *pmd);
> #define split_huge_page_pmd(__vma, __address, __pmd)	\
>@@ -186,6 +190,11 @@ extern int do_huge_pmd_numa_page(struct
> #define transparent_hugepage_enabled(__vma) 0
>
> #define transparent_hugepage_flags 0UL
>+static inline int
>+split_huge_page_to_list(struct page *page, struct list_head *list)
>+{
>+	return 0;
>+}
> static inline int split_huge_page(struct page *page)
> {
> 	return 0;
>Index: linux/include/linux/swap.h
>===================================================================
>--- linux.orig/include/linux/swap.h	2013-04-01 20:16:23.810121105 +0800
>+++ linux/include/linux/swap.h	2013-04-01 20:18:22.668627309 +0800
>@@ -236,7 +236,7 @@ extern unsigned long nr_free_pagecache_p
> extern void __lru_cache_add(struct page *, enum lru_list lru);
> extern void lru_cache_add_lru(struct page *, enum lru_list lru);
> extern void lru_add_page_tail(struct page *page, struct page *page_tail,
>-			 struct lruvec *lruvec);
>+			 struct lruvec *lruvec, struct list_head *head);
> extern void activate_page(struct page *);
> extern void mark_page_accessed(struct page *);
> extern void lru_add_drain(void);
>@@ -343,7 +343,7 @@ extern struct address_space swapper_spac
> #define swap_address_space(entry) (&swapper_spaces[swp_type(entry)])
> extern unsigned long total_swapcache_pages(void);
> extern void show_swap_cache_info(void);
>-extern int add_to_swap(struct page *);
>+extern int add_to_swap(struct page *, struct list_head *list);
> extern int add_to_swap_cache(struct page *, swp_entry_t, gfp_t);
> extern void __delete_from_swap_cache(struct page *);
> extern void delete_from_swap_cache(struct page *);
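A side note on the huge_mm.h hunk above: keeping split_huge_page() as an
inline wrapper that passes a NULL list means no existing caller has to
change. Roughly, the calling convention being introduced is the following
(a user-space toy sketch; all names here are invented for illustration and
are not kernel code):

#include <stdio.h>

struct list_head { struct list_head *next, *prev; };

/* New entry point: tail pages would go to 'list' when one is supplied. */
static int split_to_list(void *page, struct list_head *list)
{
	printf("splitting %p, tails -> %s\n", page,
	       list ? "caller's list" : "LRU");
	return 0;
}

/* Old entry point kept as a trivial wrapper, mirroring the hunk above. */
static int split_page_compat(void *page)
{
	return split_to_list(page, NULL);
}

int main(void)
{
	struct list_head shrink_list = { &shrink_list, &shrink_list };
	int page;

	split_page_compat(&page);           /* legacy caller, unchanged */
	split_to_list(&page, &shrink_list); /* the new reclaim path */
	return 0;
}
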
>Index: linux/mm/huge_memory.c
>===================================================================
>--- linux.orig/mm/huge_memory.c	2013-04-01 20:16:23.798121258 +0800
>+++ linux/mm/huge_memory.c	2013-04-01 20:18:43.020371209 +0800
>@@ -1560,7 +1560,8 @@ static int __split_huge_page_splitting(s
> 	return ret;
> }
>
>-static void __split_huge_page_refcount(struct page *page)
>+static void __split_huge_page_refcount(struct page *page,
>+				       struct list_head *list)
> {
> 	int i;
> 	struct zone *zone = page_zone(page);
>@@ -1646,7 +1647,7 @@ static void __split_huge_page_refcount(s
> 		BUG_ON(!PageDirty(page_tail));
> 		BUG_ON(!PageSwapBacked(page_tail));
>
>-		lru_add_page_tail(page, page_tail, lruvec);
>+		lru_add_page_tail(page, page_tail, lruvec, list);
> 	}
> 	atomic_sub(tail_count, &page->_count);
> 	BUG_ON(atomic_read(&page->_count) <= 0);
>@@ -1753,7 +1754,8 @@ static int __split_huge_page_map(struct
>
> /* must be called with anon_vma->root->rwsem held */
> static void __split_huge_page(struct page *page,
>-			      struct anon_vma *anon_vma)
>+			      struct anon_vma *anon_vma,
>+			      struct list_head *list)
> {
> 	int mapcount, mapcount2;
> 	pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
>@@ -1784,7 +1786,7 @@ static void __split_huge_page(struct pag
> 		       mapcount, page_mapcount(page));
> 	BUG_ON(mapcount != page_mapcount(page));
>
>-	__split_huge_page_refcount(page);
>+	__split_huge_page_refcount(page, list);
>
> 	mapcount2 = 0;
> 	anon_vma_interval_tree_foreach(avc, &anon_vma->rb_root, pgoff, pgoff) {
>@@ -1799,7 +1801,7 @@ static void __split_huge_page(struct pag
> 	BUG_ON(mapcount != mapcount2);
> }
>
>-int split_huge_page(struct page *page)
>+int split_huge_page_to_list(struct page *page, struct list_head *list)
> {
> 	struct anon_vma *anon_vma;
> 	int ret = 1;
>@@ -1824,7 +1826,7 @@ int split_huge_page(struct page *page)
> 		goto out_unlock;
>
> 	BUG_ON(!PageSwapBacked(page));
>-	__split_huge_page(page, anon_vma);
>+	__split_huge_page(page, anon_vma, list);
> 	count_vm_event(THP_SPLIT);
>
> 	BUG_ON(PageCompound(page));
>Index: linux/mm/swap.c
>===================================================================
>--- linux.orig/mm/swap.c	2013-04-01 20:16:23.794121307 +0800
>+++ linux/mm/swap.c	2013-04-01 20:18:22.668627309 +0800
>@@ -737,7 +737,7 @@ EXPORT_SYMBOL(__pagevec_release);
> #ifdef CONFIG_TRANSPARENT_HUGEPAGE
> /* used by __split_huge_page_refcount() */
> void lru_add_page_tail(struct page *page, struct page *page_tail,
>-		       struct lruvec *lruvec)
>+		       struct lruvec *lruvec, struct list_head *list)
> {
> 	int uninitialized_var(active);
> 	enum lru_list lru;
>@@ -749,7 +749,8 @@ void lru_add_page_tail(struct page *page
> 	VM_BUG_ON(NR_CPUS != 1 &&
> 		  !spin_is_locked(&lruvec_zone(lruvec)->lru_lock));
>
>-	SetPageLRU(page_tail);
>+	if (!list)
>+		SetPageLRU(page_tail);
>
> 	if (page_evictable(page_tail)) {
> 		if (PageActive(page)) {
>@@ -767,7 +768,11 @@ void lru_add_page_tail(struct page *page
>
> 	if (likely(PageLRU(page)))
> 		list_add_tail(&page_tail->lru, &page->lru);
>-	else {
>+	else if (list) {
>+		/* page reclaim is reclaiming a huge page */
>+		get_page(page_tail);
>+		list_add_tail(&page_tail->lru, list);
>+	} else {
> 		struct list_head *list_head;
> 		/*
> 		 * Head page has not yet been counted, as an hpage,
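The lru_add_page_tail() change above is the heart of the patch: when
reclaim passes its private list, the tail page is not marked PageLRU, and a
reference is taken before queueing it, since the shrink list rather than
the LRU now holds the page. A minimal user-space rendition of just that
branch (list_add_tail and the structs are simplified stand-ins I wrote for
illustration, not the kernel's):

#include <stdio.h>

struct list_head { struct list_head *next, *prev; };

static void list_add_tail(struct list_head *new, struct list_head *head)
{
	new->prev = head->prev;
	new->next = head;
	head->prev->next = new;
	head->prev = new;
}

struct toy_page { int refcount; struct list_head lru; };

static void get_page(struct toy_page *p) { p->refcount++; }

/* Mirrors the new branch: a caller-supplied list wins over the shared LRU. */
static void add_tail(struct toy_page *tail, struct list_head *lru,
		     struct list_head *list)
{
	if (list) {
		get_page(tail);	/* the shrink list now holds a reference */
		list_add_tail(&tail->lru, list);
	} else {
		list_add_tail(&tail->lru, lru);
	}
}

int main(void)
{
	struct list_head lru = { &lru, &lru };
	struct list_head shrink = { &shrink, &shrink };
	struct toy_page tail = { 1, { 0, 0 } };

	add_tail(&tail, &lru, &shrink);
	printf("refcount after queueing for reclaim: %d\n", tail.refcount);
	return 0;
}
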
>Index: linux/mm/swap_state.c
>===================================================================
>--- linux.orig/mm/swap_state.c	2013-04-01 20:16:23.778121508 +0800
>+++ linux/mm/swap_state.c	2013-04-01 20:18:22.668627309 +0800
>@@ -160,7 +160,7 @@ void __delete_from_swap_cache(struct pag
>  * Allocate swap space for the page and add the page to the
>  * swap cache.  Caller needs to hold the page lock.
>  */
>-int add_to_swap(struct page *page)
>+int add_to_swap(struct page *page, struct list_head *list)
> {
> 	swp_entry_t entry;
> 	int err;
>@@ -173,7 +173,7 @@ int add_to_swap(struct page *page)
> 		return 0;
>
> 	if (unlikely(PageTransHuge(page)))
>-		if (unlikely(split_huge_page(page))) {
>+		if (unlikely(split_huge_page_to_list(page, list))) {
> 			swapcache_free(entry, NULL);
> 			return 0;
> 		}
>Index: linux/mm/vmscan.c
>===================================================================
>--- linux.orig/mm/vmscan.c	2013-04-01 20:16:23.782121457 +0800
>+++ linux/mm/vmscan.c	2013-04-01 20:18:22.668627309 +0800
>@@ -780,7 +780,7 @@ static unsigned long shrink_page_list(st
> 		if (PageAnon(page) && !PageSwapCache(page)) {
> 			if (!(sc->gfp_mask & __GFP_IO))
> 				goto keep_locked;
>-			if (!add_to_swap(page))
>+			if (!add_to_swap(page, page_list))
> 				goto activate_locked;
> 			may_enter_fs = 1;
> 		}
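One thing worth spelling out about the vmscan.c hook: passing page_list
into add_to_swap() means the tail pages land on the very list that
shrink_page_list() is draining, so they are processed in the same pass
rather than waiting for a later LRU scan. That is why the tails are
reclaimed "soon". A tiny sketch of that work-list behavior (toy types and
names of my own, not the kernel loop):

#include <stdio.h>

/* Toy model of the shrink_page_list() walk: entries appended while the
 * list is being drained are handled by the same pass. */
struct item { int is_huge; struct item *next; };

int main(void)
{
	struct item tails[3];		/* stand-ins for THP tail pages */
	struct item huge = { 1, 0 };	/* stand-in for the head page */
	struct item *work = &huge;	/* the local "page_list" */
	int i, reclaimed = 0;

	while (work) {
		struct item *it = work;

		work = work->next;
		if (it->is_huge) {
			/* "split": queue the tails on the same list */
			for (i = 0; i < 3; i++) {
				tails[i].is_huge = 0;
				tails[i].next = work;
				work = &tails[i];
			}
			it->is_huge = 0;
		}
		reclaimed++;	/* head and tails all freed this pass */
	}
	printf("reclaimed %d pages in one pass\n", reclaimed);
	return 0;
}
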