Re: [patch]THP: add split tail pages to shrink page list in page reclaim

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Hi Shaohua,

On Mon, Apr 01, 2013 at 09:26:05PM +0800, Shaohua Li wrote:
> In page reclaim, huge page is split. split_huge_page() adds tail pages to LRU
> list. Since we are reclaiming a huge page, it's better we reclaim all subpages
> of the huge page instead of just the head page. This patch adds split tail
> pages to shrink page list so the tail pages can be reclaimed soon.
> 
> Before this patch, run a swap workload:
> thp_fault_alloc 3492
> thp_fault_fallback 608
> thp_collapse_alloc 6
> thp_collapse_alloc_failed 0
> thp_split 916
> 
> With this patch:
> thp_fault_alloc 4085
> thp_fault_fallback 16
> thp_collapse_alloc 90
> thp_collapse_alloc_failed 0
> thp_split 1272
> 
> fallback allocation is reduced a lot.

My concern is about spatial locality across all the pages of the 2M region,
especially in the THP-always case. But yes, THP has already done this via
lru_add_page_tail, and yours makes more sense if we really intended it.

But I don't like passing page_list to split_huge_page, either.
Couldn't we do it in isolate_lru_pages in shrink_inactive_list?
Maybe we can add a new isolate_mode, ISOLATE_SPLIT_HUGEPAGE.
One problem I can see is a deadlock on zone->lru_lock, so maybe we would have
to release the lock, do the work, and then re-take it.

> 
> Signed-off-by: Shaohua Li <shli@xxxxxxxxxxxx>
> ---
>  include/linux/huge_mm.h |   11 ++++++++++-
>  include/linux/swap.h    |    4 ++--
>  mm/huge_memory.c        |   14 ++++++++------
>  mm/swap.c               |   11 ++++++++---
>  mm/swap_state.c         |    4 ++--
>  mm/vmscan.c             |    2 +-
>  6 files changed, 31 insertions(+), 15 deletions(-)
> 
> Index: linux/include/linux/huge_mm.h
> ===================================================================
> --- linux.orig/include/linux/huge_mm.h	2013-04-01 20:16:23.822120955 +0800
> +++ linux/include/linux/huge_mm.h	2013-04-01 20:18:22.668627309 +0800
> @@ -99,7 +99,11 @@ extern int copy_pte_range(struct mm_stru
>  extern int handle_pte_fault(struct mm_struct *mm,
>  			    struct vm_area_struct *vma, unsigned long address,
>  			    pte_t *pte, pmd_t *pmd, unsigned int flags);
> -extern int split_huge_page(struct page *page);
> +extern int split_huge_page_to_list(struct page *page, struct list_head *list);
> +static inline int split_huge_page(struct page *page)
> +{
> +	return split_huge_page_to_list(page, NULL);
> +}
>  extern void __split_huge_page_pmd(struct vm_area_struct *vma,
>  		unsigned long address, pmd_t *pmd);
>  #define split_huge_page_pmd(__vma, __address, __pmd)			\
> @@ -186,6 +190,11 @@ extern int do_huge_pmd_numa_page(struct
>  #define transparent_hugepage_enabled(__vma) 0
>  
>  #define transparent_hugepage_flags 0UL
> +static inline int
> +split_huge_page_to_list(struct page *page, struct list_head *list)
> +{
> +	return 0;
> +}
>  static inline int split_huge_page(struct page *page)
>  {
>  	return 0;
> Index: linux/include/linux/swap.h
> ===================================================================
> --- linux.orig/include/linux/swap.h	2013-04-01 20:16:23.810121105 +0800
> +++ linux/include/linux/swap.h	2013-04-01 20:18:22.668627309 +0800
> @@ -236,7 +236,7 @@ extern unsigned long nr_free_pagecache_p
>  extern void __lru_cache_add(struct page *, enum lru_list lru);
>  extern void lru_cache_add_lru(struct page *, enum lru_list lru);
>  extern void lru_add_page_tail(struct page *page, struct page *page_tail,
> -			      struct lruvec *lruvec);
> +			 struct lruvec *lruvec, struct list_head *head);
>  extern void activate_page(struct page *);
>  extern void mark_page_accessed(struct page *);
>  extern void lru_add_drain(void);
> @@ -343,7 +343,7 @@ extern struct address_space swapper_spac
>  #define swap_address_space(entry) (&swapper_spaces[swp_type(entry)])
>  extern unsigned long total_swapcache_pages(void);
>  extern void show_swap_cache_info(void);
> -extern int add_to_swap(struct page *);
> +extern int add_to_swap(struct page *, struct list_head *list);
>  extern int add_to_swap_cache(struct page *, swp_entry_t, gfp_t);
>  extern void __delete_from_swap_cache(struct page *);
>  extern void delete_from_swap_cache(struct page *);
> Index: linux/mm/huge_memory.c
> ===================================================================
> --- linux.orig/mm/huge_memory.c	2013-04-01 20:16:23.798121258 +0800
> +++ linux/mm/huge_memory.c	2013-04-01 20:18:43.020371209 +0800
> @@ -1560,7 +1560,8 @@ static int __split_huge_page_splitting(s
>  	return ret;
>  }
>  
> -static void __split_huge_page_refcount(struct page *page)
> +static void __split_huge_page_refcount(struct page *page,
> +				       struct list_head *list)
>  {
>  	int i;
>  	struct zone *zone = page_zone(page);
> @@ -1646,7 +1647,7 @@ static void __split_huge_page_refcount(s
>  		BUG_ON(!PageDirty(page_tail));
>  		BUG_ON(!PageSwapBacked(page_tail));
>  
> -		lru_add_page_tail(page, page_tail, lruvec);
> +		lru_add_page_tail(page, page_tail, lruvec, list);
>  	}
>  	atomic_sub(tail_count, &page->_count);
>  	BUG_ON(atomic_read(&page->_count) <= 0);
> @@ -1753,7 +1754,8 @@ static int __split_huge_page_map(struct
>  
>  /* must be called with anon_vma->root->rwsem held */
>  static void __split_huge_page(struct page *page,
> -			      struct anon_vma *anon_vma)
> +			      struct anon_vma *anon_vma,
> +			      struct list_head *list)
>  {
>  	int mapcount, mapcount2;
>  	pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
> @@ -1784,7 +1786,7 @@ static void __split_huge_page(struct pag
>  		       mapcount, page_mapcount(page));
>  	BUG_ON(mapcount != page_mapcount(page));
>  
> -	__split_huge_page_refcount(page);
> +	__split_huge_page_refcount(page, list);
>  
>  	mapcount2 = 0;
>  	anon_vma_interval_tree_foreach(avc, &anon_vma->rb_root, pgoff, pgoff) {
> @@ -1799,7 +1801,7 @@ static void __split_huge_page(struct pag
>  	BUG_ON(mapcount != mapcount2);
>  }
>  
> -int split_huge_page(struct page *page)
> +int split_huge_page_to_list(struct page *page, struct list_head *list)
>  {
>  	struct anon_vma *anon_vma;
>  	int ret = 1;
> @@ -1824,7 +1826,7 @@ int split_huge_page(struct page *page)
>  		goto out_unlock;
>  
>  	BUG_ON(!PageSwapBacked(page));
> -	__split_huge_page(page, anon_vma);
> +	__split_huge_page(page, anon_vma, list);
>  	count_vm_event(THP_SPLIT);
>  
>  	BUG_ON(PageCompound(page));
> Index: linux/mm/swap.c
> ===================================================================
> --- linux.orig/mm/swap.c	2013-04-01 20:16:23.794121307 +0800
> +++ linux/mm/swap.c	2013-04-01 20:18:22.668627309 +0800
> @@ -737,7 +737,7 @@ EXPORT_SYMBOL(__pagevec_release);
>  #ifdef CONFIG_TRANSPARENT_HUGEPAGE
>  /* used by __split_huge_page_refcount() */
>  void lru_add_page_tail(struct page *page, struct page *page_tail,
> -		       struct lruvec *lruvec)
> +		       struct lruvec *lruvec, struct list_head *list)
>  {
>  	int uninitialized_var(active);
>  	enum lru_list lru;
> @@ -749,7 +749,8 @@ void lru_add_page_tail(struct page *page
>  	VM_BUG_ON(NR_CPUS != 1 &&
>  		  !spin_is_locked(&lruvec_zone(lruvec)->lru_lock));
>  
> -	SetPageLRU(page_tail);
> +	if (!list)
> +		SetPageLRU(page_tail);
>  
>  	if (page_evictable(page_tail)) {
>  		if (PageActive(page)) {
> @@ -767,7 +768,11 @@ void lru_add_page_tail(struct page *page
>  
>  	if (likely(PageLRU(page)))
>  		list_add_tail(&page_tail->lru, &page->lru);
> -	else {
> +	else if (list) {
> +		/* page reclaim is reclaiming a huge page */
> +		get_page(page_tail);
> +		list_add_tail(&page_tail->lru, list);
> +	} else {
>  		struct list_head *list_head;
>  		/*
>  		 * Head page has not yet been counted, as an hpage,
> Index: linux/mm/swap_state.c
> ===================================================================
> --- linux.orig/mm/swap_state.c	2013-04-01 20:16:23.778121508 +0800
> +++ linux/mm/swap_state.c	2013-04-01 20:18:22.668627309 +0800
> @@ -160,7 +160,7 @@ void __delete_from_swap_cache(struct pag
>   * Allocate swap space for the page and add the page to the
>   * swap cache.  Caller needs to hold the page lock. 
>   */
> -int add_to_swap(struct page *page)
> +int add_to_swap(struct page *page, struct list_head *list)
>  {
>  	swp_entry_t entry;
>  	int err;
> @@ -173,7 +173,7 @@ int add_to_swap(struct page *page)
>  		return 0;
>  
>  	if (unlikely(PageTransHuge(page)))
> -		if (unlikely(split_huge_page(page))) {
> +		if (unlikely(split_huge_page_to_list(page, list))) {
>  			swapcache_free(entry, NULL);
>  			return 0;
>  		}
> Index: linux/mm/vmscan.c
> ===================================================================
> --- linux.orig/mm/vmscan.c	2013-04-01 20:16:23.782121457 +0800
> +++ linux/mm/vmscan.c	2013-04-01 20:18:22.668627309 +0800
> @@ -780,7 +780,7 @@ static unsigned long shrink_page_list(st
>  		if (PageAnon(page) && !PageSwapCache(page)) {
>  			if (!(sc->gfp_mask & __GFP_IO))
>  				goto keep_locked;
> -			if (!add_to_swap(page))
> +			if (!add_to_swap(page, page_list))
>  				goto activate_locked;
>  			may_enter_fs = 1;
>  		}
> 
> --
> To unsubscribe, send a message with 'unsubscribe linux-mm' in
> the body to majordomo@xxxxxxxxx.  For more info on Linux MM,
> see: http://www.linux-mm.org/ .
> Don't email: <a href=mailto:"dont@xxxxxxxxx";> email@xxxxxxxxx </a>

-- 
Kind regards,
Minchan Kim

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@xxxxxxxxx.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@xxxxxxxxx";> email@xxxxxxxxx </a>




[Index of Archives]     [Linux ARM Kernel]     [Linux ARM]     [Linux Omap]     [Fedora ARM]     [IETF Annouce]     [Bugtraq]     [Linux]     [Linux OMAP]     [Linux MIPS]     [ECOS]     [Asterisk Internet PBX]     [Linux API]