As described in the page_cache_get_speculative() comment
in pagemap.h, the count of all pages coming out of the allocator
must be considered unstable unless an RCU grace period has passed
since the pages were allocated.

This is an issue for THP because __split_huge_page_refcount()
depends on tail page counts being stable.

By setting a cookie on THP pages when they are allocated, we are able
to ensure the tail page counts are stable before splitting such pages.
In the typical case, the THP page should be old enough by the time we
try to split it, so that we won't have to wait.

Signed-off-by: Michel Lespinasse <walken@xxxxxxxxxx>
---
 mm/huge_memory.c | 33 +++++++++++++++++++++++++++++----
 1 files changed, 29 insertions(+), 4 deletions(-)

diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 81532f2..46c0c0b 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -657,15 +657,23 @@ static inline struct page *alloc_hugepage_vma(int defrag,
 					      unsigned long haddr, int nd,
 					      gfp_t extra_gfp)
 {
-	return alloc_pages_vma(alloc_hugepage_gfpmask(defrag, extra_gfp),
+	struct page *page;
+	page = alloc_pages_vma(alloc_hugepage_gfpmask(defrag, extra_gfp),
 			       HPAGE_PMD_ORDER, vma, haddr, nd);
+	if (page)
+		page_get_gp_cookie(page);
+	return page;
 }
 
 #ifndef CONFIG_NUMA
 static inline struct page *alloc_hugepage(int defrag)
 {
-	return alloc_pages(alloc_hugepage_gfpmask(defrag, 0),
+	struct page *page;
+	page = alloc_pages(alloc_hugepage_gfpmask(defrag, 0),
 			   HPAGE_PMD_ORDER);
+	if (page)
+		page_get_gp_cookie(page);
+	return page;
 }
 #endif
 
@@ -1209,7 +1217,7 @@ static void __split_huge_page_refcount(struct page *page)
 		BUG_ON(page_mapcount(page_tail));
 		page_tail->_mapcount = page->_mapcount;
 
-		BUG_ON(page_tail->mapping);
+		BUG_ON(page_tail->mapping); /* see page_clear_gp_cookie() */
 		page_tail->mapping = page->mapping;
 
 		page_tail->index = ++head_index;
@@ -1387,9 +1395,11 @@ static void __split_huge_page(struct page *page,
 int split_huge_page(struct page *page)
 {
 	struct anon_vma *anon_vma;
-	int ret = 1;
+	int ret;
 
+retry:
 	BUG_ON(!PageAnon(page));
+	ret = 1;
 	anon_vma = page_lock_anon_vma(page);
 	if (!anon_vma)
 		goto out;
@@ -1397,6 +1407,21 @@ int split_huge_page(struct page *page)
 	if (!PageCompound(page))
 		goto out_unlock;
 
+	/*
+	 * Make sure the tail page counts are stable before splitting the page.
+	 * See the page_cache_get_speculative() comment in pagemap.h.
+	 */
+	if (!page_gp_cookie_elapsed(page)) {
+		page_unlock_anon_vma(anon_vma);
+		synchronize_rcu();
+		goto retry;
+	}
+
+	/*
+	 * Make sure page_tail->mapping is cleared before we split up the page.
+	 */
+	page_clear_gp_cookie(page);
+
 	BUG_ON(!PageSwapBacked(page));
 	__split_huge_page(page, anon_vma);
 	count_vm_event(THP_SPLIT);
-- 
1.7.3.1

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@xxxxxxxxx. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Fight unfair telecom internet charges in Canada: sign http://stopthemeter.ca/
Don't email: <a href=mailto:"dont@xxxxxxxxx"> email@xxxxxxxxx </a>