From: "Kirill A. Shutemov" <kirill.shutemov@xxxxxxxxxxxxxxx> For a huge page we add HPAGE_CACHE_NR pages to the radix tree at once: the head page at the specified index and HPAGE_CACHE_NR-1 tail pages at the following indexes. Signed-off-by: Kirill A. Shutemov <kirill.shutemov@xxxxxxxxxxxxxxx> Acked-by: Dave Hansen <dave.hansen@xxxxxxxxxxxxxxx> --- include/linux/huge_mm.h | 24 ++++++++++++++++++++++ include/linux/page-flags.h | 33 ++++++++++++++++++++++++++++++ mm/filemap.c | 50 +++++++++++++++++++++++++++++++++++----------- 3 files changed, 95 insertions(+), 12 deletions(-) diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index 1534e1e..4dc66c9 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -230,6 +230,20 @@ static inline int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_str #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ +#ifdef CONFIG_TRANSPARENT_HUGEPAGE_PAGECACHE + +#define HPAGE_CACHE_ORDER (HPAGE_SHIFT - PAGE_CACHE_SHIFT) +#define HPAGE_CACHE_NR (1L << HPAGE_CACHE_ORDER) +#define HPAGE_CACHE_INDEX_MASK (HPAGE_CACHE_NR - 1) + +#else + +#define HPAGE_CACHE_ORDER ({ BUILD_BUG(); 0; }) +#define HPAGE_CACHE_NR ({ BUILD_BUG(); 0; }) +#define HPAGE_CACHE_INDEX_MASK ({ BUILD_BUG(); 0; }) + +#endif + static inline bool transparent_hugepage_pagecache(void) { if (!IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE_PAGECACHE)) @@ -238,4 +252,14 @@ static inline bool transparent_hugepage_pagecache(void) return false; return transparent_hugepage_flags & (1<<TRANSPARENT_HUGEPAGE_PAGECACHE); } + +static inline int hpagecache_nr_pages(struct page *page) +{ + if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE_PAGECACHE)) + return hpage_nr_pages(page); + + BUG_ON(PageTransHuge(page)); + return 1; +} + #endif /* _LINUX_HUGE_MM_H */ diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index f1a5b59..7657de0 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -452,6 +452,39 @@ static inline int PageTransTail(struct page *page) } 
#endif +#ifdef CONFIG_TRANSPARENT_HUGEPAGE_PAGECACHE +static inline int PageTransHugeCache(struct page *page) +{ + return PageTransHuge(page); +} + +static inline int PageTransCompoundCache(struct page *page) +{ + return PageTransCompound(page); +} + +static inline int PageTransTailCache(struct page *page) +{ + return PageTransTail(page); +} +#else + +static inline int PageTransHugeCache(struct page *page) +{ + return 0; +} + +static inline int PageTransCompoundCache(struct page *page) +{ + return 0; +} + +static inline int PageTransTailCache(struct page *page) +{ + return 0; +} +#endif + /* * If network-based swap is enabled, sl*b must keep track of whether pages * were allocated from pfmemalloc reserves. diff --git a/mm/filemap.c b/mm/filemap.c index ae5cc01..619e6cb 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -460,38 +460,64 @@ int add_to_page_cache_locked(struct page *page, struct address_space *mapping, pgoff_t offset, gfp_t gfp_mask) { int error; + int i, nr; VM_BUG_ON(!PageLocked(page)); VM_BUG_ON(PageSwapBacked(page)); + /* memory cgroup controller handles thp pages on its side */ error = mem_cgroup_cache_charge(page, current->mm, gfp_mask & GFP_RECLAIM_MASK); if (error) return error; - error = radix_tree_maybe_preload(gfp_mask & ~__GFP_HIGHMEM); + if (PageTransHugeCache(page)) + BUILD_BUG_ON(HPAGE_CACHE_NR > RADIX_TREE_PRELOAD_NR); + + nr = hpagecache_nr_pages(page); + + error = radix_tree_maybe_preload_contig(nr, gfp_mask & ~__GFP_HIGHMEM); if (error) { mem_cgroup_uncharge_cache_page(page); return error; } - page_cache_get(page); - page->mapping = mapping; - page->index = offset; - spin_lock_irq(&mapping->tree_lock); - error = radix_tree_insert(&mapping->page_tree, offset, page); + page_cache_get(page); + for (i = 0; i < nr; i++) { + error = radix_tree_insert(&mapping->page_tree, + offset + i, page + i); + /* + * In the middle of THP we can collide with a small page which was + * established before THP page cache is enabled or by other VMA + * with bad 
alignment (most likely MAP_FIXED). + */ + if (error) + goto err_insert; + page[i].index = offset + i; + page[i].mapping = mapping; + } radix_tree_preload_end(); - if (unlikely(error)) - goto err_insert; - mapping->nrpages++; - __inc_zone_page_state(page, NR_FILE_PAGES); + mapping->nrpages += nr; + __mod_zone_page_state(page_zone(page), NR_FILE_PAGES, nr); + if (PageTransHuge(page)) + __inc_zone_page_state(page, NR_FILE_TRANSPARENT_HUGEPAGES); spin_unlock_irq(&mapping->tree_lock); trace_mm_filemap_add_to_page_cache(page); return 0; err_insert: - page->mapping = NULL; - /* Leave page->index set: truncation relies upon it */ + radix_tree_preload_end(); + if (i != 0) + error = -ENOSPC; /* no space for a huge page */ + + /* page[i] was not inserted to tree, skip it */ + i--; + + for (; i >= 0; i--) { + /* Leave page->index set: truncation relies upon it */ + page[i].mapping = NULL; + radix_tree_delete(&mapping->page_tree, offset + i); + } spin_unlock_irq(&mapping->tree_lock); mem_cgroup_uncharge_cache_page(page); page_cache_release(page); -- 1.8.3.2 -- To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html