For a huge page we add HPAGE_CACHE_NR pages to the radix tree at once: the head page for the specified index and HPAGE_CACHE_NR-1 tail pages for the following indexes. Signed-off-by: Kirill A. Shutemov <kirill.shutemov@xxxxxxxxxxxxxxx> Acked-by: Dave Hansen <dave.hansen@xxxxxxxxxxxxxxx> Signed-off-by: Kirill A. Shutemov <kirill.shutemov@xxxxxxxxxxxxxxx> --- include/linux/huge_mm.h | 24 ++++++++++++++++++++++++ include/linux/page-flags.h | 13 +++++++++++++ mm/filemap.c | 45 +++++++++++++++++++++++++++++++++++---------- 3 files changed, 72 insertions(+), 10 deletions(-) diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index fb0847572c..9747af1117 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -230,6 +230,20 @@ static inline int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_str #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ +#ifdef CONFIG_TRANSPARENT_HUGEPAGE_PAGECACHE + +#define HPAGE_CACHE_ORDER (HPAGE_SHIFT - PAGE_CACHE_SHIFT) +#define HPAGE_CACHE_NR (1L << HPAGE_CACHE_ORDER) +#define HPAGE_CACHE_INDEX_MASK (HPAGE_CACHE_NR - 1) + +#else + +#define HPAGE_CACHE_ORDER ({ BUILD_BUG(); 0; }) +#define HPAGE_CACHE_NR ({ BUILD_BUG(); 0; }) +#define HPAGE_CACHE_INDEX_MASK ({ BUILD_BUG(); 0; }) + +#endif + static inline bool transparent_hugepage_pagecache(void) { if (!IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE_PAGECACHE)) @@ -243,4 +257,14 @@ static inline bool transparent_hugepage_pagecache(void) return false; return transparent_hugepage_flags & (1<<TRANSPARENT_HUGEPAGE_PAGECACHE); } + +static inline int hpagecache_nr_pages(struct page *page) +{ + if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE_PAGECACHE)) + return hpage_nr_pages(page); + + BUG_ON(PageTransHuge(page)); + return 1; +} + #endif /* _LINUX_HUGE_MM_H */ diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 6d53675c2b..6d2d7ce3e1 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -452,6 +452,19 @@ static inline int PageTransTail(struct page *page) 
} #endif +#ifdef CONFIG_TRANSPARENT_HUGEPAGE_PAGECACHE +static inline int PageTransHugeCache(struct page *page) +{ + return PageTransHuge(page); +} +#else + +static inline int PageTransHugeCache(struct page *page) +{ + return 0; +} +#endif + /* * If network-based swap is enabled, sl*b must keep track of whether pages * were allocated from pfmemalloc reserves. diff --git a/mm/filemap.c b/mm/filemap.c index c7e42aee5c..d2d6c0ebe9 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -460,38 +460,63 @@ int add_to_page_cache_locked(struct page *page, struct address_space *mapping, pgoff_t offset, gfp_t gfp_mask) { int error; + int i, nr; VM_BUG_ON(!PageLocked(page)); VM_BUG_ON(PageSwapBacked(page)); + /* memory cgroup controller handles thp pages on its side */ error = mem_cgroup_cache_charge(page, current->mm, gfp_mask & GFP_RECLAIM_MASK); if (error) return error; - error = radix_tree_maybe_preload(gfp_mask & ~__GFP_HIGHMEM); + if (PageTransHugeCache(page)) + BUILD_BUG_ON(HPAGE_CACHE_NR > RADIX_TREE_PRELOAD_NR); + + nr = hpagecache_nr_pages(page); + + error = radix_tree_maybe_preload_contig(nr, gfp_mask & ~__GFP_HIGHMEM); if (error) { mem_cgroup_uncharge_cache_page(page); return error; } + spin_lock_irq(&mapping->tree_lock); page_cache_get(page); - page->mapping = mapping; page->index = offset; - - spin_lock_irq(&mapping->tree_lock); - error = radix_tree_insert(&mapping->page_tree, offset, page); + page->mapping = mapping; + for (i = 0; i < nr; i++) { + error = radix_tree_insert(&mapping->page_tree, + offset + i, page); + /* + * In the middle of THP we can collide with a small page which was + * established before THP page cache is enabled or by other VMA + * with bad alignment (most likely MAP_FIXED). 
+ */ + if (error) { + i--; /* failed to insert anything at offset + i */ + goto err_insert; + } + } radix_tree_preload_end(); - if (unlikely(error)) - goto err_insert; - mapping->nrpages++; - __inc_zone_page_state(page, NR_FILE_PAGES); + mapping->nrpages += nr; + __mod_zone_page_state(page_zone(page), NR_FILE_PAGES, nr); + if (PageTransHuge(page)) + __inc_zone_page_state(page, NR_FILE_TRANSPARENT_HUGEPAGES); spin_unlock_irq(&mapping->tree_lock); trace_mm_filemap_add_to_page_cache(page); return 0; err_insert: - page->mapping = NULL; + radix_tree_preload_end(); + if (i != 0) + error = -ENOSPC; /* no space for a huge page */ + /* Leave page->index set: truncation relies upon it */ + page->mapping = NULL; + for (; i >= 0; i--) + radix_tree_delete(&mapping->page_tree, offset + i); + spin_unlock_irq(&mapping->tree_lock); mem_cgroup_uncharge_cache_page(page); page_cache_release(page); -- 1.8.4.rc3 -- To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html