[PATCHv6 07/22] thp, mm: rewrite add_to_page_cache_locked() to support huge pages

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



For huge page we add to radix tree HPAGE_CACHE_NR pages at once: head
page for the specified index and HPAGE_CACHE_NR-1 tail pages for
following indexes.

Signed-off-by: Kirill A. Shutemov <kirill.shutemov@xxxxxxxxxxxxxxx>
Acked-by: Dave Hansen <dave.hansen@xxxxxxxxxxxxxxx>
Signed-off-by: Kirill A. Shutemov <kirill.shutemov@xxxxxxxxxxxxxxx>
---
 include/linux/huge_mm.h    | 24 ++++++++++++++++++++++++
 include/linux/page-flags.h | 13 +++++++++++++
 mm/filemap.c               | 45 +++++++++++++++++++++++++++++++++++----------
 3 files changed, 72 insertions(+), 10 deletions(-)

diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index fb0847572c..9747af1117 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -230,6 +230,20 @@ static inline int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_str
 
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
 
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE_PAGECACHE
+
+#define HPAGE_CACHE_ORDER      (HPAGE_SHIFT - PAGE_CACHE_SHIFT)
+#define HPAGE_CACHE_NR         (1L << HPAGE_CACHE_ORDER)
+#define HPAGE_CACHE_INDEX_MASK (HPAGE_CACHE_NR - 1)
+
+#else
+
+#define HPAGE_CACHE_ORDER      ({ BUILD_BUG(); 0; })
+#define HPAGE_CACHE_NR         ({ BUILD_BUG(); 0; })
+#define HPAGE_CACHE_INDEX_MASK ({ BUILD_BUG(); 0; })
+
+#endif
+
 static inline bool transparent_hugepage_pagecache(void)
 {
 	if (!IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE_PAGECACHE))
@@ -243,4 +257,14 @@ static inline bool transparent_hugepage_pagecache(void)
                  return false;
 	return transparent_hugepage_flags & (1<<TRANSPARENT_HUGEPAGE_PAGECACHE);
 }
+
+static inline int hpagecache_nr_pages(struct page *page)
+{
+	if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE_PAGECACHE))
+		return hpage_nr_pages(page);
+
+	BUG_ON(PageTransHuge(page));
+	return 1;
+}
+
 #endif /* _LINUX_HUGE_MM_H */
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index 6d53675c2b..6d2d7ce3e1 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -452,6 +452,19 @@ static inline int PageTransTail(struct page *page)
 }
 #endif
 
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE_PAGECACHE
+static inline int PageTransHugeCache(struct page *page)
+{
+	return PageTransHuge(page);
+}
+#else
+
+static inline int PageTransHugeCache(struct page *page)
+{
+	return 0;
+}
+#endif
+
 /*
  * If network-based swap is enabled, sl*b must keep track of whether pages
  * were allocated from pfmemalloc reserves.
diff --git a/mm/filemap.c b/mm/filemap.c
index c7e42aee5c..d2d6c0ebe9 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -460,38 +460,63 @@ int add_to_page_cache_locked(struct page *page, struct address_space *mapping,
 		pgoff_t offset, gfp_t gfp_mask)
 {
 	int error;
+	int i, nr;
 
 	VM_BUG_ON(!PageLocked(page));
 	VM_BUG_ON(PageSwapBacked(page));
 
+	/* memory cgroup controller handles thp pages on its side */
 	error = mem_cgroup_cache_charge(page, current->mm,
 					gfp_mask & GFP_RECLAIM_MASK);
 	if (error)
 		return error;
 
-	error = radix_tree_maybe_preload(gfp_mask & ~__GFP_HIGHMEM);
+	if (PageTransHugeCache(page))
+		BUILD_BUG_ON(HPAGE_CACHE_NR > RADIX_TREE_PRELOAD_NR);
+
+	nr = hpagecache_nr_pages(page);
+
+	error = radix_tree_maybe_preload_contig(nr, gfp_mask & ~__GFP_HIGHMEM);
 	if (error) {
 		mem_cgroup_uncharge_cache_page(page);
 		return error;
 	}
 
+	spin_lock_irq(&mapping->tree_lock);
 	page_cache_get(page);
-	page->mapping = mapping;
 	page->index = offset;
-
-	spin_lock_irq(&mapping->tree_lock);
-	error = radix_tree_insert(&mapping->page_tree, offset, page);
+	page->mapping = mapping;
+	for (i = 0; i < nr; i++) {
+		error = radix_tree_insert(&mapping->page_tree,
+				offset + i, page);
+		/*
+		 * In the midle of THP we can collide with small page which was
+		 * established before THP page cache is enabled or by other VMA
+		 * with bad alignement (most likely MAP_FIXED).
+		 */
+		if (error) {
+			i--; /* failed to insert anything at offset + i */
+			goto err_insert;
+		}
+	}
 	radix_tree_preload_end();
-	if (unlikely(error))
-		goto err_insert;
-	mapping->nrpages++;
-	__inc_zone_page_state(page, NR_FILE_PAGES);
+	mapping->nrpages += nr;
+	__mod_zone_page_state(page_zone(page), NR_FILE_PAGES, nr);
+	if (PageTransHuge(page))
+		__inc_zone_page_state(page, NR_FILE_TRANSPARENT_HUGEPAGES);
 	spin_unlock_irq(&mapping->tree_lock);
 	trace_mm_filemap_add_to_page_cache(page);
 	return 0;
 err_insert:
-	page->mapping = NULL;
+	radix_tree_preload_end();
+	if (i != 0)
+		error = -ENOSPC; /* no space for a huge page */
+
 	/* Leave page->index set: truncation relies upon it */
+	page->mapping = NULL;
+	for (; i >= 0; i--)
+		radix_tree_delete(&mapping->page_tree, offset + i);
+
 	spin_unlock_irq(&mapping->tree_lock);
 	mem_cgroup_uncharge_cache_page(page);
 	page_cache_release(page);
-- 
1.8.4.rc3

--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html




[Index of Archives]     [Linux Ext4 Filesystem]     [Union Filesystem]     [Filesystem Testing]     [Ceph Users]     [Ecryptfs]     [AutoFS]     [Kernel Newbies]     [Share Photos]     [Security]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux Cachefs]     [Reiser Filesystem]     [Linux RAID]     [Samba]     [Device Mapper]     [CEPH Development]
  Powered by Linux