[RFC PATCH 10/11] shmem: add large folio support to the write path

Daniel Gomez <da.gomez@xxxxxxxxxxx> · Sat, 28 Oct 2023 21:15:50 +0000

Current work in progress. Large folios in the fallocate path makes
regress fstests generic/285 and generic/436.

Add large folio support for shmem write path matching the same high
order preference mechanism used for iomap buffered IO path as used in
__filemap_get_folio().

Add shmem_mapping_size_order to get a hint for the order of the folio
based on the file size which takes care of the mapping requirements.

Swap does not support high order folios for now, so make it order 0 in
case swap is enabled.

Add the __GFP_COMP flag for high order folios except when huge is
enabled. This fixes a memory leak when allocating high order folios.

Signed-off-by: Daniel Gomez <da.gomez@xxxxxxxxxxx>
---
 mm/shmem.c | 49 ++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 48 insertions(+), 1 deletion(-)

diff --git a/mm/shmem.c b/mm/shmem.c
index fc7605da4316..eb314927be78 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1621,6 +1621,9 @@ static struct folio *shmem_alloc_folio(gfp_t gfp, struct shmem_inode_info *info,
 	pgoff_t ilx;
 	struct page *page;
 
+	if ((order != 0) && !(gfp & VM_HUGEPAGE))
+		gfp |= __GFP_COMP;
+
 	mpol = shmem_get_pgoff_policy(info, index, order, &ilx);
 	page = alloc_pages_mpol(gfp, order, mpol, ilx, numa_node_id());
 	mpol_cond_put(mpol);
@@ -1628,17 +1631,56 @@ static struct folio *shmem_alloc_folio(gfp_t gfp, struct shmem_inode_info *info,
 	return page_rmappable_folio(page);
 }
 
+/**
+ * shmem_mapping_size_order - Get maximum folio order for the given file size.
+ * @mapping: Target address_space.
+ * @index: The page index.
+ * @size: The suggested size of the folio to create.
+ *
+ * This returns a high order for folios (when supported) based on the file size
+ * which the mapping currently allows at the given index. The index is relevant
+ * due to alignment considerations the mapping might have. The returned order
+ * may be less than the size passed.
+ *
+ * Like __filemap_get_folio order calculation.
+ *
+ * Return: The order.
+ */
+static inline unsigned int
+shmem_mapping_size_order(struct address_space *mapping, pgoff_t index,
+			 size_t size, struct shmem_sb_info *sbinfo)
+{
+	unsigned int order = ilog2(size);
+
+	if ((order <= PAGE_SHIFT) ||
+	    (!mapping_large_folio_support(mapping) || !sbinfo->noswap))
+		return 0;
+
+	order -= PAGE_SHIFT;
+
+	/* If we're not aligned, allocate a smaller folio */
+	if (index & ((1UL << order) - 1))
+		order = __ffs(index);
+
+	order = min_t(size_t, order, MAX_PAGECACHE_ORDER);
+
+	/* Order-1 not supported due to THP dependency */
+	return (order == 1) ? 0 : order;
+}
+
 static struct folio *shmem_alloc_and_add_folio(gfp_t gfp,
 		struct inode *inode, pgoff_t index,
 		struct mm_struct *fault_mm, size_t len)
 {
 	struct address_space *mapping = inode->i_mapping;
 	struct shmem_inode_info *info = SHMEM_I(inode);
-	unsigned int order = 0;
+	unsigned int order = shmem_mapping_size_order(mapping, index, len,
+						      SHMEM_SB(inode->i_sb));
 	struct folio *folio;
 	long pages;
 	int error;
 
+neworder:
 	if (gfp & VM_HUGEPAGE) {
 		pages = HPAGE_PMD_NR;
 		index = round_down(index, HPAGE_PMD_NR);
@@ -1721,6 +1763,11 @@ static struct folio *shmem_alloc_and_add_folio(gfp_t gfp,
 unlock:
 	folio_unlock(folio);
 	folio_put(folio);
+	if (order != 0) {
+		if (--order == 1)
+			order = 0;
+		goto neworder;
+	}
 	return ERR_PTR(error);
 }
 
-- 
2.39.2