[PATCHv4 34/39] thp, mm: handle huge pages in filemap_fault()

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



From: "Kirill A. Shutemov" <kirill.shutemov@xxxxxxxxxxxxxxx>

If the caller asks for a huge page (flags & FAULT_FLAG_TRANSHUGE),
filemap_fault() returns it if there is already a huge page at that offset.

If the area of the page cache required to create a huge page is empty, we
create a new huge page and return it.

Otherwise we return VM_FAULT_FALLBACK to indicate that fallback to small
pages is required.

Signed-off-by: Kirill A. Shutemov <kirill.shutemov@xxxxxxxxxxxxxxx>
---
 mm/filemap.c |   52 +++++++++++++++++++++++++++++++++++++++++++---------
 1 file changed, 43 insertions(+), 9 deletions(-)

diff --git a/mm/filemap.c b/mm/filemap.c
index 9877347..1deedd6 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -1557,14 +1557,23 @@ EXPORT_SYMBOL(generic_file_aio_read);
  * This adds the requested page to the page cache if it isn't already there,
  * and schedules an I/O to read in its contents from disk.
  */
-static int page_cache_read(struct file *file, pgoff_t offset)
+static int page_cache_read(struct file *file, pgoff_t offset, bool thp)
 {
 	struct address_space *mapping = file->f_mapping;
-	struct page *page; 
+	struct page *page;
 	int ret;
 
 	do {
-		page = page_cache_alloc_cold(mapping);
+		if (thp) {
+			gfp_t gfp_mask = mapping_gfp_mask(mapping) | __GFP_COLD;
+			BUG_ON(offset & HPAGE_CACHE_INDEX_MASK);
+			page = alloc_pages(gfp_mask, HPAGE_PMD_ORDER);
+			if (page)
+				count_vm_event(THP_FAULT_ALLOC);
+			else
+				count_vm_event(THP_FAULT_FALLBACK);
+		} else
+			page = page_cache_alloc_cold(mapping);
 		if (!page)
 			return -ENOMEM;
 
@@ -1573,11 +1582,18 @@ static int page_cache_read(struct file *file, pgoff_t offset)
 			ret = mapping->a_ops->readpage(file, page);
 		else if (ret == -EEXIST)
 			ret = 0; /* losing race to add is OK */
+		else if (ret == -ENOSPC)
+			/*
+			 * No space in page cache to add huge page.
+			 * For caller it's the same as -ENOMEM: fall back to
+			 * small pages is required.
+			 */
+			ret = -ENOMEM;
 
 		page_cache_release(page);
 
 	} while (ret == AOP_TRUNCATED_PAGE);
-		
+
 	return ret;
 }
 
@@ -1669,13 +1685,20 @@ int filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 	struct address_space *mapping = file->f_mapping;
 	struct file_ra_state *ra = &file->f_ra;
 	struct inode *inode = mapping->host;
+	bool thp = vmf->flags & FAULT_FLAG_TRANSHUGE;
 	pgoff_t offset = vmf->pgoff;
+	unsigned long address = (unsigned long)vmf->virtual_address;
 	struct page *page;
 	pgoff_t size;
 	int ret = 0;
 
+	if (thp) {
+		BUG_ON(ra->ra_pages);
+		offset = linear_page_index(vma, address & HPAGE_PMD_MASK);
+	}
+
 	size = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
-	if (offset >= size)
+	if (vmf->pgoff >= size)
 		return VM_FAULT_SIGBUS;
 
 	/*
@@ -1700,7 +1723,8 @@ retry_find:
 			goto no_cached_page;
 	}
 
-	if (PageTransCompound(page))
+	/* Split huge page if we don't want huge page to be here */
+	if (!thp && PageTransCompound(page))
 		split_huge_page(compound_trans_head(page));
 	if (!lock_page_or_retry(page, vma->vm_mm, vmf->flags)) {
 		page_cache_release(page);
@@ -1722,12 +1746,22 @@ retry_find:
 	if (unlikely(!PageUptodate(page)))
 		goto page_not_uptodate;
 
+	if (thp && !PageTransHuge(page)) {
+		/*
+		 * Caller asked for huge page, but we have small page
+		 * by this offset. Fallback to small pages.
+		 */
+		unlock_page(page);
+		page_cache_release(page);
+		return VM_FAULT_FALLBACK;
+	}
+
 	/*
 	 * Found the page and have a reference on it.
 	 * We must recheck i_size under page lock.
 	 */
 	size = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
-	if (unlikely(offset >= size)) {
+	if (unlikely(vmf->pgoff >= size)) {
 		unlock_page(page);
 		page_cache_release(page);
 		return VM_FAULT_SIGBUS;
@@ -1741,7 +1775,7 @@ no_cached_page:
 	 * We're only likely to ever get here if MADV_RANDOM is in
 	 * effect.
 	 */
-	error = page_cache_read(file, offset);
+	error = page_cache_read(file, offset, thp);
 
 	/*
 	 * The page we want has now been added to the page cache.
@@ -1757,7 +1791,7 @@ no_cached_page:
 	 * to schedule I/O.
 	 */
 	if (error == -ENOMEM)
-		return VM_FAULT_OOM;
+		return VM_FAULT_OOM | VM_FAULT_FALLBACK;
 	return VM_FAULT_SIGBUS;
 
 page_not_uptodate:
-- 
1.7.10.4

--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html




[Index of Archives]     [Linux Ext4 Filesystem]     [Union Filesystem]     [Filesystem Testing]     [Ceph Users]     [Ecryptfs]     [AutoFS]     [Kernel Newbies]     [Share Photos]     [Security]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux Cachefs]     [Reiser Filesystem]     [Linux RAID]     [Samba]     [Device Mapper]     [CEPH Development]
  Powered by Linux