Re: [V4 PATCH 1/2] tmpfs: add fallocate support

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



于 2011年11月29日 14:02, KAMEZAWA Hiroyuki 写道:

You can't know whether the 'page' was allocated by alloc_page() in fallocate()
or was just found as an existing one.
Then your patch will corrupt existing pages in the error path.
Is that allowed?


According to the comment,

/*
 * shmem_getpage_gfp - find page in cache, or get from swap, or allocate
 *
 * If we allocate a new one we do not mark it dirty. That's up to the
 * vm. If we swap it in we mark it dirty since we also free the swap
 * entry since a page cannot live in both the swap and page cache
 */

so we can tell whether the page is newly allocated by checking the page's
dirty bit. Or am I missing something?

But whoops, I sent the wrong version of this patch; the one below is
the correct one. Sorry about that.

Subject: [V4 PATCH 1/2] tmpfs: add fallocate support

Systemd needs tmpfs to support fallocate [1], to be able
to safely use mmap(), regarding SIGBUS, on files on the
/dev/shm filesystem. The glibc fallback loop for -ENOSYS
on fallocate is just ugly.

This patch adds fallocate support to tmpfs, and as we
already have shmem_truncate_range(), it is also easy to
add FALLOC_FL_PUNCH_HOLE support too.

1. http://lkml.org/lkml/2011/10/20/275

V3->V4:
Handle 'undo' ENOSPC more correctly.

V2->V3:
a) Read i_size directly after holding i_mutex;
b) Call page_cache_release() too after shmem_getpage();
c) Undo previous changes when -ENOSPC.

Cc: Pekka Enberg <penberg@xxxxxxxxxx>
Cc: Christoph Hellwig <hch@xxxxxx>
Cc: Hugh Dickins <hughd@xxxxxxxxxx>
Cc: Dave Hansen <dave@xxxxxxxxxxxxxxxxxx>
Cc: Lennart Poettering <lennart@xxxxxxxxxxxxxx>
Cc: Kay Sievers <kay.sievers@xxxxxxxx>
Cc: KOSAKI Motohiro <kosaki.motohiro@xxxxxxxxxxxxxx>
Signed-off-by: WANG Cong <amwang@xxxxxxxxxx>

---
commit ca055ad343a0d629f8f1fad1df30796d2292f6a2
Author: Cong Wang <amwang@xxxxxxxxxx>
Date:   Wed Nov 23 13:16:26 2011 +0800

    tmpfs: add fallocate support
---
 mm/shmem.c |   90 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 files changed, 90 insertions(+), 0 deletions(-)

diff --git a/mm/shmem.c b/mm/shmem.c
index d672250..6a6fc66 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -30,6 +30,7 @@
 #include <linux/mm.h>
 #include <linux/export.h>
 #include <linux/swap.h>
+#include <linux/falloc.h>
 
 static struct vfsmount *shm_mnt;
 
@@ -1016,6 +1017,35 @@ failed:
 	return error;
 }
 
+/*
+ * shmem_putpage_noswap - back out one page instantiated by a failed fallocate
+ * @inode: inode whose page cache holds the page
+ * @index: page index within @inode's mapping
+ * @new:   true when the caller obtained this page during the operation that
+ *         is now being undone
+ *
+ * Looks the page up in the page cache and, if it is present, was requested
+ * as @new and is still clean, removes it and reverses the per-inode and
+ * per-superblock block accounting.  A dirty page is taken to hold data that
+ * predates the fallocate call and is left untouched, accounting included.
+ *
+ * NOTE(review): "clean" is only a heuristic for "newly allocated"; a page
+ * that existed before the call but was never dirtied would be removed too.
+ * NOTE(review): the page is not unmapped before delete_from_page_cache();
+ * confirm that nothing can have mapped it while the caller holds i_mutex.
+ */
+static void shmem_putpage_noswap(struct inode *inode, pgoff_t index, bool new)
+{
+	struct address_space *mapping = inode->i_mapping;
+	struct shmem_inode_info *info = SHMEM_I(inode);
+	struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
+	struct page *page;
+
+	page = find_lock_page(mapping, index);
+	if (!page)
+		return;
+
+	/*
+	 * Only touch the counters when the page really goes away; dropping
+	 * used_blocks for a page that stays cached would let the filesystem
+	 * grow past its block limit.
+	 */
+	if (new && !PageDirty(page)) {
+		delete_from_page_cache(page);
+		spin_lock(&info->lock);
+		info->alloced--;
+		inode->i_blocks -= BLOCKS_PER_PAGE;
+		spin_unlock(&info->lock);
+		if (sbinfo->max_blocks)
+			percpu_counter_add(&sbinfo->used_blocks, -1);
+		/* No shmem_acct_block() here: this path only gives back. */
+		shmem_unacct_blocks(info->flags, 1);
+	}
+
+	/* Release the lock and the reference taken by find_lock_page(). */
+	unlock_page(page);
+	page_cache_release(page);
+}
+
 static int shmem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 {
 	struct inode *inode = vma->vm_file->f_path.dentry->d_inode;
@@ -1431,6 +1461,65 @@ static ssize_t shmem_file_splice_read(struct file *in, loff_t *ppos,
 	return error;
 }
 
+/*
+ * shmem_fallocate - preallocate pages for, or punch a hole in, a tmpfs file
+ * @file:   open file to operate on
+ * @mode:   FALLOC_FL_* flags; FALLOC_FL_KEEP_SIZE and FALLOC_FL_PUNCH_HOLE
+ *          are the ones examined here
+ * @offset: first byte of the range
+ * @len:    length of the range in bytes
+ *
+ * Runs under inode->i_mutex.  For a hole punch the range is handed to
+ * shmem_truncate_range().  Otherwise every page covering the range is
+ * instantiated with shmem_getpage(); on -ENOSPC the pages obtained so far
+ * are passed to shmem_putpage_noswap(), while any other error leaves them
+ * in place.  Unless FALLOC_FL_KEEP_SIZE is set, i_size is raised to
+ * @offset + @len when that exceeds the current size.
+ *
+ * Returns 0 on success, -ETXTBSY for a swapfile, or the error reported by
+ * inode_newsize_ok() or shmem_getpage().
+ */
+static long shmem_fallocate(struct file *file, int mode,
+				loff_t offset, loff_t len)
+{
+	struct inode *inode = file->f_path.dentry->d_inode;
+	pgoff_t start = offset >> PAGE_CACHE_SHIFT;
+	pgoff_t end = DIV_ROUND_UP((offset + len), PAGE_CACHE_SIZE);
+	/* Widen before shifting so the byte offset cannot wrap in pgoff_t. */
+	loff_t end_bytes = (loff_t)end << PAGE_CACHE_SHIFT;
+	pgoff_t index = start;
+	loff_t i_size;
+	struct page *page = NULL;
+	int ret = 0;
+
+	if (IS_SWAPFILE(inode))
+		return -ETXTBSY;
+
+	mutex_lock(&inode->i_mutex);
+	i_size = inode->i_size;
+	if (mode & FALLOC_FL_PUNCH_HOLE) {
+		/*
+		 * NOTE(review): the end of the hole is rounded up to a page
+		 * boundary, so bytes between offset + len and the end of that
+		 * page are covered as well, and a hole reaching past i_size
+		 * is silently skipped.  Confirm both are intended.
+		 */
+		if (offset <= i_size && end_bytes <= i_size)
+			shmem_truncate_range(inode, offset, end_bytes - 1);
+		goto unlock;
+	}
+
+	if (!(mode & FALLOC_FL_KEEP_SIZE)) {
+		ret = inode_newsize_ok(inode, (offset + len));
+		if (ret)
+			goto unlock;
+	}
+
+	while (index < end) {
+		ret = shmem_getpage(inode, index, &page, SGP_WRITE, NULL);
+		if (ret) {
+			if (ret == -ENOSPC)
+				goto undo;
+			goto unlock;
+		}
+		if (page) {
+			unlock_page(page);
+			page_cache_release(page);
+		}
+		index++;
+	}
+
+	/* Grow to the requested byte size, not to the page-rounded end. */
+	if (!(mode & FALLOC_FL_KEEP_SIZE) && offset + len > i_size)
+		i_size_write(inode, offset + len);
+
+	goto unlock;
+
+undo:
+	/*
+	 * index is the page that failed, so nothing was obtained for it;
+	 * walk back over [start, index) only.
+	 */
+	while (index > start) {
+		index--;
+		shmem_putpage_noswap(inode, index, true);
+	}
+
+unlock:
+	mutex_unlock(&inode->i_mutex);
+	return ret;
+}
+
 static int shmem_statfs(struct dentry *dentry, struct kstatfs *buf)
 {
 	struct shmem_sb_info *sbinfo = SHMEM_SB(dentry->d_sb);
@@ -2286,6 +2375,7 @@ static const struct file_operations shmem_file_operations = {
 	.fsync		= noop_fsync,
 	.splice_read	= shmem_file_splice_read,
 	.splice_write	= generic_file_splice_write,
+	.fallocate	= shmem_fallocate,
 #endif
 };
 

[Index of Archives]     [Linux ARM Kernel]     [Linux ARM]     [Linux Omap]     [Fedora ARM]     [IETF Annouce]     [Bugtraq]     [Linux]     [Linux OMAP]     [Linux MIPS]     [ECOS]     [Asterisk Internet PBX]     [Linux API]