[PATCH v3] fat: editions to support fat_fallocate

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



From: Namjae Jeon <namjae.jeon@xxxxxxxxxxx>

Implement preallocation via the fallocate syscall on VFAT partitions.

Change Log:
v3: Release preallocated blocks at file release.

With FALLOC_FL_KEEP_SIZE, there is no way to tell whether a mismatch
between i_size and the number of clusters allocated is a consequence of
fallocate or just plain corruption. When a non-fallocate-aware (old)
Linux fat driver tries to write to such a file, it throws an error.
Also, fsck detects this as an inconsistency and truncates the prealloc'd blocks.

To avoid this, as suggested by OGAWA, remove changes that make fallocate
persistent across mounts and restrict lifetime of blocks from
fallocate(2) to file release.

v2: On an area preallocated with FALLOC_FL_KEEP_SIZE, when a seek was
done to an offset beyond i_size, the old (garbage) data was exposed as
we did not zero out the area at allocation time. Added
fat_zero_falloc_area() to fix this.

v1: Reworked an earlier patch of the same name
(https://lkml.org/lkml/2007/12/22/130) to fix some bugs:
i) Preallocated space was not persistent and was lost on remount. Fixed
it.
ii) Allocated clusters are no longer zeroed out when FALLOC_FL_KEEP_SIZE
is set, thereby speeding up preallocation time.

Signed-off-by: Namjae Jeon <namjae.jeon@xxxxxxxxxxx>
Signed-off-by: Ravishankar N <ravi.n1@xxxxxxxxxxx>
---
 fs/fat/file.c  |   81 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 fs/fat/inode.c |   47 ++++++++++++++++++++++++++++++++
 2 files changed, 128 insertions(+)

diff --git a/fs/fat/file.c b/fs/fat/file.c
index b0b632e..76df547 100644
--- a/fs/fat/file.c
+++ b/fs/fat/file.c
@@ -17,8 +17,11 @@
 #include <linux/blkdev.h>
 #include <linux/fsnotify.h>
 #include <linux/security.h>
+#include <linux/falloc.h>
 #include "fat.h"
 
+static long fat_fallocate(struct file *file, int mode,
+				loff_t offset, loff_t len);
 static int fat_ioctl_get_attributes(struct inode *inode, u32 __user *user_attr)
 {
 	u32 attr;
@@ -140,6 +143,12 @@ static long fat_generic_compat_ioctl(struct file *filp, unsigned int cmd,
 
 static int fat_file_release(struct inode *inode, struct file *filp)
 {
+	struct super_block *sb = inode->i_sb;
+	loff_t mmu_private_ideal = (inode->i_size + (sb->s_blocksize-1)) &
+				    ~(sb->s_blocksize-1);
+	if (mmu_private_ideal < MSDOS_I(inode)->mmu_private &&
+	    filp->f_dentry->d_count == 1)
+		fat_truncate_blocks(inode, inode->i_size);
 	if ((filp->f_mode & FMODE_WRITE) &&
 	     MSDOS_SB(inode->i_sb)->options.flush) {
 		fat_flush_inodes(inode->i_sb, inode, NULL);
@@ -174,6 +183,7 @@ const struct file_operations fat_file_operations = {
 #endif
 	.fsync		= fat_file_fsync,
 	.splice_read	= generic_file_splice_read,
+	.fallocate      = fat_fallocate,
 };
 
 static int fat_cont_expand(struct inode *inode, loff_t size)
@@ -211,7 +221,78 @@ static int fat_cont_expand(struct inode *inode, loff_t size)
 out:
 	return err;
 }
+/*
+ * Preallocate space for a file. This implements fat's fallocate file
+ * operation, which gets called from the sys_fallocate system call. User
+ * space requests len bytes at offset. If FALLOC_FL_KEEP_SIZE is set,
+ * we just allocate clusters without zeroing them out. Otherwise we
+ * allocate and zero out clusters via an expanding truncate.
+ */
+static long fat_fallocate(struct file *file, int mode,
+				loff_t offset, loff_t len)
+{
+	int err = 0;
+	struct inode *inode = file->f_mapping->host;
+	int cluster, nr_cluster, fclus, dclus, free_bytes, nr_bytes;
+	struct super_block *sb = inode->i_sb;
+	struct msdos_sb_info *sbi = MSDOS_SB(sb);
+
+	/* No support for hole punch or other fallocate flags. */
+	if (mode & ~FALLOC_FL_KEEP_SIZE)
+		return -EOPNOTSUPP;
+
+	if ((offset + len) <= MSDOS_I(inode)->mmu_private) {
+		fat_msg(sb, KERN_ERR,
+			"fat_fallocate():Blocks already allocated");
+		return -EINVAL;	/* range already within mmu_private */
+	}
 
+	if ((mode & FALLOC_FL_KEEP_SIZE)) {
+		/* Clusters to allocate. NOTE(review): byte math uses int. */
+		if (inode->i_size > 0) {
+			err = fat_get_cluster(inode, FAT_ENT_EOF,
+					      &fclus, &dclus);
+			if (err < 0) {
+				fat_msg(sb, KERN_ERR,
+					"fat_fallocate():fat_get_cluster() error");
+				return err;
+			}
+			free_bytes = ((fclus+1) << sbi->cluster_bits)-
+				     (inode->i_size);
+			nr_bytes = (offset + len - inode->i_size) - free_bytes;
+		} else
+			nr_bytes = (offset + len - inode->i_size);
+		nr_cluster = (nr_bytes + (sbi->cluster_size - 1)) >>
+			     sbi->cluster_bits;
+		mutex_lock(&inode->i_mutex);	/* taken after i_size was read */
+		/* Start the allocation. We are not zeroing out the clusters. */
+		while (nr_cluster-- > 0) {
+			err = fat_alloc_clusters(inode, &cluster, 1);
+			if (err) {
+				fat_msg(sb, KERN_ERR,
+					"fat_fallocate():fat_alloc_clusters() error");
+				goto error;
+			}
+			err = fat_chain_add(inode, cluster, 1);
+			if (err) {
+				fat_free_clusters(inode, cluster);
+				goto error;
+			}
+			MSDOS_I(inode)->mmu_private += sbi->cluster_size;
+		}
+	} else {
+		mutex_lock(&inode->i_mutex);
+		/* Without KEEP_SIZE this is just an expanding truncate. */
+		err = fat_cont_expand(inode, (offset + len));
+		if (err) {
+			fat_msg(sb, KERN_ERR,
+				"fat_fallocate():fat_cont_expand() error");
+		}
+	}
+error:	/* both branches arrive here holding i_mutex */
+	mutex_unlock(&inode->i_mutex);
+	return err;
+}
 /* Free all clusters after the skip'th cluster. */
 static int fat_free(struct inode *inode, int skip)
 {
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index dfce656..ddf2969 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -152,11 +152,58 @@ static void fat_write_failed(struct address_space *mapping, loff_t to)
 	}
 }
 
+static int fat_zero_falloc_area(struct file *file,	/* file is unused */
+				struct address_space *mapping, loff_t pos)
+{
+	struct page *page;
+	struct inode *inode = mapping->host;
+	loff_t curpos = inode->i_size;	/* zeroing starts at i_size */
+	size_t count = pos-curpos;	/* bytes left to zero, up to pos */
+	int err;
+	do {	/* zero [i_size, pos), at most one page per pass */
+		unsigned offset, bytes;
+		void *fsdata;
+
+		offset = (curpos & (PAGE_CACHE_SIZE - 1));
+		bytes = PAGE_CACHE_SIZE - offset;	/* rest of this page */
+		if (bytes > count)
+			bytes = count;
+
+		err = pagecache_write_begin(NULL, mapping, curpos, bytes,
+					AOP_FLAG_UNINTERRUPTIBLE,
+					&page, &fsdata);
+		if (err)
+			break;
+
+		zero_user(page, offset, bytes);
+
+		err = pagecache_write_end(NULL, mapping, curpos, bytes, bytes,
+					page, fsdata);
+		WARN_ON(err <= 0);	/* write_end result is only warned on */
+		curpos += bytes;
+		count -= bytes;
+		err = 0;	/* ...and then discarded */
+	} while (count);
+
+	return -err;	/* NOTE(review): negates write_begin's err; confirm sign */
+}
+
 static int fat_write_begin(struct file *file, struct address_space *mapping,
 			loff_t pos, unsigned len, unsigned flags,
 			struct page **pagep, void **fsdata)
 {
 	int err;
+	struct inode *inode = mapping->host;
+	struct super_block *sb = inode->i_sb;
+	loff_t mmu_private_actual = MSDOS_I(inode)->mmu_private;
+	loff_t mmu_private_ideal = (inode->i_size + (sb->s_blocksize-1)) &
+					 ~(sb->s_blocksize-1);
+
+	if ((mmu_private_actual > mmu_private_ideal) && (pos > inode->i_size)) {
+		err = fat_zero_falloc_area(file, mapping, pos);
+		if (err)
+			fat_msg(sb, KERN_ERR, "error zeroing fallocated area");
+	}
 
 	*pagep = NULL;
 	err = cont_write_begin(file, mapping, pos, len, flags,
-- 
1.7.9.5

--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[Index of Archives]     [Linux Ext4 Filesystem]     [Union Filesystem]     [Filesystem Testing]     [Ceph Users]     [Ecryptfs]     [AutoFS]     [Kernel Newbies]     [Share Photos]     [Security]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux Cachefs]     [Reiser Filesystem]     [Linux RAID]     [Samba]     [Device Mapper]     [CEPH Development]
  Powered by Linux