[RFC 1/5] add metadata_incore ioctl in vfs

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Add a VFS ioctl that reports which parts of a filesystem's metadata are
currently in memory. Userspace collects this information and uses it to do
metadata readahead. A filesystem implements super_operations.metadata_incore
to report its in-memory metadata in a filesystem-specific way. The next patch
gives an example of how to implement .metadata_incore in btrfs.

Signed-off-by: Shaohua Li <shaohua.li@xxxxxxxxx>

---
 fs/compat_ioctl.c  |    1 
 fs/ioctl.c         |   79 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/fs.h |   15 ++++++++++
 3 files changed, 95 insertions(+)

Index: linux/fs/ioctl.c
===================================================================
--- linux.orig/fs/ioctl.c	2010-12-13 14:01:52.000000000 +0800
+++ linux/fs/ioctl.c	2010-12-13 14:01:56.000000000 +0800
@@ -530,6 +530,82 @@ static int ioctl_fsthaw(struct file *fil
 }
 
 /*
+ * Copy info about metadata in memory to userspace (contiguous ranges merged)
+ * Returns:
+ * > 0, number of metadata_incore_ent entries copied to userspace
+ * = 0, no more metadata
+ * < 0, error
+ */
+static int ioctl_metadata_incore(struct file *filp, void __user *argp)
+{
+	struct super_block *sb = filp->f_path.dentry->d_inode->i_sb;
+	struct metadata_incore_ent __user *vec, *vec_end;
+	struct metadata_incore_args args;
+	struct metadata_incore_ent ent;
+	loff_t offset, last_offset = 0;
+	ssize_t size, last_size = 0;
+	int entries = 0;
+
+	if (!sb->s_op->metadata_incore)
+		return -EOPNOTSUPP;
+
+	if (copy_from_user(&args, argp, sizeof(args)))
+		return -EFAULT;
+
+	/* Check the start address: needs to be page-aligned.. */
+	if (args.offset & ~PAGE_CACHE_MASK)
+		return -EINVAL;
+
+	if ((args.vec_size % sizeof(struct metadata_incore_ent)) != 0)
+		return -EINVAL;
+
+	vec = (void __user *)(unsigned long)args.vec_addr;
+	if (!access_ok(VERIFY_WRITE, vec, args.vec_size))
+		return -EFAULT;
+	vec_end = vec + args.vec_size / sizeof(ent);
+
+	offset = args.offset;
+	ent.unused = 0;
+
+	while (vec < vec_end) {
+		if (signal_pending(current))
+			return -EINTR;
+		cond_resched();
+
+		if (sb->s_op->metadata_incore(sb, &offset, &size) < 0)
+			break;
+
+		/* Gap after a non-empty pending range: flush it as one entry */
+		if (last_size > 0 && offset != last_offset + last_size) {
+			ent.offset = last_offset;
+			ent.size = last_size;
+			if (copy_to_user(vec, &ent, sizeof(ent)))
+				return -EFAULT;
+			vec++;
+			entries++;
+			last_size = 0;
+		}
+
+		/* Start a new pending range, or extend the current one */
+		if (last_size == 0)
+			last_offset = offset;
+		last_size += size;
+		offset += size;
+	}
+
+	/* Flush the final pending range if the vector still has room */
+	if (last_size > 0 && vec < vec_end) {
+		ent.offset = last_offset;
+		ent.size = last_size;
+		if (copy_to_user(vec, &ent, sizeof(ent)))
+			return -EFAULT;
+		entries++;
+	}
+
+	return entries;
+}
+
+/*
  * When you add any new common ioctls to the switches above and below
  * please update compat_sys_ioctl() too.
  *
@@ -589,6 +665,9 @@ int do_vfs_ioctl(struct file *filp, unsi
 		return put_user(inode->i_sb->s_blocksize, p);
 	}
 
+	case FIMETADATA_INCORE:
+		return ioctl_metadata_incore(filp, argp);
+
 	default:
 		if (S_ISREG(filp->f_path.dentry->d_inode->i_mode))
 			error = file_ioctl(filp, cmd, arg);
Index: linux/include/linux/fs.h
===================================================================
--- linux.orig/include/linux/fs.h	2010-12-13 14:01:52.000000000 +0800
+++ linux/include/linux/fs.h	2010-12-13 14:01:56.000000000 +0800
@@ -52,6 +52,18 @@ struct inodes_stat_t {
 	int dummy[5];		/* padding for sysctl ABI compatibility */
 };
 
+struct metadata_incore_ent {	/* one range written by FIMETADATA_INCORE */
+	__u64 offset;	/* start of the range in meta address */
+	__u32 size;	/* length of the range (presumably bytes - confirm) */
+	__u32 unused;	/* set to 0 by ioctl_metadata_incore() */
+};
+
+struct metadata_incore_args {
+	__u64 offset; /* offset in meta address, must be page-aligned */
+	__u64 vec_addr; /* user address of the metadata_incore_ent vector */
+	__u32 vec_size; /* vector's size in bytes, multiple of entry size */
+	__u32 unused; /* not examined by ioctl_metadata_incore() */
+};
 
 #define NR_FILE  8192	/* this can well be larger on a larger system */
 
@@ -325,6 +337,7 @@ struct inodes_stat_t {
 #define FIFREEZE	_IOWR('X', 119, int)	/* Freeze */
 #define FITHAW		_IOWR('X', 120, int)	/* Thaw */
 #define FITRIM		_IOWR('X', 121, struct fstrim_range)	/* Trim */
+#define FIMETADATA_INCORE _IOWR('X', 122, struct metadata_incore_args)
 
 #define	FS_IOC_GETFLAGS			_IOR('f', 1, long)
 #define	FS_IOC_SETFLAGS			_IOW('f', 2, long)
@@ -1612,6 +1625,8 @@ struct super_operations {
 	ssize_t (*quota_write)(struct super_block *, int, const char *, size_t, loff_t);
 #endif
 	int (*bdev_try_to_free_page)(struct super_block*, struct page*, gfp_t);
+	int (*metadata_incore)(struct super_block*, loff_t *offset,
+		ssize_t *size);
 };
 
 /*
Index: linux/fs/compat_ioctl.c
===================================================================
--- linux.orig/fs/compat_ioctl.c	2010-12-13 14:02:08.000000000 +0800
+++ linux/fs/compat_ioctl.c	2010-12-13 14:03:27.000000000 +0800
@@ -882,6 +882,7 @@ COMPATIBLE_IOCTL(FIGETBSZ)
 /* 'X' - originally XFS but some now in the VFS */
 COMPATIBLE_IOCTL(FIFREEZE)
 COMPATIBLE_IOCTL(FITHAW)
+COMPATIBLE_IOCTL(FIMETADATA_INCORE)
 COMPATIBLE_IOCTL(KDGETKEYCODE)
 COMPATIBLE_IOCTL(KDSETKEYCODE)
 COMPATIBLE_IOCTL(KDGKBTYPE)


--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[Index of Archives]     [Linux Ext4 Filesystem]     [Union Filesystem]     [Filesystem Testing]     [Ceph Users]     [Ecryptfs]     [AutoFS]     [Kernel Newbies]     [Share Photos]     [Security]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux Cachefs]     [Reiser Filesystem]     [Linux RAID]     [Samba]     [Device Mapper]     [CEPH Development]
  Powered by Linux