Add an ioctl to the VFS that dumps a filesystem's in-memory metadata.
Userspace collects this information and uses it to do metadata readahead.
A filesystem can implement super_operations.metadata_incore to report its
metadata in its own way. The next patch gives an example of how to
implement .metadata_incore in btrfs.

Signed-off-by: Shaohua Li <shaohua.li@xxxxxxxxx>
---
 fs/compat_ioctl.c  |    1 +
 fs/ioctl.c         |   99 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/fs.h |   15 ++++++++++
 3 files changed, 115 insertions(+)

Index: linux/fs/ioctl.c
===================================================================
--- linux.orig/fs/ioctl.c	2010-12-13 14:01:52.000000000 +0800
+++ linux/fs/ioctl.c	2010-12-13 14:01:56.000000000 +0800
@@ -530,6 +530,102 @@ static int ioctl_fsthaw(struct file *fil
 }
 
 /*
+ * Copy info about metadata in memory to userspace.
+ *
+ * Starting at args.offset, ->metadata_incore() is called repeatedly.
+ * Contiguous ranges are merged and every merged run is stored as one
+ * struct metadata_incore_ent in the user vector.
+ *
+ * Returns:
+ *   > 0, number of metadata_incore_ent entries copied to userspace
+ *   = 0, no more metadata
+ *   < 0, error
+ */
+static int ioctl_metadata_incore(struct file *filp, void __user *argp)
+{
+	struct super_block *sb = filp->f_path.dentry->d_inode->i_sb;
+	struct metadata_incore_ent __user *vec;
+	struct metadata_incore_args args;
+	struct metadata_incore_ent ent;
+	loff_t offset, run_offset = 0;
+	ssize_t size, run_size = 0;
+	int max_entries;
+	int entries = 0;
+
+	if (!sb->s_op->metadata_incore)
+		return -EOPNOTSUPP;
+
+	if (copy_from_user(&args, argp, sizeof(args)))
+		return -EFAULT;
+
+	/* Check the start address: needs to be page-aligned. */
+	if (args.offset & ~PAGE_CACHE_MASK)
+		return -EINVAL;
+
+	/* The vector must hold a whole, non-zero number of entries. */
+	if (!args.vec_size || args.vec_size % sizeof(ent))
+		return -EINVAL;
+	max_entries = args.vec_size / sizeof(ent);
+
+	/* vec_addr is a u64: make sure it fits in a pointer here. */
+	if (args.vec_addr != (unsigned long)args.vec_addr)
+		return -EFAULT;
+	vec = (struct metadata_incore_ent __user *)(unsigned long)args.vec_addr;
+	if (!access_ok(VERIFY_WRITE, vec, args.vec_size))
+		return -EFAULT;
+
+	offset = args.offset;
+	ent.unused = 0;
+
+	while (entries < max_entries) {
+		if (signal_pending(current))
+			return -EINTR;
+		cond_resched();
+
+		/* A non-positive size could never advance offset. */
+		if (sb->s_op->metadata_incore(sb, &offset, &size) < 0 ||
+		    size <= 0)
+			break;
+
+		/*
+		 * Grow the pending run while ranges stay contiguous.  The
+		 * INT_MAX cap keeps the sum within both ssize_t and the
+		 * __u32 ent.size field.
+		 */
+		if (run_size > 0 && offset == run_offset + run_size &&
+		    run_size <= INT_MAX - size) {
+			run_size += size;
+			offset += size;
+			continue;
+		}
+
+		/* Flush the pending run; there is none on the first pass. */
+		if (run_size > 0) {
+			ent.offset = run_offset;
+			ent.size = run_size;
+			if (copy_to_user(&vec[entries], &ent, sizeof(ent)))
+				return -EFAULT;
+			entries++;
+		}
+
+		run_offset = offset;
+		run_size = size;
+		offset += size;
+	}
+
+	/* Flush the last run if the vector still has room for it. */
+	if (run_size > 0 && entries < max_entries) {
+		ent.offset = run_offset;
+		ent.size = run_size;
+		if (copy_to_user(&vec[entries], &ent, sizeof(ent)))
+			return -EFAULT;
+		entries++;
+	}
+
+	return entries;
+}
+
+/*
  * When you add any new common ioctls to the switches above and below
  * please update compat_sys_ioctl() too.
  *
@@ -589,6 +685,9 @@ int do_vfs_ioctl(struct file *filp, unsi
 		return put_user(inode->i_sb->s_blocksize, p);
 	}
 
+	case FIMETADATA_INCORE:
+		return ioctl_metadata_incore(filp, argp);
+
 	default:
 		if (S_ISREG(filp->f_path.dentry->d_inode->i_mode))
 			error = file_ioctl(filp, cmd, arg);
Index: linux/include/linux/fs.h
===================================================================
--- linux.orig/include/linux/fs.h	2010-12-13 14:01:52.000000000 +0800
+++ linux/include/linux/fs.h	2010-12-13 14:01:56.000000000 +0800
@@ -52,6 +52,18 @@ struct inodes_stat_t {
 	int dummy[5];		/* padding for sysctl ABI compatibility */
 };
 
+struct metadata_incore_ent {
+	__u64 offset;
+	__u32 size;
+	__u32 unused;
+};
+
+struct metadata_incore_args {
+	__u64 offset;		/* offset in meta address */
+	__u64 vec_addr;		/* vector's address */
+	__u32 vec_size;		/* vector's size */
+	__u32 unused;
+};
 
 #define NR_FILE  8192	/* this can well be larger on a larger system */
 
@@ -325,6 +337,7 @@ struct inodes_stat_t {
 #define FIFREEZE	_IOWR('X', 119, int)	/* Freeze */
 #define FITHAW		_IOWR('X', 120, int)	/* Thaw */
 #define FITRIM		_IOWR('X', 121, struct fstrim_range)	/* Trim */
+#define FIMETADATA_INCORE	_IOWR('X', 122, struct metadata_incore_args)
 
 #define	FS_IOC_GETFLAGS			_IOR('f', 1, long)
 #define	FS_IOC_SETFLAGS			_IOW('f', 2, long)
@@ -1612,6 +1625,8 @@ struct super_operations {
 	ssize_t (*quota_write)(struct super_block *, int, const char *, size_t, loff_t);
 #endif
 	int (*bdev_try_to_free_page)(struct super_block*, struct page*, gfp_t);
+	int (*metadata_incore)(struct super_block*, loff_t *offset,
+		ssize_t *size);
 };
 
 /*
Index: linux/fs/compat_ioctl.c
===================================================================
--- linux.orig/fs/compat_ioctl.c	2010-12-13 14:02:08.000000000 +0800
+++ linux/fs/compat_ioctl.c	2010-12-13 14:03:27.000000000 +0800
@@ -882,6 +882,7 @@ COMPATIBLE_IOCTL(FIGETBSZ)
 /* 'X' - originally XFS but some now in the VFS */
 COMPATIBLE_IOCTL(FIFREEZE)
 COMPATIBLE_IOCTL(FITHAW)
+COMPATIBLE_IOCTL(FIMETADATA_INCORE)
 COMPATIBLE_IOCTL(KDGETKEYCODE)
 COMPATIBLE_IOCTL(KDSETKEYCODE)
 COMPATIBLE_IOCTL(KDGKBTYPE)
--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html