[PATCH -V4 3/6] vfs: Add open by file handle support

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@xxxxxxxxxxxxxxxxxx>
---
 fs/filesystems.c              |   32 ++++++++-
 fs/namei.c                    |   24 ------
 fs/namespace.c                |   38 ++++++++++
 fs/open.c                     |  155 +++++++++++++++++++++++++++++++++++++++++
 fs/pnode.c                    |    2 +-
 include/linux/fs.h            |    1 +
 include/linux/mnt_namespace.h |    2 +
 include/linux/namei.h         |   24 ++++++
 8 files changed, 252 insertions(+), 26 deletions(-)

diff --git a/fs/filesystems.c b/fs/filesystems.c
index 68ba492..743d36e 100644
--- a/fs/filesystems.c
+++ b/fs/filesystems.c
@@ -281,5 +281,35 @@ struct file_system_type *get_fs_type(const char *name)
 	}
 	return fs;
 }
-
 EXPORT_SYMBOL(get_fs_type);
+
+/* Return the superblock whose fsid matches @fsid with an s_active reference
+ * held (caller drops it), or NULL; superblocks being torn down are skipped. */
+struct super_block *fs_get_sb(struct uuid *fsid)
+{
+	struct uuid *this_fsid;
+	struct file_system_type *fs_type;
+	struct super_block *sb, *found_sb = NULL;
+
+	read_lock(&file_systems_lock);
+	for (fs_type = file_systems; fs_type && !found_sb;
+	     fs_type = fs_type->next) {
+		spin_lock(&sb_lock);
+		list_for_each_entry(sb, &fs_type->fs_supers, s_instances) {
+			if (!sb->s_op->get_fsid)
+				continue;
+			this_fsid = sb->s_op->get_fsid(sb);
+			if (!this_fsid || memcmp(fsid->uuid, this_fsid->uuid,
+						 sizeof(this_fsid->uuid)))
+				continue;
+			/* s_active == 0: shutting down, must not be revived */
+			if (!atomic_inc_not_zero(&sb->s_active))
+				continue;
+			found_sb = sb;
+			break;
+		}
+		spin_unlock(&sb_lock);
+	}
+	read_unlock(&file_systems_lock);
+	return found_sb;
+}
diff --git a/fs/namei.c b/fs/namei.c
index a7dce91..a18711e 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1521,30 +1521,6 @@ out_unlock:
 	return may_open(&nd->path, 0, open_flag & ~O_TRUNC);
 }
 
-/*
- * Note that while the flag value (low two bits) for sys_open means:
- *	00 - read-only
- *	01 - write-only
- *	10 - read-write
- *	11 - special
- * it is changed into
- *	00 - no permissions needed
- *	01 - read-permission
- *	10 - write-permission
- *	11 - read-write
- * for the internal routines (ie open_namei()/follow_link() etc)
- * This is more logical, and also allows the 00 "no perm needed"
- * to be used for symlinks (where the permissions are checked
- * later).
- *
-*/
-static inline int open_to_namei_flags(int flag)
-{
-	if ((flag+1) & O_ACCMODE)
-		flag++;
-	return flag;
-}
-
 static int open_will_truncate(int flag, struct inode *inode)
 {
 	/*
diff --git a/fs/namespace.c b/fs/namespace.c
index 8174c8a..6168526 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -2364,3 +2364,41 @@ void put_mnt_ns(struct mnt_namespace *ns)
 	kfree(ns);
 }
 EXPORT_SYMBOL(put_mnt_ns);
+
+/*
+ * Look up a mount of @sb in @task's mount namespace.
+ *
+ * Returns the first vfsmount on the namespace's list whose superblock is
+ * @sb, with a reference taken through mntget(), or NULL when @task has no
+ * mount namespace or none of its mounts uses @sb.
+ */
+struct vfsmount *fs_get_vfsmount(struct task_struct *task,
+				struct super_block *sb)
+{
+	struct nsproxy *proxy;
+	struct mnt_namespace *mnt_ns = NULL;
+	struct vfsmount *pos, *found = NULL;
+
+	/* grab the namespace before leaving the RCU read section */
+	rcu_read_lock();
+	proxy = task_nsproxy(task);
+	if (proxy)
+		mnt_ns = proxy->mnt_ns;
+	if (mnt_ns)
+		get_mnt_ns(mnt_ns);
+	rcu_read_unlock();
+	if (!mnt_ns)
+		return NULL;
+
+	down_read(&namespace_sem);
+	list_for_each_entry(pos, &mnt_ns->list, mnt_list) {
+		if (pos->mnt_sb != sb)
+			continue;
+		found = mntget(pos);
+		break;
+	}
+	up_read(&namespace_sem);
+
+	put_mnt_ns(mnt_ns);
+	return found;
+}
diff --git a/fs/open.c b/fs/open.c
index dd27cbd..ca09c2d 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -1301,3 +1301,158 @@ err_out:
 	asmlinkage_protect(2, ret, name, handle);
 	return ret;
 }
+
+static int vfs_dentry_acceptable(void *context, struct dentry *dentry)
+{
+	return 1;	/* unconditionally report the dentry as acceptable */
+}
+
+/*
+ * Copy the opaque handle bytes from user space and decode them into a
+ * dentry on @mnt.  Returns the dentry from exportfs_decode_fh() or an
+ * ERR_PTR(): -EINVAL for a bad size, -ENOMEM, or -EFAULT on a bad copy.
+ */
+static struct dentry *handle_to_dentry(struct vfsmount *mnt,
+				struct file_handle *fh)
+{
+	void *handle;
+	struct dentry *dentry;
+
+	/*
+	 * The length is handed to exportfs_decode_fh() in u32 units, so a
+	 * size that is not a multiple of four would lose its trailing bytes.
+	 */
+	if (fh->handle_size <= 0 || fh->handle_size > MAX_HANDLE_SZ ||
+	    (fh->handle_size & (sizeof(u32) - 1)))
+		return ERR_PTR(-EINVAL);
+
+	handle = kmalloc(fh->handle_size, GFP_KERNEL);
+	if (!handle)
+		return ERR_PTR(-ENOMEM);
+
+	if (copy_from_user(handle, fh->f_handle, fh->handle_size)) {
+		dentry = ERR_PTR(-EFAULT);
+		goto out;
+	}
+	dentry = exportfs_decode_fh(mnt, (struct fid *)handle,
+				    fh->handle_size >> 2, fh->handle_type,
+				    vfs_dentry_acceptable, NULL);
+out:
+	kfree(handle);
+	return dentry;
+}
+
+/*
+ * Open the object named by @fh and return a new fd for it, or a negative
+ * errno, e.g. -EPERM without CAP_DAC_OVERRIDE, -ESTALE when no superblock
+ * or mount in the caller's namespace matches, -EINVAL for a non-file/dir.
+ */
+long do_sys_open_by_handle(struct file_handle *fh, int flags)
+{
+	int fd, retval;
+	int d_flags = flags;	/* untouched copy for dentry_open() */
+	struct file *filp;
+	struct vfsmount *mnt;
+	struct inode *inode;
+	struct dentry *dentry;
+	struct super_block *sb;
+
+	if (!capable(CAP_DAC_OVERRIDE))
+		return -EPERM;
+
+	sb = fs_get_sb(&fh->fsid);
+	if (!sb)
+		return -ESTALE;
+	/* Find a vfsmount for this superblock in the current namespace */
+	mnt = fs_get_vfsmount(current, sb);
+	if (!mnt) {
+		retval = -ESTALE;
+		goto out_sb;
+	}
+
+	dentry = handle_to_dentry(mnt, fh);
+	if (IS_ERR(dentry)) {
+		retval = PTR_ERR(dentry);
+		goto out_mnt;
+	}
+
+	inode = dentry->d_inode;
+	/* Restrict open_by_handle to directories & regular files. */
+	if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode))) {
+		retval = -EINVAL;
+		goto out_err;
+	}
+
+	flags = open_to_namei_flags(flags);
+	/* O_TRUNC implies we need access checks for write permissions */
+	if (flags & O_TRUNC)
+		flags |= MAY_WRITE;
+	if ((!(flags & O_APPEND) || (flags & O_TRUNC)) &&
+		(flags & FMODE_WRITE) && IS_APPEND(inode)) {
+		retval = -EPERM;
+		goto out_err;
+	}
+	if ((flags & FMODE_WRITE) && IS_IMMUTABLE(inode)) {
+		retval = -EACCES;
+		goto out_err;
+	}
+	/* Can't write directories. */
+	if (S_ISDIR(inode->i_mode) && (flags & FMODE_WRITE)) {
+		retval = -EISDIR;
+		goto out_err;
+	}
+
+	fd = get_unused_fd();
+	if (fd < 0) {
+		retval = fd;
+		goto out_err;
+	}
+
+	/*
+	 * dentry_open() drops the dentry and mount references it was handed
+	 * when it fails, so the error path must not dput() the dentry again.
+	 */
+	filp = dentry_open(dentry, mntget(mnt), d_flags, current_cred());
+	if (IS_ERR(filp)) {
+		put_unused_fd(fd);
+		retval = PTR_ERR(filp);
+		goto out_mnt;
+	}
+
+	if (S_ISREG(inode->i_mode)) {
+		filp->f_flags |= O_NOATIME;
+		filp->f_mode |= FMODE_NOCMTIME;
+	}
+	fsnotify_open(filp->f_path.dentry);
+	fd_install(fd, filp);
+	retval = fd;
+	goto out_mnt;
+
+out_err:
+	dput(dentry);
+out_mnt:
+	mntput(mnt);
+out_sb:
+	deactivate_super(sb);
+	return retval;
+}
+
+/*
+ * Copy the struct file_handle in from user space and open what it names;
+ * yields do_sys_open_by_handle()'s result, or -EFAULT if the copy fails.
+ */
+SYSCALL_DEFINE2(open_by_handle, struct file_handle __user *, handle,
+		int, flags)
+{
+	struct file_handle kfh;
+	long ret = -EFAULT;
+
+	if (force_o_largefile())
+		flags |= O_LARGEFILE;
+
+	if (!copy_from_user(&kfh, handle, sizeof(kfh)))
+		ret = do_sys_open_by_handle(&kfh, flags);
+	/* avoid REGPARM breakage on x86: */
+	asmlinkage_protect(2, ret, handle, flags);
+	return ret;
+}
diff --git a/fs/pnode.c b/fs/pnode.c
index 5cc564a..9f6d12d 100644
--- a/fs/pnode.c
+++ b/fs/pnode.c
@@ -6,9 +6,9 @@
  *	Author : Ram Pai (linuxram@xxxxxxxxxx)
  *
  */
+#include <linux/fs.h>
 #include <linux/mnt_namespace.h>
 #include <linux/mount.h>
-#include <linux/fs.h>
 #include "internal.h"
 #include "pnode.h"
 
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 157ed57..2116b00 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1935,6 +1935,7 @@ extern int filp_close(struct file *, fl_owner_t id);
 extern char * getname(const char __user *);
 extern long do_sys_name_to_handle(const char __user *name,
 				struct file_handle *fh);
+extern long do_sys_open_by_handle(struct file_handle *fh, int flags);
 
 /* fs/ioctl.c */
 
diff --git a/include/linux/mnt_namespace.h b/include/linux/mnt_namespace.h
index 0b89efc..d363ecc 100644
--- a/include/linux/mnt_namespace.h
+++ b/include/linux/mnt_namespace.h
@@ -36,6 +36,8 @@ extern const struct seq_operations mounts_op;
 extern const struct seq_operations mountinfo_op;
 extern const struct seq_operations mountstats_op;
 extern int mnt_had_events(struct proc_mounts *);
+extern struct vfsmount *fs_get_vfsmount(struct task_struct *task,
+					struct super_block *sb);
 
 #endif
 #endif
diff --git a/include/linux/namei.h b/include/linux/namei.h
index 05b441d..a853aa0 100644
--- a/include/linux/namei.h
+++ b/include/linux/namei.h
@@ -4,6 +4,7 @@
 #include <linux/dcache.h>
 #include <linux/linkage.h>
 #include <linux/path.h>
+#include <asm-generic/fcntl.h>
 
 struct vfsmount;
 
@@ -96,4 +97,27 @@ static inline void nd_terminate_link(void *name, size_t len, size_t maxlen)
 	((char *) name)[min(len, maxlen)] = '\0';
 }
 
+/*
+ * sys_open() encodes the access mode in the low two bits of the flags:
+ *	00 - read-only
+ *	01 - write-only
+ *	10 - read-write
+ *	11 - special
+ * whereas the internal routines (open_namei()/follow_link() etc.) want
+ * them as a permission mask:
+ *	00 - no permissions needed
+ *	01 - read-permission
+ *	10 - write-permission
+ *	11 - read-write
+ * The mask form is more logical, and its 00 "no perm needed" value can
+ * be used for symlinks, whose permissions are checked later.
+ *
+ * Convert the former into the latter; "special" (11) is left untouched.
+ */
+static inline int open_to_namei_flags(int flag)
+{
+	const int step = ((flag + 1) & O_ACCMODE) ? 1 : 0;
+
+	return flag + step;
+}
 #endif /* _LINUX_NAMEI_H */
-- 
1.7.0.4.360.g11766c

--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[Index of Archives]     [Linux Ext4 Filesystem]     [Union Filesystem]     [Filesystem Testing]     [Ceph Users]     [Ecryptfs]     [AutoFS]     [Kernel Newbies]     [Share Photos]     [Security]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux Cachefs]     [Reiser Filesystem]     [Linux RAID]     [Samba]     [Device Mapper]     [CEPH Development]
  Powered by Linux