Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@xxxxxxxxxxxxxxxxxx> --- fs/filesystems.c | 32 ++++++++- fs/namei.c | 24 ------ fs/namespace.c | 38 ++++++++++ fs/open.c | 155 +++++++++++++++++++++++++++++++++++++++++ fs/pnode.c | 2 +- include/linux/fs.h | 1 + include/linux/mnt_namespace.h | 2 + include/linux/namei.h | 24 ++++++ 8 files changed, 252 insertions(+), 26 deletions(-) diff --git a/fs/filesystems.c b/fs/filesystems.c index 68ba492..743d36e 100644 --- a/fs/filesystems.c +++ b/fs/filesystems.c @@ -281,5 +281,35 @@ struct file_system_type *get_fs_type(const char *name) } return fs; } - EXPORT_SYMBOL(get_fs_type); + +struct super_block *fs_get_sb(struct uuid *fsid) +{ + struct uuid *this_fsid; + struct file_system_type *fs_type; + struct super_block *sb, *found_sb = NULL; + + read_lock(&file_systems_lock); + fs_type = file_systems; + while (fs_type) { + spin_lock(&sb_lock); + list_for_each_entry(sb, &fs_type->fs_supers, s_instances) { + if (!sb->s_op->get_fsid) + continue; + this_fsid = sb->s_op->get_fsid(sb); + if (!memcmp(fsid->uuid, this_fsid->uuid, + sizeof(this_fsid->uuid))) { + /* found the matching super_block */ + atomic_inc(&sb->s_active); + found_sb = sb; + spin_unlock(&sb_lock); + goto out; + } + } + spin_unlock(&sb_lock); + fs_type = fs_type->next; + } +out: + read_unlock(&file_systems_lock); + return found_sb; +} diff --git a/fs/namei.c b/fs/namei.c index a7dce91..a18711e 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1521,30 +1521,6 @@ out_unlock: return may_open(&nd->path, 0, open_flag & ~O_TRUNC); } -/* - * Note that while the flag value (low two bits) for sys_open means: - * 00 - read-only - * 01 - write-only - * 10 - read-write - * 11 - special - * it is changed into - * 00 - no permissions needed - * 01 - read-permission - * 10 - write-permission - * 11 - read-write - * for the internal routines (ie open_namei()/follow_link() etc) - * This is more logical, and also allows the 00 "no perm needed" - * to be used for symlinks (where the permissions are checked - * later). - * -*/ -static inline int open_to_namei_flags(int flag) -{ - if ((flag+1) & O_ACCMODE) - flag++; - return flag; -} - static int open_will_truncate(int flag, struct inode *inode) { /* diff --git a/fs/namespace.c b/fs/namespace.c index 8174c8a..6168526 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -2364,3 +2364,41 @@ void put_mnt_ns(struct mnt_namespace *ns) kfree(ns); } EXPORT_SYMBOL(put_mnt_ns); + +/* + * Get any vfsmount mapping the superblock in the + * task namespace + */ +struct vfsmount *fs_get_vfsmount(struct task_struct *task, + struct super_block *sb) +{ + struct nsproxy *nsp; + struct list_head *mount_list; + struct mnt_namespace *ns = NULL; + struct vfsmount *mnt, *sb_mnt = NULL; + + rcu_read_lock(); + nsp = task_nsproxy(task); + if (nsp) { + ns = nsp->mnt_ns; + if (ns) + get_mnt_ns(ns); + } + rcu_read_unlock(); + if (!ns) + return NULL; + down_read(&namespace_sem); + list_for_each(mount_list, &ns->list) { + mnt = list_entry(mount_list, struct vfsmount, mnt_list); + if (mnt->mnt_sb == sb) { + /* found the matching super block */ + sb_mnt = mnt; + mntget(sb_mnt); + break; + } + } + up_read(&namespace_sem); + + put_mnt_ns(ns); + return sb_mnt; +} diff --git a/fs/open.c b/fs/open.c index dd27cbd..ca09c2d 100644 --- a/fs/open.c +++ b/fs/open.c @@ -1301,3 +1301,158 @@ err_out: asmlinkage_protect(2, ret, name, handle); return ret; } + +static int vfs_dentry_acceptable(void *context, struct dentry *dentry) +{ + return 1; +} + +static struct dentry *handle_to_dentry(struct vfsmount *mnt, + struct file_handle *fh) +{ + int retval = 0; + int handle_size; + void *handle = NULL; + struct dentry *dentry; + + if ((fh->handle_size > MAX_HANDLE_SZ) || + (fh->handle_size <= 0)) { + retval = -EINVAL; + goto err_out; + } + handle = kmalloc(fh->handle_size, GFP_KERNEL); + if (!handle) { + retval = -ENOMEM; + goto err_out; + } + if (copy_from_user(handle, fh->f_handle, fh->handle_size)) { + retval = -EFAULT; + goto err_out; + } + /* change the handle size to multiple of sizeof(u32) */ + handle_size = fh->handle_size >> 2; + dentry = exportfs_decode_fh(mnt, (struct fid *)handle, + handle_size, fh->handle_type, + vfs_dentry_acceptable, NULL); + kfree(handle); + return dentry; + +err_out: + kfree(handle); + return ERR_PTR(retval); +} + +long do_sys_open_by_handle(struct file_handle *fh, int flags) +{ + int fd; + int retval = 0; + int d_flags = flags; + struct file *filp; + struct vfsmount *mnt; + struct inode *inode; + struct dentry *dentry; + struct super_block *sb; + + if (!capable(CAP_DAC_OVERRIDE)) + return -EPERM; + + sb = fs_get_sb(&fh->fsid); + if (!sb) + return -ESTALE; + /* + * Find the vfsmount for this superblock in the + * current namespace + */ + mnt = fs_get_vfsmount(current, sb); + if (!mnt) { + retval = -ESTALE; + goto out_sb; + } + + dentry = handle_to_dentry(mnt, fh); + if (IS_ERR(dentry)) { + retval = PTR_ERR(dentry); + goto out_mnt; + } + + inode = dentry->d_inode; + /* Restrict open_by_handle to directories & regular files. */ + if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode))) { + retval = -EINVAL; + goto out_err; + } + + flags = open_to_namei_flags(flags); + /* O_TRUNC implies we need access checks for write permissions */ + if (flags & O_TRUNC) + flags |= MAY_WRITE; + + if ((!(flags & O_APPEND) || (flags & O_TRUNC)) && + (flags & FMODE_WRITE) && IS_APPEND(inode)) { + retval = -EPERM; + goto out_err; + } + + if ((flags & FMODE_WRITE) && IS_IMMUTABLE(inode)) { + retval = -EACCES; + goto out_err; + } + + /* Can't write directories. */ + if (S_ISDIR(inode->i_mode) && (flags & FMODE_WRITE)) { + retval = -EISDIR; + goto out_err; + } + + fd = get_unused_fd(); + if (fd < 0) { + retval = fd; + goto out_err; + } + + filp = dentry_open(dentry, mntget(mnt), + d_flags, current_cred()); + if (IS_ERR(filp)) { + put_unused_fd(fd); + retval = PTR_ERR(filp); + goto out_err; + } + + if (inode->i_mode & S_IFREG) { + filp->f_flags |= O_NOATIME; + filp->f_mode |= FMODE_NOCMTIME; + } + fsnotify_open(filp->f_path.dentry); + fd_install(fd, filp); + retval = fd; + goto out_mnt; + +out_err: + dput(dentry); +out_mnt: + mntput(mnt); +out_sb: + deactivate_super(sb); + + return retval; +} + +SYSCALL_DEFINE2(open_by_handle, struct file_handle __user *, handle, + int, flags) +{ + long ret; + struct file_handle f_handle; + + if (force_o_largefile()) + flags |= O_LARGEFILE; + + if (copy_from_user(&f_handle, handle, sizeof(struct file_handle))) { + ret = -EFAULT; + goto err_out; + } + ret = do_sys_open_by_handle(&f_handle, flags); +err_out: + /* avoid REGPARM breakage on x86: */ + asmlinkage_protect(2, ret, handle, flags); + return ret; +} diff --git a/fs/pnode.c b/fs/pnode.c index 5cc564a..9f6d12d 100644 --- a/fs/pnode.c +++ b/fs/pnode.c @@ -6,9 +6,9 @@ * Author : Ram Pai (linuxram@xxxxxxxxxx) * */ +#include <linux/fs.h> #include <linux/mnt_namespace.h> #include <linux/mount.h> -#include <linux/fs.h> #include "internal.h" #include "pnode.h" diff --git a/include/linux/fs.h b/include/linux/fs.h index 157ed57..2116b00 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1935,6 +1935,7 @@ extern int filp_close(struct file *, fl_owner_t id); extern char * getname(const char __user *); extern long do_sys_name_to_handle(const char __user *name, struct file_handle *fh); +extern long do_sys_open_by_handle(struct file_handle *fh, int flags); /* fs/ioctl.c */ diff --git a/include/linux/mnt_namespace.h b/include/linux/mnt_namespace.h index 0b89efc..d363ecc 100644 --- a/include/linux/mnt_namespace.h +++ b/include/linux/mnt_namespace.h @@ -36,6 +36,8 @@ extern const struct seq_operations mounts_op; extern const struct seq_operations mountinfo_op; extern const struct seq_operations mountstats_op; extern int mnt_had_events(struct proc_mounts *); +extern struct vfsmount *fs_get_vfsmount(struct task_struct *task, + struct super_block *sb); #endif #endif diff --git a/include/linux/namei.h b/include/linux/namei.h index 05b441d..a853aa0 100644 --- a/include/linux/namei.h +++ b/include/linux/namei.h @@ -4,6 +4,7 @@ #include <linux/dcache.h> #include <linux/linkage.h> #include <linux/path.h> +#include <asm-generic/fcntl.h> struct vfsmount; @@ -96,4 +97,27 @@ static inline void nd_terminate_link(void *name, size_t len, size_t maxlen) ((char *) name)[min(len, maxlen)] = '\0'; } +/* + * Note that while the flag value (low two bits) for sys_open means: + * 00 - read-only + * 01 - write-only + * 10 - read-write + * 11 - special + * it is changed into + * 00 - no permissions needed + * 01 - read-permission + * 10 - write-permission + * 11 - read-write + * for the internal routines (ie open_namei()/follow_link() etc) + * This is more logical, and also allows the 00 "no perm needed" + * to be used for symlinks (where the permissions are checked + * later). + * +*/ +static inline int open_to_namei_flags(int flag) +{ + if ((flag+1) & O_ACCMODE) + flag++; + return flag; +} #endif /* _LINUX_NAMEI_H */ -- 1.7.0.4.360.g11766c -- To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html