Acked-by: Serge Hallyn <serue@xxxxxxxxxx>
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@xxxxxxxxxxxxxxxxxx>
---
 fs/filesystems.c              |   44 ++++++++++-
 fs/namei.c                    |   24 ------
 fs/namespace.c                |   40 ++++++++++
 fs/open.c                     |  168 +++++++++++++++++++++++++++++++++++++++++
 fs/pnode.c                    |    2 +-
 include/linux/fs.h            |    1 +
 include/linux/mnt_namespace.h |    2 +
 include/linux/namei.h         |   24 ++++++
 8 files changed, 279 insertions(+), 26 deletions(-)

diff --git a/fs/filesystems.c b/fs/filesystems.c
index 68ba492..a424691 100644
--- a/fs/filesystems.c
+++ b/fs/filesystems.c
@@ -281,5 +281,47 @@ struct file_system_type *get_fs_type(const char *name)
 	}
 	return fs;
 }
-
 EXPORT_SYMBOL(get_fs_type);
+
+/*
+ * Find a super_block whose filesystem id matches @fsid.
+ *
+ * Walks the super_blocks of every registered filesystem type and
+ * compares the uuid reported by ->get_fsid() with @fsid.  Super blocks
+ * without a ->get_fsid() method, or whose method fails, are skipped.
+ *
+ * Returns the first match with s_active incremented (the caller is
+ * expected to drop it, e.g. with deactivate_super()), or NULL if no
+ * super_block matches.
+ */
+struct super_block *fs_get_sb(struct uuid *fsid)
+{
+	int error;
+	struct uuid this_fsid;
+	struct file_system_type *fs_type;
+	struct super_block *sb, *found_sb = NULL;
+
+	read_lock(&file_systems_lock);
+	for (fs_type = file_systems; fs_type; fs_type = fs_type->next) {
+		spin_lock(&sb_lock);
+		list_for_each_entry(sb, &fs_type->fs_supers, s_instances) {
+			if (!sb->s_op->get_fsid)
+				continue;
+			error = sb->s_op->get_fsid(sb, &this_fsid);
+			if (error)
+				continue;
+			if (!memcmp(fsid->uuid, this_fsid.uuid,
+				    sizeof(this_fsid.uuid))) {
+				/* found the matching super_block */
+				atomic_inc(&sb->s_active);
+				found_sb = sb;
+				spin_unlock(&sb_lock);
+				goto out;
+			}
+		}
+		spin_unlock(&sb_lock);
+	}
+out:
+	read_unlock(&file_systems_lock);
+	return found_sb;
+}
diff --git a/fs/namei.c b/fs/namei.c
index a7dce91..a18711e 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1521,30 +1521,6 @@ out_unlock:
 	return may_open(&nd->path, 0, open_flag & ~O_TRUNC);
 }
 
-/*
- * Note that while the flag value (low two bits) for sys_open means:
- *	00 - read-only
- *	01 - write-only
- *	10 - read-write
- *	11 - special
- * it is changed into
- *	00 - no permissions needed
- *	01 - read-permission
- *	10 - write-permission
- *	11 - read-write
- * for the internal routines (ie open_namei()/follow_link() etc)
- * This is more logical, and also allows the 00 "no perm needed"
- * to be used for symlinks (where the permissions are checked
- * later).
- *
-*/
-static inline int open_to_namei_flags(int flag)
-{
-	if ((flag+1) & O_ACCMODE)
-		flag++;
-	return flag;
-}
-
 static int open_will_truncate(int flag, struct inode *inode)
 {
 	/*
diff --git a/fs/namespace.c b/fs/namespace.c
index 8174c8a..6168526 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -2364,3 +2364,43 @@ void put_mnt_ns(struct mnt_namespace *ns)
 	kfree(ns);
 }
 EXPORT_SYMBOL(put_mnt_ns);
+
+/*
+ * Get any vfsmount mapping the superblock @sb in the mount
+ * namespace of @task.
+ *
+ * Returns the first matching vfsmount with a reference taken by
+ * mntget() (the caller has to mntput() it), or NULL if @task has no
+ * nsproxy/mount namespace or no mount in that namespace uses @sb.
+ */
+struct vfsmount *fs_get_vfsmount(struct task_struct *task,
+				 struct super_block *sb)
+{
+	struct nsproxy *nsp;
+	struct mnt_namespace *ns = NULL;
+	struct vfsmount *mnt, *sb_mnt = NULL;
+
+	rcu_read_lock();
+	nsp = task_nsproxy(task);
+	if (nsp) {
+		ns = nsp->mnt_ns;
+		if (ns)
+			get_mnt_ns(ns);
+	}
+	rcu_read_unlock();
+	if (!ns)
+		return NULL;
+	down_read(&namespace_sem);
+	list_for_each_entry(mnt, &ns->list, mnt_list) {
+		if (mnt->mnt_sb == sb) {
+			/* found the matching super block */
+			sb_mnt = mnt;
+			mntget(sb_mnt);
+			break;
+		}
+	}
+	up_read(&namespace_sem);
+
+	put_mnt_ns(ns);
+	return sb_mnt;
+}
diff --git a/fs/open.c b/fs/open.c
index 5d0f87b..e0a0cb1 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -1296,3 +1296,171 @@ err_out:
 	asmlinkage_protect(4, ret, dfd, name, handle, flag);
 	return ret;
 }
+
+/* exportfs_decode_fh() callback: accept every dentry. */
+static int vfs_dentry_acceptable(void *context, struct dentry *dentry)
+{
+	return 1;
+}
+
+/*
+ * Decode @handle into a dentry on @mnt via exportfs_decode_fh().
+ * Returns whatever exportfs_decode_fh() returns; the caller checks
+ * it with IS_ERR().
+ */
+static struct dentry *handle_to_dentry(struct vfsmount *mnt,
+				       struct file_handle *handle)
+{
+	int handle_size;
+	struct dentry *dentry;
+
+	/* convert the handle size from bytes to sizeof(u32) units */
+	handle_size = handle->handle_size >> 2;
+	dentry = exportfs_decode_fh(mnt, (struct fid *)handle->f_handle,
+				    handle_size, handle->handle_type,
+				    vfs_dentry_acceptable, NULL);
+	return dentry;
+}
+
+/*
+ * Open the file identified by the user-supplied handle @ufh.
+ *
+ * The handle header is copied in and validated, the super_block is
+ * looked up by the fsid stored in the handle, a vfsmount for that
+ * super_block is located in the caller's mount namespace and the
+ * handle is decoded into a dentry which is then opened with @flags.
+ * The caller needs CAP_DAC_OVERRIDE.
+ *
+ * Returns the new file descriptor, or a negative errno:
+ * -EFAULT (bad user pointer), -EINVAL (handle size out of range),
+ * -EPERM (missing capability, or non-append write to an append-only
+ * file), -ESTALE (no matching super_block or mount), -ENOMEM,
+ * -EACCES (write to an immutable file), -EISDIR (write access to a
+ * directory), or whatever the decode/open helpers return.
+ */
+static long do_sys_open_by_handle(struct file_handle __user *ufh, int flags)
+{
+	int fd;
+	int retval = 0;
+	int d_flags = flags;
+	struct file *filp;
+	struct vfsmount *mnt;
+	struct inode *inode;
+	struct dentry *dentry;
+	struct super_block *sb;
+	struct file_handle f_handle;
+	struct file_handle *handle = NULL;
+
+	if (copy_from_user(&f_handle, ufh, sizeof(struct file_handle))) {
+		retval = -EFAULT;
+		goto out_err;
+	}
+	if ((f_handle.handle_size > MAX_HANDLE_SZ) ||
+	    (f_handle.handle_size <= 0)) {
+		retval = -EINVAL;
+		goto out_err;
+	}
+	if (!capable(CAP_DAC_OVERRIDE)) {
+		retval = -EPERM;
+		goto out_err;
+	}
+	sb = fs_get_sb(&f_handle.fsid);
+	if (!sb)
+		return -ESTALE;
+	/*
+	 * Find the vfsmount for this superblock in the
+	 * current namespace
+	 */
+	mnt = fs_get_vfsmount(current, sb);
+	if (!mnt) {
+		retval = -ESTALE;
+		goto out_sb;
+	}
+
+	handle = kmalloc(sizeof(struct file_handle) + f_handle.handle_size,
+			 GFP_KERNEL);
+	if (!handle) {
+		retval = -ENOMEM;
+		goto out_mnt;
+	}
+	/*
+	 * Reuse the header that was already validated and copy only the
+	 * payload: re-reading the header from user space would let
+	 * handle_size/handle_type/fsid change after the checks above.
+	 */
+	*handle = f_handle;
+	if (copy_from_user(handle->f_handle, ufh->f_handle,
+			   f_handle.handle_size)) {
+		retval = -EFAULT;
+		goto out_mnt;
+	}
+	dentry = handle_to_dentry(mnt, handle);
+	if (IS_ERR(dentry)) {
+		retval = PTR_ERR(dentry);
+		goto out_mnt;
+	}
+	inode = dentry->d_inode;
+	flags = open_to_namei_flags(flags);
+	/* O_TRUNC implies we need access checks for write permissions */
+	if (flags & O_TRUNC)
+		flags |= MAY_WRITE;
+
+	if ((!(flags & O_APPEND) || (flags & O_TRUNC)) &&
+	    (flags & FMODE_WRITE) && IS_APPEND(inode)) {
+		retval = -EPERM;
+		goto out_dentry;
+	}
+	if ((flags & FMODE_WRITE) && IS_IMMUTABLE(inode)) {
+		retval = -EACCES;
+		goto out_dentry;
+	}
+	/* Can't write directories. */
+	if (S_ISDIR(inode->i_mode) && (flags & FMODE_WRITE)) {
+		retval = -EISDIR;
+		goto out_dentry;
+	}
+	fd = get_unused_fd_flags(d_flags);
+	if (fd < 0) {
+		retval = fd;
+		goto out_dentry;
+	}
+	filp = dentry_open(dget(dentry), mntget(mnt),
+			   d_flags, current_cred());
+	if (IS_ERR(filp)) {
+		put_unused_fd(fd);
+		retval = PTR_ERR(filp);
+		goto out_dentry;
+	}
+	if (S_ISREG(inode->i_mode)) {
+		filp->f_flags |= O_NOATIME;
+		filp->f_mode |= FMODE_NOCMTIME;
+	}
+	fsnotify_open(filp->f_path.dentry);
+	fd_install(fd, filp);
+	retval = fd;
+
+out_dentry:
+	dput(dentry);
+out_mnt:
+	kfree(handle);
+	mntput(mnt);
+out_sb:
+	deactivate_super(sb);
+out_err:
+	return retval;
+}
+
+SYSCALL_DEFINE2(open_by_handle, struct file_handle __user *, handle,
+		int, flags)
+{
+	long ret;
+
+	if (force_o_largefile())
+		flags |= O_LARGEFILE;
+
+	ret = do_sys_open_by_handle(handle, flags);
+
+	/* avoid REGPARM breakage on x86: */
+	asmlinkage_protect(2, ret, handle, flags);
+	return ret;
+}
diff --git a/fs/pnode.c b/fs/pnode.c
index 5cc564a..9f6d12d 100644
--- a/fs/pnode.c
+++ b/fs/pnode.c
@@ -6,9 +6,9 @@
  * Author : Ram Pai (linuxram@xxxxxxxxxx)
  *
  */
+#include <linux/fs.h>
 #include <linux/mnt_namespace.h>
 #include <linux/mount.h>
-#include <linux/fs.h>
 #include "internal.h"
 #include "pnode.h"
 
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 055734c..da6d297 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2344,6 +2344,7 @@ extern struct super_block *get_super(struct block_device *);
 extern struct super_block *get_active_super(struct block_device *bdev);
 extern struct super_block *user_get_super(dev_t);
 extern void drop_super(struct super_block *sb);
+extern struct super_block *fs_get_sb(struct uuid *fsid);
 
 extern int dcache_dir_open(struct inode *, struct file *);
 extern int dcache_dir_close(struct inode *, struct file *);
diff --git a/include/linux/mnt_namespace.h b/include/linux/mnt_namespace.h
index 0b89efc..d363ecc 100644
--- a/include/linux/mnt_namespace.h
+++ b/include/linux/mnt_namespace.h
@@ -36,6 +36,8 @@ extern const struct seq_operations mounts_op;
 extern const struct seq_operations mountinfo_op;
 extern const struct seq_operations mountstats_op;
 extern int mnt_had_events(struct proc_mounts *);
+extern struct vfsmount *fs_get_vfsmount(struct task_struct *task,
+					struct super_block *sb);
 
 #endif
 #endif
diff --git a/include/linux/namei.h b/include/linux/namei.h
index 05b441d..a853aa0 100644
--- a/include/linux/namei.h
+++ b/include/linux/namei.h
@@ -4,6 +4,7 @@
 #include <linux/dcache.h>
 #include <linux/linkage.h>
 #include <linux/path.h>
+#include <linux/fcntl.h>
 
 struct vfsmount;
 
@@ -96,4 +97,27 @@ static inline void nd_terminate_link(void *name, size_t len, size_t maxlen)
 	((char *) name)[min(len, maxlen)] = '\0';
 }
 
+/*
+ * Note that while the flag value (low two bits) for sys_open means:
+ *	00 - read-only
+ *	01 - write-only
+ *	10 - read-write
+ *	11 - special
+ * it is changed into
+ *	00 - no permissions needed
+ *	01 - read-permission
+ *	10 - write-permission
+ *	11 - read-write
+ * for the internal routines (ie open_namei()/follow_link() etc)
+ * This is more logical, and also allows the 00 "no perm needed"
+ * to be used for symlinks (where the permissions are checked
+ * later).
+ *
+*/
+static inline int open_to_namei_flags(int flag)
+{
+	if ((flag+1) & O_ACCMODE)
+		flag++;
+	return flag;
+}
 #endif /* _LINUX_NAMEI_H */
-- 
1.7.0.4.360.g11766c

--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html