This ensures that VFS implementations don't call ptrace_may_access() from VFS read or write handlers. In order for file descriptor passing to have its intended security properties, VFS read/write handlers must not do any kind of privilege checking. Signed-off-by: Jann Horn <jann@xxxxxxxxx> --- fs/aio.c | 2 ++ fs/read_write.c | 12 ++++++++++++ fs/splice.c | 12 ++++++++++-- include/linux/sched.h | 4 ++++ kernel/ptrace.c | 7 +++++++ 5 files changed, 35 insertions(+), 2 deletions(-) diff --git a/fs/aio.c b/fs/aio.c index fb8e45b..c941885 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -1454,9 +1454,11 @@ rw_common: if (rw == WRITE) file_start_write(file); + current->in_unprivileged_vfs++; ret = iter_op(req, &iter); + current->in_unprivileged_vfs--; if (rw == WRITE) file_end_write(file); kfree(iovec); diff --git a/fs/read_write.c b/fs/read_write.c index 66215a7..deddb93 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -472,7 +472,9 @@ ssize_t vfs_read(struct file *file, char __user *buf, size_t count, loff_t *pos) if (!ret) { if (count > MAX_RW_COUNT) count = MAX_RW_COUNT; + current->in_unprivileged_vfs++; ret = __vfs_read(file, buf, count, pos); + current->in_unprivileged_vfs--; if (ret > 0) { fsnotify_access(file); add_rchar(current, ret); @@ -557,7 +559,9 @@ ssize_t vfs_write(struct file *file, const char __user *buf, size_t count, loff_ if (count > MAX_RW_COUNT) count = MAX_RW_COUNT; file_start_write(file); + current->in_unprivileged_vfs++; ret = __vfs_write(file, buf, count, pos); + current->in_unprivileged_vfs--; if (ret > 0) { fsnotify_modify(file); add_wchar(current, ret); @@ -838,12 +842,14 @@ static ssize_t do_readv_writev(int type, struct file *file, iter_fn = file->f_op->write_iter; file_start_write(file); } + current->in_unprivileged_vfs++; if (iter_fn) ret = do_iter_readv_writev(file, &iter, pos, iter_fn, flags); else ret = do_loop_readv_writev(file, &iter, pos, fn, flags); + current->in_unprivileged_vfs--; if (type != READ) file_end_write(file); @@ -1063,12 +1069,14 @@ static ssize_t compat_do_readv_writev(int type, struct file *file, iter_fn = file->f_op->write_iter; file_start_write(file); } + current->in_unprivileged_vfs++; if (iter_fn) ret = do_iter_readv_writev(file, &iter, pos, iter_fn, flags); else ret = do_loop_readv_writev(file, &iter, pos, fn, flags); + current->in_unprivileged_vfs--; if (type != READ) file_end_write(file); @@ -1369,7 +1377,9 @@ static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos, fl = SPLICE_F_NONBLOCK; #endif file_start_write(out.file); + current->in_unprivileged_vfs++; retval = do_splice_direct(in.file, &pos, out.file, &out_pos, count, fl); + current->in_unprivileged_vfs--; file_end_write(out.file); if (retval > 0) { @@ -1512,6 +1522,7 @@ ssize_t vfs_copy_file_range(struct file *file_in, loff_t pos_in, ret = mnt_want_write_file(file_out); if (ret) return ret; + current->in_unprivileged_vfs++; ret = -EOPNOTSUPP; if (file_out->f_op->copy_file_range) @@ -1521,6 +1532,7 @@ ssize_t vfs_copy_file_range(struct file *file_in, loff_t pos_in, ret = do_splice_direct(file_in, &pos_in, file_out, &pos_out, len > MAX_RW_COUNT ? MAX_RW_COUNT : len, 0); + current->in_unprivileged_vfs--; if (ret > 0) { fsnotify_access(file_in); add_rchar(current, ret); diff --git a/fs/splice.c b/fs/splice.c index dd9bf7e..5fcad56 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -1134,7 +1134,7 @@ static long do_splice_to(struct file *in, loff_t *ppos, { ssize_t (*splice_read)(struct file *, loff_t *, struct pipe_inode_info *, size_t, unsigned int); - int ret; + long ret; if (unlikely(!(in->f_mode & FMODE_READ))) return -EBADF; @@ -1151,7 +1151,11 @@ static long do_splice_to(struct file *in, loff_t *ppos, else splice_read = default_file_splice_read; - return splice_read(in, ppos, pipe, len, flags); + current->in_unprivileged_vfs++; + ret = splice_read(in, ppos, pipe, len, flags); + current->in_unprivileged_vfs--; + + return ret; } /** @@ -1334,7 +1338,9 @@ long do_splice_direct(struct file *in, loff_t *ppos, struct file *out, if (unlikely(ret < 0)) return ret; + current->in_unprivileged_vfs++; ret = splice_direct_to_actor(in, &sd, direct_splice_actor); + current->in_unprivileged_vfs--; if (ret > 0) *ppos = sd.pos; @@ -1401,7 +1407,9 @@ static long do_splice(struct file *in, loff_t __user *off_in, return ret; file_start_write(out); + current->in_unprivileged_vfs++; ret = do_splice_from(ipipe, out, &offset, len, flags); + current->in_unprivileged_vfs--; file_end_write(out); if (!off_out) diff --git a/include/linux/sched.h b/include/linux/sched.h index e4bf894..6094720 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1473,6 +1473,10 @@ struct task_struct { atomic_t usage; unsigned int flags; /* per process flags, defined below */ unsigned int ptrace; + /* depth of VFS read/write; non-zero values let certain privilege checks + * fail with a warning + */ + unsigned int in_unprivileged_vfs; #ifdef CONFIG_SMP struct llist_node wake_entry; diff --git a/kernel/ptrace.c b/kernel/ptrace.c index b311ca5..d839919 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -302,6 +302,13 @@ ok: bool ptrace_may_access(struct task_struct *task, unsigned int mode) { int err; + + /* If you have to check for ptrace access from a VFS method, use + * ptrace_may_access_noncurrent() instead. + */ + if (WARN_ON(current->in_unprivileged_vfs != 0)) + return false; + task_lock(task); err = __ptrace_may_access(task, mode, current_cred(), ¤t->self_privunit_id); -- 2.1.4 -- To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html