On Thu, Dec 21, 2023 at 12:27:12AM -0800, Christoph Hellwig wrote: > On Thu, Dec 21, 2023 at 04:08:45AM +0100, Ahelenia Ziemiańska wrote: > > Otherwise we risk sleeping with the pipe locked for indeterminate > You can't just assume that any ->read_iter support IOCB_NOWAIT. Let's see. zero_fops drivers/char/mem.c: .splice_read = copy_splice_read, full_fops drivers/char/mem.c: .splice_read = copy_splice_read, random_fops drivers/char/random.c: .splice_read = copy_splice_read, random_read_iter checks urandom_fops drivers/char/random.c: .splice_read = copy_splice_read, urandom_read_iter returns instantly if ((in->f_flags & O_DIRECT) || IS_DAX(in->f_mapping->host)) fs/splice.c: return copy_splice_read(in, ppos, pipe, len, flags); FMODE_CAN_ODIRECT is set by filesystems and blockdevs, so trusted fs/9p/vfs_file.c: return copy_splice_read(in, ppos, pipe, len, flags); fs/ceph/file.c: return copy_splice_read(in, ppos, pipe, len, flags); fs/ceph/file.c: return copy_splice_read(in, ppos, pipe, len, flags); fs/gfs2/file.c: .splice_read = copy_splice_read, fs/gfs2/file.c: .splice_read = copy_splice_read, fs/kernfs/file.c: .splice_read = copy_splice_read, fs/smb/client/cifsfs.c: .splice_read = copy_splice_read, fs/smb/client/cifsfs.c: .splice_read = copy_splice_read, fs/proc/inode.c: .splice_read = copy_splice_read, fs/proc/inode.c: .splice_read = copy_splice_read, fs/proc/proc_sysctl.c: .splice_read = copy_splice_read, fs/proc_namespace.c: .splice_read = copy_splice_read, fs/proc_namespace.c: .splice_read = copy_splice_read, fs/proc_namespace.c: .splice_read = copy_splice_read, filesystems => trusted tracing_fops kernel/trace/trace.c: .splice_read = copy_splice_read, used in /sys/kernel/debug/tracing/per_cpu/cpu*/trace and /sys/kernel/debug/tracing/trace which are seq_read_iter and even if they did block, it's in tracefs so same logic as tracing_buffers_splice_read applies. net/socket.c: return copy_splice_read(file, ppos, pipe, len, flags); this is the default implementation for protocols without explicit splice_reads, and sock_read_iter translates IOCB_NOWAIT into MSG_DONTAIT. So I think I can, because the ~three implementations that we want to constrain do support it. If anything, this hints to me that to yield a more consistent API that doesn't arbitrarily distinguish between O_DIRECT files with and without IOCB_NOWAIT support, something to the effect of the following diff may be used. diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c index 11cd8d23f6f2..dc42837ee0af 100644 --- a/fs/9p/vfs_file.c +++ b/fs/9p/vfs_file.c @@ -392,7 +392,7 @@ static ssize_t v9fs_file_splice_read(struct file *in, loff_t *ppos, fid->fid, len, *ppos); if (fid->mode & P9L_DIRECT) - return copy_splice_read(in, ppos, pipe, len, flags); + return copy_splice_read_sleepok(in, ppos, pipe, len, flags); return filemap_splice_read(in, ppos, pipe, len, flags); } diff --git a/fs/ceph/file.c b/fs/ceph/file.c index 3b5aae29e944..9a4679013135 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -2209,7 +2209,7 @@ static ssize_t ceph_splice_read(struct file *in, loff_t *ppos, if (ceph_has_inline_data(ci) || (fi->flags & CEPH_F_SYNC)) - return copy_splice_read(in, ppos, pipe, len, flags); + return copy_splice_read_sleepok(in, ppos, pipe, len, flags); ceph_start_io_read(inode); @@ -2228,7 +2228,7 @@ static ssize_t ceph_splice_read(struct file *in, loff_t *ppos, ceph_put_cap_refs(ci, got); ceph_end_io_read(inode); - return copy_splice_read(in, ppos, pipe, len, flags); + return copy_splice_read_sleepok(in, ppos, pipe, len, flags); } dout("splice_read %p %llx.%llx %llu~%zu got cap refs on %s\n", diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index 4b66efc1a82a..5b0cbb6b95c4 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c @@ -1581,7 +1581,7 @@ const struct file_operations gfs2_file_fops = { .fsync = gfs2_fsync, .lock = gfs2_lock, .flock = gfs2_flock, - .splice_read = copy_splice_read, + .splice_read = copy_splice_read_sleepok, .splice_write = gfs2_file_splice_write, .setlease = simple_nosetlease, .fallocate = gfs2_fallocate, @@ -1612,7 +1612,7 @@ const struct file_operations gfs2_file_fops_nolock = { .open = gfs2_open, .release = gfs2_release, .fsync = gfs2_fsync, - .splice_read = copy_splice_read, + .splice_read = copy_splice_read_sleepok, .splice_write = gfs2_file_splice_write, .setlease = generic_setlease, .fallocate = gfs2_fallocate, diff --git a/fs/kernfs/file.c b/fs/kernfs/file.c index f0cb729e9a97..f0b6e85b2c5b 100644 --- a/fs/kernfs/file.c +++ b/fs/kernfs/file.c @@ -989,7 +989,7 @@ const struct file_operations kernfs_file_fops = { .release = kernfs_fop_release, .poll = kernfs_fop_poll, .fsync = noop_fsync, - .splice_read = copy_splice_read, + .splice_read = copy_splice_read_sleepok, .splice_write = iter_file_splice_write, }; diff --git a/fs/proc/inode.c b/fs/proc/inode.c index b33e490e3fd9..7ec2f4653299 100644 --- a/fs/proc/inode.c +++ b/fs/proc/inode.c @@ -588,7 +588,7 @@ static const struct file_operations proc_iter_file_ops = { .llseek = proc_reg_llseek, .read_iter = proc_reg_read_iter, .write = proc_reg_write, - .splice_read = copy_splice_read, + .splice_read = copy_splice_read_sleepok, .poll = proc_reg_poll, .unlocked_ioctl = proc_reg_unlocked_ioctl, .mmap = proc_reg_mmap, @@ -614,7 +614,7 @@ static const struct file_operations proc_reg_file_ops_compat = { static const struct file_operations proc_iter_file_ops_compat = { .llseek = proc_reg_llseek, .read_iter = proc_reg_read_iter, - .splice_read = copy_splice_read, + .splice_read = copy_splice_read_sleepok, .write = proc_reg_write, .poll = proc_reg_poll, .unlocked_ioctl = proc_reg_unlocked_ioctl, diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index 8064ea76f80b..11d26fd14e7d 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -864,7 +864,7 @@ static const struct file_operations proc_sys_file_operations = { .poll = proc_sys_poll, .read_iter = proc_sys_read, .write_iter = proc_sys_write, - .splice_read = copy_splice_read, + .splice_read = copy_splice_read_sleepok, .splice_write = iter_file_splice_write, .llseek = default_llseek, }; diff --git a/fs/proc_namespace.c b/fs/proc_namespace.c index 250eb5bf7b52..e9d19a856dd7 100644 --- a/fs/proc_namespace.c +++ b/fs/proc_namespace.c @@ -324,7 +324,7 @@ static int mountstats_open(struct inode *inode, struct file *file) const struct file_operations proc_mounts_operations = { .open = mounts_open, .read_iter = seq_read_iter, - .splice_read = copy_splice_read, + .splice_read = copy_splice_read_sleepok, .llseek = seq_lseek, .release = mounts_release, .poll = mounts_poll, @@ -333,7 +333,7 @@ const struct file_operations proc_mounts_operations = { const struct file_operations proc_mountinfo_operations = { .open = mountinfo_open, .read_iter = seq_read_iter, - .splice_read = copy_splice_read, + .splice_read = copy_splice_read_sleepok, .llseek = seq_lseek, .release = mounts_release, .poll = mounts_poll, @@ -342,7 +342,7 @@ const struct file_operations proc_mountinfo_operations = { const struct file_operations proc_mountstats_operations = { .open = mountstats_open, .read_iter = seq_read_iter, - .splice_read = copy_splice_read, + .splice_read = copy_splice_read_sleepok, .llseek = seq_lseek, .release = mounts_release, }; diff --git a/fs/smb/client/cifsfs.c b/fs/smb/client/cifsfs.c index 2131638f26d0..5f9fb3ce3bcb 100644 --- a/fs/smb/client/cifsfs.c +++ b/fs/smb/client/cifsfs.c @@ -1561,7 +1561,7 @@ const struct file_operations cifs_file_direct_ops = { .fsync = cifs_fsync, .flush = cifs_flush, .mmap = cifs_file_mmap, - .splice_read = copy_splice_read, + .splice_read = copy_splice_read_sleepok, .splice_write = iter_file_splice_write, .unlocked_ioctl = cifs_ioctl, .copy_file_range = cifs_copy_file_range, @@ -1615,7 +1615,7 @@ const struct file_operations cifs_file_direct_nobrl_ops = { .fsync = cifs_fsync, .flush = cifs_flush, .mmap = cifs_file_mmap, - .splice_read = copy_splice_read, + .splice_read = copy_splice_read_sleepok, .splice_write = iter_file_splice_write, .unlocked_ioctl = cifs_ioctl, .copy_file_range = cifs_copy_file_range, diff --git a/fs/splice.c b/fs/splice.c index 2871c6f9366f..90ebcf236c05 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -298,12 +298,14 @@ void splice_shrink_spd(struct splice_pipe_desc *spd) } /** - * copy_splice_read - Copy data from a file and splice the copy into a pipe + * __copy_splice_read - Copy data from a file and splice the copy into a pipe * @in: The file to read from * @ppos: Pointer to the file position to read from * @pipe: The pipe to splice into * @len: The amount to splice * @flags: The SPLICE_F_* flags + * @sleepok: Set if splicing from a trusted filesystem, + * don't set if splicing from an IPC mechanism * * This function allocates a bunch of pages sufficient to hold the requested * amount of data (but limited by the remaining pipe capacity), passes it to @@ -317,10 +319,11 @@ void splice_shrink_spd(struct splice_pipe_desc *spd) * if the pipe has insufficient space, we reach the end of the data or we hit a * hole. */ -ssize_t copy_splice_read(struct file *in, loff_t *ppos, - struct pipe_inode_info *pipe, - size_t len, unsigned int flags) +static ssize_t __copy_splice_read(struct file *in, loff_t *ppos, + struct pipe_inode_info *pipe, + size_t len, unsigned int flags, bool sleepok) { + printk("__copy_splice_read(%d)\n", sleepok); struct iov_iter to; struct bio_vec *bv; struct kiocb kiocb; @@ -361,7 +364,8 @@ ssize_t copy_splice_read(struct file *in, loff_t *ppos, iov_iter_bvec(&to, ITER_DEST, bv, npages, len); init_sync_kiocb(&kiocb, in); kiocb.ki_pos = *ppos; - kiocb.ki_flags |= IOCB_NOWAIT; + if (!sleepok) + kiocb.ki_flags |= IOCB_NOWAIT; ret = call_read_iter(in, &kiocb, &to); if (ret > 0) { @@ -399,8 +403,21 @@ ssize_t copy_splice_read(struct file *in, loff_t *ppos, kfree(bv); return ret; } + +ssize_t copy_splice_read(struct file *in, loff_t *ppos, + struct pipe_inode_info *pipe, + size_t len, unsigned int flags) { + return __copy_splice_read(in, ppos, pipe, len, flags, false); +} EXPORT_SYMBOL(copy_splice_read); +ssize_t copy_splice_read_sleepok(struct file *in, loff_t *ppos, + struct pipe_inode_info *pipe, + size_t len, unsigned int flags) { + return __copy_splice_read(in, ppos, pipe, len, flags, true); +} +EXPORT_SYMBOL(copy_splice_read_sleepok); + const struct pipe_buf_operations default_pipe_buf_ops = { .release = generic_pipe_buf_release, .try_steal = generic_pipe_buf_try_steal, @@ -988,7 +1005,7 @@ long vfs_splice_read(struct file *in, loff_t *ppos, * buffer, copy into it and splice that into the pipe. */ if ((in->f_flags & O_DIRECT) || IS_DAX(in->f_mapping->host)) - return copy_splice_read(in, ppos, pipe, len, flags); + return copy_splice_read_sleepok(in, ppos, pipe, len, flags); return in->f_op->splice_read(in, ppos, pipe, len, flags); } EXPORT_SYMBOL_GPL(vfs_splice_read); diff --git a/include/linux/fs.h b/include/linux/fs.h index 98b7a7a8c42e..0980bf6ba8fd 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2989,6 +2989,9 @@ ssize_t filemap_splice_read(struct file *in, loff_t *ppos, ssize_t copy_splice_read(struct file *in, loff_t *ppos, struct pipe_inode_info *pipe, size_t len, unsigned int flags); +ssize_t copy_splice_read_sleepok(struct file *in, loff_t *ppos, + struct pipe_inode_info *pipe, + size_t len, unsigned int flags); extern ssize_t iter_file_splice_write(struct pipe_inode_info *, struct file *, loff_t *, size_t, unsigned int); extern long do_splice_direct(struct file *in, loff_t *ppos, struct file *out,
Attachment:
signature.asc
Description: PGP signature