For direct block device opened with O_DIRECT, use copy_file_range to issue device copy offload, and fallback to generic_copy_file_range incase device copy offload capability is absent. Modify checks to allow bdevs to use copy_file_range. Suggested-by: Ming Lei <ming.lei@xxxxxxxxxx> Signed-off-by: Anuj Gupta <anuj20.g@xxxxxxxxxxx> Signed-off-by: Nitesh Shetty <nj.shetty@xxxxxxxxxxx> --- block/blk-lib.c | 23 +++++++++++++++++++++++ block/fops.c | 20 ++++++++++++++++++++ fs/read_write.c | 11 +++++++++-- include/linux/blkdev.h | 3 +++ mm/filemap.c | 11 ++++++++--- 5 files changed, 63 insertions(+), 5 deletions(-) diff --git a/block/blk-lib.c b/block/blk-lib.c index ba32545eb8d5..7d6ef85692a6 100644 --- a/block/blk-lib.c +++ b/block/blk-lib.c @@ -523,6 +523,29 @@ int blkdev_issue_copy(struct block_device *bdev_in, loff_t pos_in, } EXPORT_SYMBOL_GPL(blkdev_issue_copy); +/* Returns the length of bytes copied */ +int blkdev_copy_offload(struct block_device *bdev_in, loff_t pos_in, + struct block_device *bdev_out, loff_t pos_out, size_t len, + gfp_t gfp_mask) +{ + struct request_queue *in_q = bdev_get_queue(bdev_in); + struct request_queue *out_q = bdev_get_queue(bdev_out); + int ret = 0; + + if (blkdev_copy_sanity_check(bdev_in, pos_in, bdev_out, pos_out, len)) + return 0; + + if (blk_queue_copy(in_q) && blk_queue_copy(out_q)) { + ret = __blkdev_copy_offload(bdev_in, pos_in, bdev_out, pos_out, + len, NULL, NULL, gfp_mask); + if (ret < 0) + return 0; + } + + return ret; +} +EXPORT_SYMBOL_GPL(blkdev_copy_offload); + static int __blkdev_issue_write_zeroes(struct block_device *bdev, sector_t sector, sector_t nr_sects, gfp_t gfp_mask, struct bio **biop, unsigned flags) diff --git a/block/fops.c b/block/fops.c index ab750e8a040f..df8985675ed1 100644 --- a/block/fops.c +++ b/block/fops.c @@ -614,6 +614,25 @@ static ssize_t blkdev_read_iter(struct kiocb *iocb, struct iov_iter *to) return ret; } +static ssize_t blkdev_copy_file_range(struct file *file_in, loff_t pos_in, + struct file *file_out, loff_t pos_out, + size_t len, unsigned int flags) +{ + struct block_device *in_bdev = I_BDEV(bdev_file_inode(file_in)); + struct block_device *out_bdev = I_BDEV(bdev_file_inode(file_out)); + int comp_len = 0; + + if ((file_in->f_iocb_flags & IOCB_DIRECT) && + (file_out->f_iocb_flags & IOCB_DIRECT)) + comp_len = blkdev_copy_offload(in_bdev, pos_in, out_bdev, + pos_out, len, GFP_KERNEL); + if (comp_len != len) + comp_len = generic_copy_file_range(file_in, pos_in + comp_len, + file_out, pos_out + comp_len, len - comp_len, flags); + + return comp_len; +} + #define BLKDEV_FALLOC_FL_SUPPORTED \ (FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE | \ FALLOC_FL_ZERO_RANGE | FALLOC_FL_NO_HIDE_STALE) @@ -697,6 +716,7 @@ const struct file_operations def_blk_fops = { .splice_read = generic_file_splice_read, .splice_write = iter_file_splice_write, .fallocate = blkdev_fallocate, + .copy_file_range = blkdev_copy_file_range, }; static __init int blkdev_init(void) diff --git a/fs/read_write.c b/fs/read_write.c index a21ba3be7dbe..47e848fcfd42 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -20,6 +20,7 @@ #include <linux/compat.h> #include <linux/mount.h> #include <linux/fs.h> +#include <linux/blkdev.h> #include "internal.h" #include <linux/uaccess.h> @@ -1447,7 +1448,11 @@ static int generic_copy_file_checks(struct file *file_in, loff_t pos_in, return -EOVERFLOW; /* Shorten the copy to EOF */ - size_in = i_size_read(inode_in); + if (S_ISBLK(inode_in->i_mode)) + size_in = bdev_nr_bytes(I_BDEV(file_in->f_mapping->host)); + else + size_in = i_size_read(inode_in); + if (pos_in >= size_in) count = 0; else @@ -1708,7 +1713,9 @@ int generic_file_rw_checks(struct file *file_in, struct file *file_out) /* Don't copy dirs, pipes, sockets... */ if (S_ISDIR(inode_in->i_mode) || S_ISDIR(inode_out->i_mode)) return -EISDIR; - if (!S_ISREG(inode_in->i_mode) || !S_ISREG(inode_out->i_mode)) + + if ((!S_ISREG(inode_in->i_mode) || !S_ISREG(inode_out->i_mode)) && + (!S_ISBLK(inode_in->i_mode) || !S_ISBLK(inode_out->i_mode))) return -EINVAL; if (!(file_in->f_mode & FMODE_READ) || diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index a95c26faa8b6..a9bb7e3a8c79 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1054,6 +1054,9 @@ int blkdev_issue_secure_erase(struct block_device *bdev, sector_t sector, int blkdev_issue_copy(struct block_device *bdev_in, loff_t pos_in, struct block_device *bdev_out, loff_t pos_out, size_t len, cio_iodone_t end_io, void *private, gfp_t gfp_mask); +int blkdev_copy_offload(struct block_device *bdev_in, loff_t pos_in, + struct block_device *bdev_out, loff_t pos_out, size_t len, + gfp_t gfp_mask); struct bio *bio_map_kern(struct request_queue *q, void *data, unsigned int len, gfp_t gfp_mask); void bio_map_kern_endio(struct bio *bio); diff --git a/mm/filemap.c b/mm/filemap.c index 570bc8c3db87..289f0c8229ec 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -48,6 +48,7 @@ #include <asm/pgalloc.h> #include <asm/tlbflush.h> #include "internal.h" +#include <linux/blkdev.h> #define CREATE_TRACE_POINTS #include <trace/events/filemap.h> @@ -2855,7 +2856,7 @@ ssize_t filemap_splice_read(struct file *in, loff_t *ppos, { struct folio_batch fbatch; struct kiocb iocb; - size_t total_spliced = 0, used, npages; + size_t total_spliced = 0, used, npages, size_in; loff_t isize, end_offset; bool writably_mapped; int i, error = 0; @@ -2863,6 +2864,10 @@ ssize_t filemap_splice_read(struct file *in, loff_t *ppos, init_sync_kiocb(&iocb, in); iocb.ki_pos = *ppos; + if (S_ISBLK(file_inode(in)->i_mode)) + size_in = bdev_nr_bytes(I_BDEV(in->f_mapping->host)); + else + size_in = i_size_read(file_inode(in)); /* Work out how much data we can actually add into the pipe */ used = pipe_occupancy(pipe->head, pipe->tail); npages = max_t(ssize_t, pipe->max_usage - used, 0); @@ -2873,7 +2878,7 @@ ssize_t filemap_splice_read(struct file *in, loff_t *ppos, do { cond_resched(); - if (*ppos >= i_size_read(file_inode(in))) + if (*ppos >= size_in) break; iocb.ki_pos = *ppos; @@ -2889,7 +2894,7 @@ ssize_t filemap_splice_read(struct file *in, loff_t *ppos, * part of the page is not copied back to userspace (unless * another truncate extends the file - this is desired though). */ - isize = i_size_read(file_inode(in)); + isize = size_in; if (unlikely(*ppos >= isize)) break; end_offset = min_t(loff_t, isize, *ppos + len); -- 2.35.1.500.gb896f729e2