Hello Nicholas, What do you think about this patch? Thanks, Andrei On Wed, Mar 21, 2018 at 11:55:02PM -0700, Andrei Vagin wrote: > There are two advantages: > * Direct I/O allows to avoid the write-back cache, so it reduces > affects to other processes in the system. > * Async I/O allows to handle a few commands concurrently. > > DIO + AIO shows a better perfomance for random write operations: > > Mode: O_DSYNC Async: 1 > $ ./fio --bs=4K --direct=1 --rw=randwrite --ioengine=libaio --iodepth=64 --name=/dev/sda --runtime=20 --numjobs=2 > WRITE: bw=45.9MiB/s (48.1MB/s), 21.9MiB/s-23.0MiB/s (22.0MB/s-25.2MB/s), io=919MiB (963MB), run=20002-20020msec > > Mode: O_DSYNC Async: 0 > $ ./fio --bs=4K --direct=1 --rw=randwrite --ioengine=libaio --iodepth=64 --name=/dev/sdb --runtime=20 --numjobs=2 > WRITE: bw=1607KiB/s (1645kB/s), 802KiB/s-805KiB/s (821kB/s-824kB/s), io=31.8MiB (33.4MB), run=20280-20295msec > > Known issue: > > DIF (PI) emulation doesn't work when a target uses async I/O, because > DIF metadata is saved in a separate file, and it is another non-trivial > task how to synchronize writing in two files, so that a following read > operation always returns a consisten metadata for a specified block. > > v2: fix comments from Christoph Hellwig > > Cc: "Nicholas A. Bellinger" <nab@xxxxxxxxxxxxxxx> > Cc: Christoph Hellwig <hch@xxxxxxxxxxxxx> > Cc: Bryant G. Ly <bryantly@xxxxxxxxxxxxxxxxxx> > Tested-by: Bryant G. Ly <bryantly@xxxxxxxxxxxxxxxxxx> > Signed-off-by: Andrei Vagin <avagin@xxxxxxxxxx> > --- > drivers/target/target_core_file.c | 137 ++++++++++++++++++++++++++++++++++---- > drivers/target/target_core_file.h | 1 + > 2 files changed, 124 insertions(+), 14 deletions(-) > > diff --git a/drivers/target/target_core_file.c b/drivers/target/target_core_file.c > index 9b2c0c773022..16751ae55d7b 100644 > --- a/drivers/target/target_core_file.c > +++ b/drivers/target/target_core_file.c > @@ -250,6 +250,84 @@ static void fd_destroy_device(struct se_device *dev) > } > } > > +struct target_core_file_cmd { > + unsigned long len; > + struct se_cmd *cmd; > + struct kiocb iocb; > +}; > + > +static void cmd_rw_aio_complete(struct kiocb *iocb, long ret, long ret2) > +{ > + struct target_core_file_cmd *cmd; > + > + cmd = container_of(iocb, struct target_core_file_cmd, iocb); > + > + if (ret != cmd->len) > + target_complete_cmd(cmd->cmd, SAM_STAT_CHECK_CONDITION); > + else > + target_complete_cmd(cmd->cmd, SAM_STAT_GOOD); > + > + kfree(cmd); > +} > + > +static sense_reason_t > +fd_execute_rw_aio(struct se_cmd *cmd, struct scatterlist *sgl, u32 sgl_nents, > + enum dma_data_direction data_direction) > +{ > + int is_write = !(data_direction == DMA_FROM_DEVICE); > + struct se_device *dev = cmd->se_dev; > + struct fd_dev *fd_dev = FD_DEV(dev); > + struct file *file = fd_dev->fd_file; > + struct target_core_file_cmd *aio_cmd; > + struct iov_iter iter = {}; > + struct scatterlist *sg; > + struct bio_vec *bvec; > + ssize_t len = 0; > + int ret = 0, i; > + > + aio_cmd = kmalloc(sizeof(struct target_core_file_cmd), GFP_KERNEL); > + if (!aio_cmd) > + return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE; > + > + bvec = kcalloc(sgl_nents, sizeof(struct bio_vec), GFP_KERNEL); > + if (!bvec) { > + kfree(aio_cmd); > + return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE; > + } > + > + for_each_sg(sgl, sg, sgl_nents, i) { > + bvec[i].bv_page = sg_page(sg); > + bvec[i].bv_len = sg->length; > + bvec[i].bv_offset = sg->offset; > + > + len += sg->length; > + } > + > + iov_iter_bvec(&iter, ITER_BVEC | is_write, bvec, sgl_nents, len); > + > + aio_cmd->cmd = cmd; > + aio_cmd->len = len; > + aio_cmd->iocb.ki_pos = cmd->t_task_lba * dev->dev_attrib.block_size; > + aio_cmd->iocb.ki_filp = file; > + aio_cmd->iocb.ki_complete = cmd_rw_aio_complete; > + aio_cmd->iocb.ki_flags = IOCB_DIRECT; > + > + if (is_write && (cmd->se_cmd_flags & SCF_FUA)) > + aio_cmd->iocb.ki_flags |= IOCB_DSYNC; > + > + if (is_write) > + ret = call_write_iter(file, &aio_cmd->iocb, &iter); > + else > + ret = call_read_iter(file, &aio_cmd->iocb, &iter); > + > + kfree(bvec); > + > + if (ret != -EIOCBQUEUED) > + cmd_rw_aio_complete(&aio_cmd->iocb, ret, 0); > + > + return 0; > +} > + > static int fd_do_rw(struct se_cmd *cmd, struct file *fd, > u32 block_size, struct scatterlist *sgl, > u32 sgl_nents, u32 data_length, int is_write) > @@ -527,7 +605,7 @@ fd_execute_unmap(struct se_cmd *cmd, sector_t lba, sector_t nolb) > } > > static sense_reason_t > -fd_execute_rw(struct se_cmd *cmd, struct scatterlist *sgl, u32 sgl_nents, > +fd_execute_rw_buffered(struct se_cmd *cmd, struct scatterlist *sgl, u32 sgl_nents, > enum dma_data_direction data_direction) > { > struct se_device *dev = cmd->se_dev; > @@ -537,16 +615,6 @@ fd_execute_rw(struct se_cmd *cmd, struct scatterlist *sgl, u32 sgl_nents, > sense_reason_t rc; > int ret = 0; > /* > - * We are currently limited by the number of iovecs (2048) per > - * single vfs_[writev,readv] call. > - */ > - if (cmd->data_length > FD_MAX_BYTES) { > - pr_err("FILEIO: Not able to process I/O of %u bytes due to" > - "FD_MAX_BYTES: %u iovec count limitation\n", > - cmd->data_length, FD_MAX_BYTES); > - return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE; > - } > - /* > * Call vectorized fileio functions to map struct scatterlist > * physical memory addresses to struct iovec virtual memory. > */ > @@ -620,14 +688,39 @@ fd_execute_rw(struct se_cmd *cmd, struct scatterlist *sgl, u32 sgl_nents, > return 0; > } > > +static sense_reason_t > +fd_execute_rw(struct se_cmd *cmd, struct scatterlist *sgl, u32 sgl_nents, > + enum dma_data_direction data_direction) > +{ > + struct se_device *dev = cmd->se_dev; > + struct fd_dev *fd_dev = FD_DEV(dev); > + > + /* > + * We are currently limited by the number of iovecs (2048) per > + * single vfs_[writev,readv] call. > + */ > + if (cmd->data_length > FD_MAX_BYTES) { > + pr_err("FILEIO: Not able to process I/O of %u bytes due to" > + "FD_MAX_BYTES: %u iovec count limitation\n", > + cmd->data_length, FD_MAX_BYTES); > + return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE; > + } > + > + if (fd_dev->fbd_flags & FDBD_HAS_ASYNC_IO) > + return fd_execute_rw_aio(cmd, sgl, sgl_nents, data_direction); > + return fd_execute_rw_buffered(cmd, sgl, sgl_nents, data_direction); > +} > + > enum { > - Opt_fd_dev_name, Opt_fd_dev_size, Opt_fd_buffered_io, Opt_err > + Opt_fd_dev_name, Opt_fd_dev_size, Opt_fd_buffered_io, > + Opt_fd_async_io, Opt_err > }; > > static match_table_t tokens = { > {Opt_fd_dev_name, "fd_dev_name=%s"}, > {Opt_fd_dev_size, "fd_dev_size=%s"}, > {Opt_fd_buffered_io, "fd_buffered_io=%d"}, > + {Opt_fd_async_io, "fd_async_io=%d"}, > {Opt_err, NULL} > }; > > @@ -693,6 +786,21 @@ static ssize_t fd_set_configfs_dev_params(struct se_device *dev, > > fd_dev->fbd_flags |= FDBD_HAS_BUFFERED_IO_WCE; > break; > + case Opt_fd_async_io: > + ret = match_int(args, &arg); > + if (ret) > + goto out; > + if (arg != 1) { > + pr_err("bogus fd_async_io=%d value\n", arg); > + ret = -EINVAL; > + goto out; > + } > + > + pr_debug("FILEIO: Using async I/O" > + " operations for struct fd_dev\n"); > + > + fd_dev->fbd_flags |= FDBD_HAS_ASYNC_IO; > + break; > default: > break; > } > @@ -709,10 +817,11 @@ static ssize_t fd_show_configfs_dev_params(struct se_device *dev, char *b) > ssize_t bl = 0; > > bl = sprintf(b + bl, "TCM FILEIO ID: %u", fd_dev->fd_dev_id); > - bl += sprintf(b + bl, " File: %s Size: %llu Mode: %s\n", > + bl += sprintf(b + bl, " File: %s Size: %llu Mode: %s Async: %d\n", > fd_dev->fd_dev_name, fd_dev->fd_dev_size, > (fd_dev->fbd_flags & FDBD_HAS_BUFFERED_IO_WCE) ? > - "Buffered-WCE" : "O_DSYNC"); > + "Buffered-WCE" : "O_DSYNC", > + !!(fd_dev->fbd_flags & FDBD_HAS_ASYNC_IO)); > return bl; > } > > diff --git a/drivers/target/target_core_file.h b/drivers/target/target_core_file.h > index 53be5ffd3261..929b1ecd544e 100644 > --- a/drivers/target/target_core_file.h > +++ b/drivers/target/target_core_file.h > @@ -22,6 +22,7 @@ > #define FBDF_HAS_PATH 0x01 > #define FBDF_HAS_SIZE 0x02 > #define FDBD_HAS_BUFFERED_IO_WCE 0x04 > +#define FDBD_HAS_ASYNC_IO 0x08 > #define FDBD_FORMAT_UNIT_SIZE 2048 > > struct fd_dev { > -- > 2.13.6 > -- To unsubscribe from this list: send the line "unsubscribe target-devel" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html