On Wed, Jan 28, 2015 at 11:54:21PM +0100, Christoph Hellwig wrote: > Back in the days when the direct I/O ->end_io callback could be called > from interrupt context for AIO we needed a structure to hand off to the > workqueue, and reused the ioend structure for this purpose. These days > ->end_io is always called from user or workqueue context, which allows us > to avoid this memory allocation and simplify the code significantly. > > Signed-off-by: Christoph Hellwig <hch@xxxxxx> > --- Looks mostly Ok to me. In fact, with xfs_finish_ioend_sync() calling xfs_end_io() directly, I don't see how we currently get into the wq at all. Anyways, a few notes... > fs/xfs/xfs_aops.c | 133 ++++++++++++++++++++++++------------------------------ > fs/xfs/xfs_aops.h | 3 -- > 2 files changed, 59 insertions(+), 77 deletions(-) > > diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c > index 18e2f3b..31d3d7d 100644 > --- a/fs/xfs/xfs_aops.c > +++ b/fs/xfs/xfs_aops.c > @@ -135,30 +135,22 @@ xfs_setfilesize_trans_alloc( > */ > STATIC int > xfs_setfilesize( > - struct xfs_ioend *ioend) > + struct xfs_inode *ip, > + struct xfs_trans *tp, > + xfs_off_t offset, > + size_t size) > { > - struct xfs_inode *ip = XFS_I(ioend->io_inode); > - struct xfs_trans *tp = ioend->io_append_trans; > xfs_fsize_t isize; > > - /* > - * The transaction may have been allocated in the I/O submission thread, > - * thus we need to mark ourselves as beeing in a transaction manually. > - * Similarly for freeze protection. > - */ > - current_set_flags_nested(&tp->t_pflags, PF_FSTRANS); > - rwsem_acquire_read(&VFS_I(ip)->i_sb->s_writers.lock_map[SB_FREEZE_FS-1], > - 0, 1, _THIS_IP_); > - > xfs_ilock(ip, XFS_ILOCK_EXCL); > - isize = xfs_new_eof(ip, ioend->io_offset + ioend->io_size); > + isize = xfs_new_eof(ip, offset + size); > if (!isize) { > xfs_iunlock(ip, XFS_ILOCK_EXCL); > xfs_trans_cancel(tp, 0); > return 0; > } > > - trace_xfs_setfilesize(ip, ioend->io_offset, ioend->io_size); > + trace_xfs_setfilesize(ip, offset, size); > > ip->i_d.di_size = isize; > xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); > @@ -167,6 +159,25 @@ xfs_setfilesize( > return xfs_trans_commit(tp, 0); > } > > +STATIC int > +xfs_setfilesize_ioend( > + struct xfs_ioend *ioend) > +{ > + struct xfs_inode *ip = XFS_I(ioend->io_inode); > + struct xfs_trans *tp = ioend->io_append_trans; > + > + /* > + * The transaction may have been allocated in the I/O submission thread, > + * thus we need to mark ourselves as beeing in a transaction manually. Copied from the the original, but since we're here: "being." > + * Similarly for freeze protection. > + */ > + current_set_flags_nested(&tp->t_pflags, PF_FSTRANS); > + rwsem_acquire_read(&VFS_I(ip)->i_sb->s_writers.lock_map[SB_FREEZE_FS-1], > + 0, 1, _THIS_IP_); > + > + return xfs_setfilesize(ip, tp, ioend->io_offset, ioend->io_size); > +} > + > /* > * Schedule IO completion handling on the final put of an ioend. > * > @@ -182,8 +193,7 @@ xfs_finish_ioend( > > if (ioend->io_type == XFS_IO_UNWRITTEN) > queue_work(mp->m_unwritten_workqueue, &ioend->io_work); > - else if (ioend->io_append_trans || > - (ioend->io_isdirect && xfs_ioend_is_append(ioend))) > + else if (ioend->io_append_trans) > queue_work(mp->m_data_workqueue, &ioend->io_work); > else > xfs_destroy_ioend(ioend); > @@ -215,22 +225,8 @@ xfs_end_io( > if (ioend->io_type == XFS_IO_UNWRITTEN) { > error = xfs_iomap_write_unwritten(ip, ioend->io_offset, > ioend->io_size); > - } else if (ioend->io_isdirect && xfs_ioend_is_append(ioend)) { > - /* > - * For direct I/O we do not know if we need to allocate blocks > - * or not so we can't preallocate an append transaction as that > - * results in nested reservations and log space deadlocks. Hence > - * allocate the transaction here. While this is sub-optimal and > - * can block IO completion for some time, we're stuck with doing > - * it this way until we can pass the ioend to the direct IO > - * allocation callbacks and avoid nesting that way. > - */ > - error = xfs_setfilesize_trans_alloc(ioend); > - if (error) > - goto done; > - error = xfs_setfilesize(ioend); > } else if (ioend->io_append_trans) { > - error = xfs_setfilesize(ioend); > + error = xfs_setfilesize_ioend(ioend); > } else { > ASSERT(!xfs_ioend_is_append(ioend)); > } > @@ -273,7 +269,6 @@ xfs_alloc_ioend( > * all the I/O from calling the completion routine too early. > */ > atomic_set(&ioend->io_remaining, 1); > - ioend->io_isdirect = 0; > ioend->io_error = 0; > ioend->io_list = NULL; > ioend->io_type = type; > @@ -1459,11 +1454,7 @@ xfs_get_blocks_direct( > * > * If the private argument is non-NULL __xfs_get_blocks signals us that we > * need to issue a transaction to convert the range from unwritten to written > - * extents. In case this is regular synchronous I/O we just call xfs_end_io > - * to do this and we are done. But in case this was a successful AIO > - * request this handler is called from interrupt context, from which we > - * can't start transactions. In that case offload the I/O completion to > - * the workqueues we also use for buffered I/O completion. > + * extents. > */ > STATIC void > xfs_end_io_direct_write( > @@ -1472,7 +1463,12 @@ xfs_end_io_direct_write( > ssize_t size, > void *private) > { > - struct xfs_ioend *ioend = iocb->private; > + struct inode *inode = file_inode(iocb->ki_filp); > + struct xfs_inode *ip = XFS_I(inode); > + struct xfs_mount *mp = ip->i_mount; > + > + if (XFS_FORCED_SHUTDOWN(mp)) > + return; > > /* > * While the generic direct I/O code updates the inode size, it does > @@ -1480,22 +1476,33 @@ xfs_end_io_direct_write( > * end_io handler thinks the on-disk size is outside the in-core > * size. To prevent this just update it a little bit earlier here. > */ > - if (offset + size > i_size_read(ioend->io_inode)) > - i_size_write(ioend->io_inode, offset + size); > + if (offset + size > i_size_read(inode)) > + i_size_write(inode, offset + size); > > /* > - * blockdev_direct_IO can return an error even after the I/O > - * completion handler was called. Thus we need to protect > - * against double-freeing. > + * For direct I/O we do not know if we need to allocate blocks or not, > + * so we can't preallocate an append transaction, as that results in > + * nested reservations and log space deadlocks. Hence allocate the > + * transaction here. While this is sub-optimal and can block IO > + * completion for some time, we're stuck with doing it this way until > + * we can pass the ioend to the direct IO allocation callbacks and > + * avoid nesting that way. > */ > - iocb->private = NULL; > - > - ioend->io_offset = offset; > - ioend->io_size = size; > - if (private && size > 0) > - ioend->io_type = XFS_IO_UNWRITTEN; > + if (private && size > 0) { > + xfs_iomap_write_unwritten(ip, offset, size); > + } else if (offset + size > ip->i_d.di_size) { > + struct xfs_trans *tp; > + int error; > + > + tp = xfs_trans_alloc(mp, XFS_TRANS_FSYNC_TS); > + error = xfs_trans_reserve(tp, &M_RES(mp)->tr_fsyncts, 0, 0); > + if (error) { > + xfs_trans_cancel(tp, 0); > + return; > + } > > - xfs_finish_ioend_sync(ioend); I get an unused function warning for xfs_finish_ioend_sync() now, so I guess we should kill that. > + xfs_setfilesize(ip, tp, offset, size); > + } > } > > STATIC ssize_t > @@ -1507,39 +1514,17 @@ xfs_vm_direct_IO( > { > struct inode *inode = iocb->ki_filp->f_mapping->host; > struct block_device *bdev = xfs_find_bdev_for_inode(inode); > - struct xfs_ioend *ioend = NULL; > - ssize_t ret; > > if (rw & WRITE) { A nit, but I guess you could kill the braces here now too. Brian > - size_t size = iov_iter_count(iter); > - > - /* > - * We cannot preallocate a size update transaction here as we > - * don't know whether allocation is necessary or not. Hence we > - * can only tell IO completion that one is necessary if we are > - * not doing unwritten extent conversion. > - */ > - iocb->private = ioend = xfs_alloc_ioend(inode, XFS_IO_DIRECT); > - if (offset + size > XFS_I(inode)->i_d.di_size) > - ioend->io_isdirect = 1; > - > - ret = __blockdev_direct_IO(rw, iocb, inode, bdev, iter, > + return __blockdev_direct_IO(rw, iocb, inode, bdev, iter, > offset, xfs_get_blocks_direct, > xfs_end_io_direct_write, NULL, > DIO_ASYNC_EXTEND); > - if (ret != -EIOCBQUEUED && iocb->private) > - goto out_destroy_ioend; > } else { > - ret = __blockdev_direct_IO(rw, iocb, inode, bdev, iter, > + return __blockdev_direct_IO(rw, iocb, inode, bdev, iter, > offset, xfs_get_blocks_direct, > NULL, NULL, 0); > } > - > - return ret; > - > -out_destroy_ioend: > - xfs_destroy_ioend(ioend); > - return ret; > } > > /* > diff --git a/fs/xfs/xfs_aops.h b/fs/xfs/xfs_aops.h > index f94dd45..ac644e0 100644 > --- a/fs/xfs/xfs_aops.h > +++ b/fs/xfs/xfs_aops.h > @@ -24,14 +24,12 @@ extern mempool_t *xfs_ioend_pool; > * Types of I/O for bmap clustering and I/O completion tracking. > */ > enum { > - XFS_IO_DIRECT = 0, /* special case for direct I/O ioends */ > XFS_IO_DELALLOC, /* covers delalloc region */ > XFS_IO_UNWRITTEN, /* covers allocated but uninitialized data */ > XFS_IO_OVERWRITE, /* covers already allocated extent */ > }; > > #define XFS_IO_TYPES \ > - { 0, "" }, \ > { XFS_IO_DELALLOC, "delalloc" }, \ > { XFS_IO_UNWRITTEN, "unwritten" }, \ > { XFS_IO_OVERWRITE, "overwrite" } > @@ -45,7 +43,6 @@ typedef struct xfs_ioend { > unsigned int io_type; /* delalloc / unwritten */ > int io_error; /* I/O error code */ > atomic_t io_remaining; /* hold count */ > - unsigned int io_isdirect : 1;/* direct I/O */ > struct inode *io_inode; /* file being written to */ > struct buffer_head *io_buffer_head;/* buffer linked list head */ > struct buffer_head *io_buffer_tail;/* buffer linked list tail */ > -- > 1.9.1 > > _______________________________________________ > xfs mailing list > xfs@xxxxxxxxxxx > http://oss.sgi.com/mailman/listinfo/xfs _______________________________________________ xfs mailing list xfs@xxxxxxxxxxx http://oss.sgi.com/mailman/listinfo/xfs