On Tue, Feb 07, 2012 at 01:10:41PM -0500, Christoph Hellwig wrote: > Do not use unlogged metadata updates and the VFS dirty bit for updating > the file size after writeback. In addition to causing various problems > with updates getting delayed for far too log this also drags in the long > unscalable VFS dirty tracking, and is one of the few remaining unlogged > metadata updates. > > Signed-off-by: Christoph Hellwig <hch@xxxxxx> > > --- .... > @@ -173,18 +214,26 @@ xfs_end_io( > * range to normal written extens after the data I/O has finished. > */ > if (ioend->io_type == IO_UNWRITTEN) { > + if (ioend->io_append_trans) { > + ASSERT(ioend->io_isdirect); > + > + current_set_flags_nested( > + &ioend->io_append_trans->t_pflags, PF_FSTRANS); > + xfs_trans_cancel(ioend->io_append_trans, 0); > + } > + > error = xfs_iomap_write_unwritten(ip, ioend->io_offset, > ioend->io_size); > if (error) { > ioend->io_error = -error; > goto done; > } > + } else if (ioend->io_append_trans) { > + error = xfs_setfilesize(ioend); > + if (error) > + ioend->io_error = error; That looks like it should be: ioend->io_error = -error; > } else { > - /* > - * We might have to update the on-disk file size after > - * extending writes. > - */ > - xfs_setfilesize(ioend); > + ASSERT(!xfs_ioend_is_append(ioend)); > } > > done: > @@ -224,6 +273,7 @@ xfs_alloc_ioend( > */ > atomic_set(&ioend->io_remaining, 1); > ioend->io_isasync = 0; > + ioend->io_isdirect = 0; > ioend->io_error = 0; > ioend->io_list = NULL; > ioend->io_type = type; > @@ -234,6 +284,7 @@ xfs_alloc_ioend( > ioend->io_size = 0; > ioend->io_iocb = NULL; > ioend->io_result = 0; > + ioend->io_append_trans = NULL; This is starting to look like it should memset the ioend to zero after allocation rather than all these individual "initialise to zero" lines. > > INIT_WORK(&ioend->io_work, xfs_end_io); > return ioend; > @@ -341,18 +392,9 @@ xfs_submit_ioend_bio( > xfs_ioend_t *ioend, > struct bio *bio) > { > - struct xfs_inode *ip = XFS_I(ioend->io_inode); > atomic_inc(&ioend->io_remaining); > bio->bi_private = ioend; > bio->bi_end_io = xfs_end_bio; > - > - /* > - * If the I/O is beyond EOF we mark the inode dirty immediately > - * but don't update the inode size until I/O completion. > - */ > - if (xfs_new_eof(ip, ioend->io_offset + ioend->io_size)) > - xfs_mark_inode_dirty(ip); > - > submit_bio(wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : WRITE, bio); > } > > @@ -1014,8 +1056,20 @@ xfs_vm_writepage( > wbc, end_index); > } > > - if (iohead) > + if (iohead) { > + /* > + * Reserve log space if we might write beyond the on-disk > + * inode size. > + */ > + if (ioend->io_type != IO_UNWRITTEN && > + xfs_ioend_is_append(ioend)) { > + err = xfs_setfilesize_trans_alloc(ioend); > + if (err) > + goto error; > + } > + > xfs_submit_ioend(wbc, iohead); > + } > > return 0; > > @@ -1295,17 +1349,26 @@ xfs_vm_direct_IO( > { > struct inode *inode = iocb->ki_filp->f_mapping->host; > struct block_device *bdev = xfs_find_bdev_for_inode(inode); > + struct xfs_ioend *ioend = NULL; > ssize_t ret; > > if (rw & WRITE) { > - iocb->private = xfs_alloc_ioend(inode, IO_DIRECT); > + size_t size = iov_length(iov, nr_segs); > + > + iocb->private = ioend = xfs_alloc_ioend(inode, IO_DIRECT); > + if (offset + size > XFS_I(inode)->i_d.di_size) { > + ret = xfs_setfilesize_trans_alloc(ioend); > + if (ret) > + goto destroy_ioend; > + ioend->io_isdirect = 1; > + } This is a bit messy, but necessary to handle racing IOs and unwritten extent conversion.. Can you add a comment explining why it is necessary so we don't forget in future? -- Dave Chinner david@xxxxxxxxxxxxx _______________________________________________ xfs mailing list xfs@xxxxxxxxxxx http://oss.sgi.com/mailman/listinfo/xfs