On Tue, 2 Mar 2010 13:18:41 -0500, "Theodore Ts'o" <tytso@xxxxxxx> wrote: > Renaming the dio block allocation flags, variables, and functions > introduced in Mingming's "Direct IO for holes and fallocate" > patches so that they can be used by ext4 buffer write as well. > Also changed the related function comments accordingly to cover > both direct write and buffer wirte cases. > > Signed-off-by: Jiaying Zhang <jiayingz@xxxxxxxxxx> > Signed-off-by: "Theodore Ts'o" <tytso@xxxxxxx> > --- > fs/ext4/ext4.h | 18 ++++++------ > fs/ext4/extents.c | 24 +++++++------- > fs/ext4/fsync.c | 2 +- > fs/ext4/inode.c | 84 ++++++++++++++++++++++++----------------------------- > fs/ext4/super.c | 2 +- > 5 files changed, 61 insertions(+), 69 deletions(-) > > diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h > index 74664ca..c831a58 100644 > --- a/fs/ext4/ext4.h > +++ b/fs/ext4/ext4.h > @@ -133,7 +133,7 @@ struct mpage_da_data { > int pages_written; > int retval; > }; > -#define DIO_AIO_UNWRITTEN 0x1 > +#define EXT4_IO_UNWRITTEN 0x1 > typedef struct ext4_io_end { > struct list_head list; /* per-file finished AIO list */ > struct inode *inode; /* file being written to */ > @@ -355,13 +355,13 @@ struct ext4_new_group_data { > /* caller is from the direct IO path, request to creation of an > unitialized extents if not allocated, split the uninitialized > extent if blocks has been preallocated already*/ > -#define EXT4_GET_BLOCKS_DIO 0x0008 > +#define EXT4_GET_BLOCKS_PRE_IO 0x0008 > #define EXT4_GET_BLOCKS_CONVERT 0x0010 > -#define EXT4_GET_BLOCKS_DIO_CREATE_EXT (EXT4_GET_BLOCKS_DIO|\ > +#define EXT4_GET_BLOCKS_IO_CREATE_EXT (EXT4_GET_BLOCKS_PRE_IO|\ > EXT4_GET_BLOCKS_CREATE_UNINIT_EXT) > - /* Convert extent to initialized after direct IO complete */ > -#define EXT4_GET_BLOCKS_DIO_CONVERT_EXT (EXT4_GET_BLOCKS_CONVERT|\ > - EXT4_GET_BLOCKS_DIO_CREATE_EXT) > + /* Convert extent to initialized after IO complete */ > +#define EXT4_GET_BLOCKS_IO_CONVERT_EXT (EXT4_GET_BLOCKS_CONVERT|\ > + EXT4_GET_BLOCKS_IO_CREATE_EXT) > Last time reviewed we said this flags need to documented further. I also had a question regarding the last flag. The related message id is message-id:87y6jw23yn.fsf@xxxxxxxxxxxxxxxxxx > /* > * Flags used by ext4_free_blocks > @@ -700,8 +700,8 @@ struct ext4_inode_info { > qsize_t i_reserved_quota; > #endif > > - /* completed async DIOs that might need unwritten extents handling */ > - struct list_head i_aio_dio_complete_list; > + /* completed IOs that might need unwritten extents handling */ > + struct list_head i_completed_io_list; > /* current io_end structure for async DIO write*/ > ext4_io_end_t *cur_aio_dio; > > @@ -1461,7 +1461,7 @@ extern int ext4_block_truncate_page(handle_t *handle, > struct address_space *mapping, loff_t from); > extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf); > extern qsize_t *ext4_get_reserved_space(struct inode *inode); > -extern int flush_aio_dio_completed_IO(struct inode *inode); > +extern int flush_completed_IO(struct inode *inode); > extern void ext4_da_update_reserve_space(struct inode *inode, > int used, int quota_claim); > /* ioctl.c */ > diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c > index e0e2009..f6ea4a2 100644 > --- a/fs/ext4/extents.c > +++ b/fs/ext4/extents.c > @@ -1619,7 +1619,7 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode, > BUG_ON(path[depth].p_hdr == NULL); > > /* try to insert block into found extent and return */ > - if (ex && (flag != EXT4_GET_BLOCKS_DIO_CREATE_EXT) > + if (ex && (flag != EXT4_GET_BLOCKS_PRE_IO) > && ext4_can_extents_be_merged(inode, ex, newext)) { > ext_debug("append [%d]%d block to %d:[%d]%d (from %llu)\n", > ext4_ext_is_uninitialized(newext), > @@ -1740,7 +1740,7 @@ has_space: > > merge: > /* try to merge extents to the right */ > - if (flag != EXT4_GET_BLOCKS_DIO_CREATE_EXT) > + if (flag != EXT4_GET_BLOCKS_PRE_IO) > ext4_ext_try_to_merge(inode, path, nearex); > > /* try to merge extents to the left */ > @@ -2984,7 +2984,7 @@ fix_extent_len: > ext4_ext_dirty(handle, inode, path + depth); > return err; > } > -static int ext4_convert_unwritten_extents_dio(handle_t *handle, > +static int ext4_convert_unwritten_extents_endio(handle_t *handle, > struct inode *inode, > struct ext4_ext_path *path) > { > @@ -3064,8 +3064,8 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode, > flags, allocated); > ext4_ext_show_leaf(inode, path); > > - /* DIO get_block() before submit the IO, split the extent */ > - if (flags == EXT4_GET_BLOCKS_DIO_CREATE_EXT) { > + /* get_block() before submit the IO, split the extent */ > + if (flags == EXT4_GET_BLOCKS_PRE_IO) { > ret = ext4_split_unwritten_extents(handle, > inode, path, iblock, > max_blocks, flags); > @@ -3075,14 +3075,14 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode, > * completed > */ > if (io) > - io->flag = DIO_AIO_UNWRITTEN; > + io->flag = EXT4_IO_UNWRITTEN; > else > ext4_set_inode_state(inode, EXT4_STATE_DIO_UNWRITTEN); > goto out; > } > - /* async DIO end_io complete, convert the filled extent to written */ > - if (flags == EXT4_GET_BLOCKS_DIO_CONVERT_EXT) { > - ret = ext4_convert_unwritten_extents_dio(handle, inode, > + /* IO end_io complete, convert the filled extent to written */ > + if (flags == EXT4_GET_BLOCKS_CONVERT) { > + ret = ext4_convert_unwritten_extents_endio(handle, inode, > path); > if (ret >= 0) > ext4_update_inode_fsync_trans(handle, inode, 1); > @@ -3359,9 +3359,9 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, > * For non asycn direct IO case, flag the inode state > * that we need to perform convertion when IO is done. > */ > - if (flags == EXT4_GET_BLOCKS_DIO_CREATE_EXT) { > + if (flags == EXT4_GET_BLOCKS_PRE_IO) { > if (io) > - io->flag = DIO_AIO_UNWRITTEN; > + io->flag = EXT4_IO_UNWRITTEN; > else > ext4_set_inode_state(inode, > EXT4_STATE_DIO_UNWRITTEN); > @@ -3656,7 +3656,7 @@ int ext4_convert_unwritten_extents(struct inode *inode, loff_t offset, > map_bh.b_state = 0; > ret = ext4_get_blocks(handle, inode, block, > max_blocks, &map_bh, > - EXT4_GET_BLOCKS_DIO_CONVERT_EXT); > + EXT4_GET_BLOCKS_IO_CONVERT_EXT); > if (ret <= 0) { > WARN_ON(ret <= 0); > printk(KERN_ERR "%s: ext4_ext_get_blocks " > diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c > index 98bd140..0d0c323 100644 > --- a/fs/ext4/fsync.c > +++ b/fs/ext4/fsync.c > @@ -63,7 +63,7 @@ int ext4_sync_file(struct file *file, struct dentry *dentry, int datasync) > if (inode->i_sb->s_flags & MS_RDONLY) > return 0; > > - ret = flush_aio_dio_completed_IO(inode); > + ret = flush_completed_IO(inode); > if (ret < 0) > return ret; > > diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c > index 427f469..28f116b 100644 > --- a/fs/ext4/inode.c > +++ b/fs/ext4/inode.c > @@ -3468,7 +3468,7 @@ out: > return ret; > } > > -static int ext4_get_block_dio_write(struct inode *inode, sector_t iblock, > +static int ext4_get_block_write(struct inode *inode, sector_t iblock, > struct buffer_head *bh_result, int create) > { > handle_t *handle = NULL; > @@ -3476,28 +3476,14 @@ static int ext4_get_block_dio_write(struct inode *inode, sector_t iblock, > unsigned max_blocks = bh_result->b_size >> inode->i_blkbits; > int dio_credits; > > - ext4_debug("ext4_get_block_dio_write: inode %lu, create flag %d\n", > + ext4_debug("ext4_get_block_write: inode %lu, create flag %d\n", > inode->i_ino, create); > /* > - * DIO VFS code passes create = 0 flag for write to > - * the middle of file. It does this to avoid block > - * allocation for holes, to prevent expose stale data > - * out when there is parallel buffered read (which does > - * not hold the i_mutex lock) while direct IO write has > - * not completed. DIO request on holes finally falls back > - * to buffered IO for this reason. > - * > - * For ext4 extent based file, since we support fallocate, > - * new allocated extent as uninitialized, for holes, we > - * could fallocate blocks for holes, thus parallel > - * buffered IO read will zero out the page when read on > - * a hole while parallel DIO write to the hole has not completed. > - * > - * when we come here, we know it's a direct IO write to > - * to the middle of file (<i_size) > - * so it's safe to override the create flag from VFS. > + * ext4_get_block in prepare for a DIO write or buffer write. > + * We allocate an uinitialized extent if blocks haven't been allocated. > + * The extent will be converted to initialized after IO complete. > */ > - create = EXT4_GET_BLOCKS_DIO_CREATE_EXT; > + create = EXT4_GET_BLOCKS_IO_CREATE_EXT; > > if (max_blocks > DIO_MAX_BLOCKS) > max_blocks = DIO_MAX_BLOCKS; > @@ -3524,19 +3510,20 @@ static void ext4_free_io_end(ext4_io_end_t *io) > iput(io->inode); > kfree(io); > } > -static void dump_aio_dio_list(struct inode * inode) > + > +static void dump_completed_IO(struct inode * inode) > { > #ifdef EXT4_DEBUG > struct list_head *cur, *before, *after; > ext4_io_end_t *io, *io0, *io1; > > - if (list_empty(&EXT4_I(inode)->i_aio_dio_complete_list)){ > - ext4_debug("inode %lu aio dio list is empty\n", inode->i_ino); > + if (list_empty(&EXT4_I(inode)->i_completed_io_list)){ > + ext4_debug("inode %lu completed_io list is empty\n", inode->i_ino); > return; > } > > - ext4_debug("Dump inode %lu aio_dio_completed_IO list \n", inode->i_ino); > - list_for_each_entry(io, &EXT4_I(inode)->i_aio_dio_complete_list, list){ > + ext4_debug("Dump inode %lu completed_io list \n", inode->i_ino); > + list_for_each_entry(io, &EXT4_I(inode)->i_completed_io_list, list){ > cur = &io->list; > before = cur->prev; > io0 = container_of(before, ext4_io_end_t, list); > @@ -3552,21 +3539,21 @@ static void dump_aio_dio_list(struct inode * inode) > /* > * check a range of space and convert unwritten extents to written. > */ > -static int ext4_end_aio_dio_nolock(ext4_io_end_t *io) > +static int ext4_end_io_nolock(ext4_io_end_t *io) > { > struct inode *inode = io->inode; > loff_t offset = io->offset; > ssize_t size = io->size; > int ret = 0; > > - ext4_debug("end_aio_dio_onlock: io 0x%p from inode %lu,list->next 0x%p," > + ext4_debug("ext4_end_io_nolock: io 0x%p from inode %lu,list->next 0x%p," > "list->prev 0x%p\n", > io, inode->i_ino, io->list.next, io->list.prev); > > if (list_empty(&io->list)) > return ret; > > - if (io->flag != DIO_AIO_UNWRITTEN) > + if (io->flag != EXT4_IO_UNWRITTEN) > return ret; > > if (offset + size <= i_size_read(inode)) > @@ -3584,17 +3571,18 @@ static int ext4_end_aio_dio_nolock(ext4_io_end_t *io) > io->flag = 0; > return ret; > } > + > /* > * work on completed aio dio IO, to convert unwritten extents to extents > */ > -static void ext4_end_aio_dio_work(struct work_struct *work) > +static void ext4_end_io_work(struct work_struct *work) > { > ext4_io_end_t *io = container_of(work, ext4_io_end_t, work); > struct inode *inode = io->inode; > int ret = 0; > > mutex_lock(&inode->i_mutex); > - ret = ext4_end_aio_dio_nolock(io); > + ret = ext4_end_io_nolock(io); > if (ret >= 0) { > if (!list_empty(&io->list)) > list_del_init(&io->list); > @@ -3602,32 +3590,35 @@ static void ext4_end_aio_dio_work(struct work_struct *work) > } > mutex_unlock(&inode->i_mutex); > } > + > /* > * This function is called from ext4_sync_file(). > * > - * When AIO DIO IO is completed, the work to convert unwritten > - * extents to written is queued on workqueue but may not get immediately > + * When IO is completed, the work to convert unwritten extents to > + * written is queued on workqueue but may not get immediately > * scheduled. When fsync is called, we need to ensure the > * conversion is complete before fsync returns. > - * The inode keeps track of a list of completed AIO from DIO path > - * that might needs to do the conversion. This function walks through > - * the list and convert the related unwritten extents to written. > + * The inode keeps track of a list of pending/completed IO that > + * might needs to do the conversion. This function walks through > + * the list and convert the related unwritten extents for completed IO > + * to written. > + * The function return the number of pending IOs on success. > */ > -int flush_aio_dio_completed_IO(struct inode *inode) > +int flush_completed_IO(struct inode *inode) > { > ext4_io_end_t *io; > int ret = 0; > int ret2 = 0; > > - if (list_empty(&EXT4_I(inode)->i_aio_dio_complete_list)) > + if (list_empty(&EXT4_I(inode)->i_completed_io_list)) > return ret; > > - dump_aio_dio_list(inode); > - while (!list_empty(&EXT4_I(inode)->i_aio_dio_complete_list)){ > - io = list_entry(EXT4_I(inode)->i_aio_dio_complete_list.next, > + dump_completed_IO(inode); > + while (!list_empty(&EXT4_I(inode)->i_completed_io_list)){ > + io = list_entry(EXT4_I(inode)->i_completed_io_list.next, > ext4_io_end_t, list); > /* > - * Calling ext4_end_aio_dio_nolock() to convert completed > + * Calling ext4_end_io_nolock() to convert completed > * IO to written. > * > * When ext4_sync_file() is called, run_queue() may already > @@ -3640,7 +3631,7 @@ int flush_aio_dio_completed_IO(struct inode *inode) > * avoid double converting from both fsync and background work > * queue work. > */ > - ret = ext4_end_aio_dio_nolock(io); > + ret = ext4_end_io_nolock(io); > if (ret < 0) > ret2 = ret; > else > @@ -3662,7 +3653,7 @@ static ext4_io_end_t *ext4_init_io_end (struct inode *inode) > io->offset = 0; > io->size = 0; > io->error = 0; > - INIT_WORK(&io->work, ext4_end_aio_dio_work); > + INIT_WORK(&io->work, ext4_end_io_work); > INIT_LIST_HEAD(&io->list); > } > > @@ -3685,7 +3676,7 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset, > size); > > /* if not aio dio with unwritten extents, just free io and return */ > - if (io_end->flag != DIO_AIO_UNWRITTEN){ > + if (io_end->flag != EXT4_IO_UNWRITTEN){ > ext4_free_io_end(io_end); > iocb->private = NULL; > return; > @@ -3700,9 +3691,10 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset, > > /* Add the io_end to per-inode completed aio dio list*/ > list_add_tail(&io_end->list, > - &EXT4_I(io_end->inode)->i_aio_dio_complete_list); > + &EXT4_I(io_end->inode)->i_completed_io_list); > iocb->private = NULL; > } > + > /* > * For ext4 extent files, ext4 will do direct-io write to holes, > * preallocated extents, and those write extend the file, no need to > @@ -3772,7 +3764,7 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb, > ret = blockdev_direct_IO(rw, iocb, inode, > inode->i_sb->s_bdev, iov, > offset, nr_segs, > - ext4_get_block_dio_write, > + ext4_get_block_write, > ext4_end_io_dio); > if (iocb->private) > EXT4_I(inode)->cur_aio_dio = NULL; > diff --git a/fs/ext4/super.c b/fs/ext4/super.c > index c832508..dc7a97e 100644 > --- a/fs/ext4/super.c > +++ b/fs/ext4/super.c > @@ -708,7 +708,7 @@ static struct inode *ext4_alloc_inode(struct super_block *sb) > #ifdef CONFIG_QUOTA > ei->i_reserved_quota = 0; > #endif > - INIT_LIST_HEAD(&ei->i_aio_dio_complete_list); > + INIT_LIST_HEAD(&ei->i_completed_io_list); > ei->cur_aio_dio = NULL; > ei->i_sync_tid = 0; > ei->i_datasync_tid = 0; > -- > 1.6.6.1.1.g974db.dirty > -aneesh -- To unsubscribe from this list: send the line "unsubscribe linux-ext4" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html