Hi Tao, I like the direction of this patch; however, there are a couple of nits below. Thanks, Benny On 2011-11-09 17:16, Peng Tao wrote: > As discussed earlier, it is better for block client to allocate memoroy for nit: "memory" > tracking extents state before submitting bio. So the patch does it by allocating > a short_extent for every INVALID extent touched by write pagelist and for > every zeroing page we created, saving them in layout header. > Then in end_io we can just use them to create commit list items and avoid > memory allocation there. > > Signed-off-by: Peng Tao <peng_tao@xxxxxxx> > --- > fs/nfs/blocklayout/blocklayout.c | 70 +++++++++++++++++++++++++++++--------- > fs/nfs/blocklayout/blocklayout.h | 3 +- > fs/nfs/blocklayout/extents.c | 13 ++----- > 3 files changed, 60 insertions(+), 26 deletions(-) > > diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c > index cb4ff0f..53cd332 100644 > --- a/fs/nfs/blocklayout/blocklayout.c > +++ b/fs/nfs/blocklayout/blocklayout.c > @@ -90,8 +90,9 @@ static int is_writable(struct pnfs_block_extent *be, sector_t isect) > */ > struct parallel_io { > struct kref refcnt; > - void (*pnfs_callback) (void *data); > + void (*pnfs_callback) (void *data, int num_se); > void *data; > + int bse_count; > }; > > static inline struct parallel_io *alloc_parallel(void *data) > @@ -102,6 +103,7 @@ static inline struct parallel_io *alloc_parallel(void *data) > if (rv) { > rv->data = data; > kref_init(&rv->refcnt); > + rv->bse_count = 0; > } > return rv; > } > @@ -116,7 +118,7 @@ static void destroy_parallel(struct kref *kref) > struct parallel_io *p = container_of(kref, struct parallel_io, refcnt); > > dprintk("%s enter\n", __func__); > - p->pnfs_callback(p->data); > + p->pnfs_callback(p->data, p->bse_count); > kfree(p); > } > > @@ -211,7 +213,7 @@ static void bl_read_cleanup(struct work_struct *work) > } > > static void > -bl_end_par_io_read(void *data) > +bl_end_par_io_read(void *data, int unused) > { > 
struct nfs_read_data *rdata = data; > > @@ -312,6 +314,7 @@ static void mark_extents_written(struct pnfs_block_layout *bl, > { > sector_t isect, end; > struct pnfs_block_extent *be; > + struct pnfs_block_short_extent *se; > > dprintk("%s(%llu, %u)\n", __func__, offset, count); > if (count == 0) > @@ -324,8 +327,11 @@ static void mark_extents_written(struct pnfs_block_layout *bl, > be = bl_find_get_extent(bl, isect, NULL); > BUG_ON(!be); /* FIXME */ > len = min(end, be->be_f_offset + be->be_length) - isect; > - if (be->be_state == PNFS_BLOCK_INVALID_DATA) > - bl_mark_for_commit(be, isect, len); /* What if fails? */ > + if (be->be_state == PNFS_BLOCK_INVALID_DATA) { > + se = bl_pop_short_extent(be->be_inval, 1); > + BUG_ON(!se); > + bl_mark_for_commit(be, isect, len, se); > + } > isect += len; > bl_put_extent(be); > } > @@ -347,7 +353,8 @@ static void bl_end_io_write_zero(struct bio *bio, int err) > end_page_writeback(page); > page_cache_release(page); > } while (bvec >= bio->bi_io_vec); > - if (!uptodate) { > + > + if (unlikely(!uptodate)) { > if (!wdata->pnfs_error) > wdata->pnfs_error = -EIO; > pnfs_set_lo_fail(wdata->lseg); > @@ -356,7 +363,6 @@ static void bl_end_io_write_zero(struct bio *bio, int err) > put_parallel(par); > } > > -/* This is basically copied from mpage_end_io_read */ > static void bl_end_io_write(struct bio *bio, int err) > { > struct parallel_io *par = bio->bi_private; > @@ -382,7 +388,7 @@ static void bl_write_cleanup(struct work_struct *work) > dprintk("%s enter\n", __func__); > task = container_of(work, struct rpc_task, u.tk_work); > wdata = container_of(task, struct nfs_write_data, task); > - if (!wdata->pnfs_error) { > + if (likely(!wdata->pnfs_error)) { > /* Marks for LAYOUTCOMMIT */ > mark_extents_written(BLK_LSEG2EXT(wdata->lseg), > wdata->args.offset, wdata->args.count); > @@ -391,9 +397,16 @@ static void bl_write_cleanup(struct work_struct *work) > } > > /* Called when last of bios associated with a bl_write_pagelist call finishes */ 
> -static void bl_end_par_io_write(void *data) > +static void bl_end_par_io_write(void *data, int num_se) > { > struct nfs_write_data *wdata = data; > + struct pnfs_block_short_extent *se; > + > + if (unlikely(wdata->pnfs_error)) { > + se = bl_pop_short_extent(&BLK_LSEG2EXT(wdata->lseg)->bl_inval, > + num_se); > + kfree(se); > + } > > wdata->task.tk_status = wdata->pnfs_error; > wdata->verf.committed = NFS_FILE_SYNC; > @@ -548,7 +561,7 @@ bl_write_pagelist(struct nfs_write_data *wdata, int sync) > */ > par = alloc_parallel(wdata); > if (!par) > - return PNFS_NOT_ATTEMPTED; > + goto out_mds; > par->pnfs_callback = bl_end_par_io_write; > /* At this point, have to be more careful with error handling */ > > @@ -556,12 +569,15 @@ bl_write_pagelist(struct nfs_write_data *wdata, int sync) > be = bl_find_get_extent(BLK_LSEG2EXT(wdata->lseg), isect, &cow_read); > if (!be || !is_writable(be, isect)) { > dprintk("%s no matching extents!\n", __func__); > - wdata->pnfs_error = -EINVAL; > - goto out; > + goto out_mds; > } > > /* First page inside INVALID extent */ > if (be->be_state == PNFS_BLOCK_INVALID_DATA) { > + if (likely(!bl_push_one_short_extent(be->be_inval))) > + par->bse_count++; > + else > + goto out_mds; > temp = offset >> PAGE_CACHE_SHIFT; > npg_zero = do_div(temp, npg_per_block); > isect = (sector_t) (((offset - npg_zero * PAGE_CACHE_SIZE) & > @@ -598,6 +614,19 @@ fill_invalid_ext: > wdata->pnfs_error = ret; > goto out; > } > + if (likely(!bl_push_one_short_extent(be->be_inval))) > + par->bse_count++; > + else { > + end_page_writeback(page); > + page_cache_release(page); > + wdata->pnfs_error = -ENOMEM; > + goto out; > + } > + /* FIXME: This should be done in bi_end_io */ > + mark_extents_written(BLK_LSEG2EXT(wdata->lseg), > + page->index << PAGE_CACHE_SHIFT, > + PAGE_CACHE_SIZE); > + > bio = bl_add_page_to_bio(bio, npg_zero, WRITE, > isect, page, be, > bl_end_io_write_zero, par); > @@ -606,10 +635,6 @@ fill_invalid_ext: > bio = NULL; > goto out; > } > - /* FIXME: 
This should be done in bi_end_io */ > - mark_extents_written(BLK_LSEG2EXT(wdata->lseg), > - page->index << PAGE_CACHE_SHIFT, > - PAGE_CACHE_SIZE); > next_page: > isect += PAGE_CACHE_SECTORS; > extent_length -= PAGE_CACHE_SECTORS; > @@ -633,6 +658,14 @@ next_page: > wdata->pnfs_error = -EINVAL; > goto out; > } > + if (be->be_state == PNFS_BLOCK_INVALID_DATA) { > + if (likely(!bl_push_one_short_extent(be->be_inval))) checkpatch nit: please use tabs, not spaces, for indent... Benny > + par->bse_count++; > + else { > + wdata->pnfs_error = -ENOMEM; > + goto out; > + } > + } > extent_length = be->be_length - > (isect - be->be_f_offset); > } > @@ -680,6 +713,11 @@ out: > bl_submit_bio(WRITE, bio); > put_parallel(par); > return PNFS_ATTEMPTED; > +out_mds: > + bl_put_extent(be); > + if (par) > + kfree(par); > + return PNFS_NOT_ATTEMPTED; > } > > /* FIXME - range ignored */ > diff --git a/fs/nfs/blocklayout/blocklayout.h b/fs/nfs/blocklayout/blocklayout.h > index df0e0fb..4986c23 100644 > --- a/fs/nfs/blocklayout/blocklayout.h > +++ b/fs/nfs/blocklayout/blocklayout.h > @@ -201,7 +201,8 @@ void clean_pnfs_block_layoutupdate(struct pnfs_block_layout *bl, > int bl_add_merge_extent(struct pnfs_block_layout *bl, > struct pnfs_block_extent *new); > int bl_mark_for_commit(struct pnfs_block_extent *be, > - sector_t offset, sector_t length); > + sector_t offset, sector_t length, > + struct pnfs_block_short_extent *new); > int bl_push_one_short_extent(struct pnfs_inval_markings *marks); > struct pnfs_block_short_extent* > bl_pop_short_extent(struct pnfs_inval_markings *marks, int num_to_pop); > diff --git a/fs/nfs/blocklayout/extents.c b/fs/nfs/blocklayout/extents.c > index 72c7fa1..21da3a3 100644 > --- a/fs/nfs/blocklayout/extents.c > +++ b/fs/nfs/blocklayout/extents.c > @@ -370,20 +370,18 @@ static void add_to_commitlist(struct pnfs_block_layout *bl, > > /* Note the range described by offset, length is guaranteed to be contained > * within be. 
> + * new will be freed, either by this function or add_to_commitlist if they > + * decide not to use it, or after LAYOUTCOMMIT uses it in the commitlist. > */ > int bl_mark_for_commit(struct pnfs_block_extent *be, > - sector_t offset, sector_t length) > + sector_t offset, sector_t length, > + struct pnfs_block_short_extent *new) > { > sector_t new_end, end = offset + length; > - struct pnfs_block_short_extent *new; > struct pnfs_block_layout *bl = container_of(be->be_inval, > struct pnfs_block_layout, > bl_inval); > > - new = kmalloc(sizeof(*new), GFP_NOFS); > - if (!new) > - return -ENOMEM; > - > mark_written_sectors(be->be_inval, offset, length); > /* We want to add the range to commit list, but it must be > * block-normalized, and verified that the normalized range has > @@ -413,9 +411,6 @@ int bl_mark_for_commit(struct pnfs_block_extent *be, > new->bse_mdev = be->be_mdev; > > spin_lock(&bl->bl_ext_lock); > - /* new will be freed, either by add_to_commitlist if it decides not > - * to use it, or after LAYOUTCOMMIT uses it in the commitlist. > - */ > add_to_commitlist(bl, new); > spin_unlock(&bl->bl_ext_lock); > return 0; -- To unsubscribe from this list: send the line "unsubscribe linux-nfs" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html