On Mon, Nov 14, 2011 at 6:26 PM, Benny Halevy <bhalevy@xxxxxxxxxx> wrote: > Tao, The patch below doesn't apply. > Can you please re-send a clean patch, with tab indents (vs. spaces). oops, I will resend it. Thanks, Tao > > Thanks, > > Benny > > On 2011-11-11 04:05, tao.peng@xxxxxxx wrote: >> Hi, Bryan, >> >>> -----Original Message----- >>> From: Bryan Schumaker [mailto:bjschuma@xxxxxxxxxx] >>> Sent: Friday, November 11, 2011 12:44 AM >>> To: Peng Tao >>> Cc: linux-nfs@xxxxxxxxxxxxxxx; Trond.Myklebust@xxxxxxxxxx; bhalevy@xxxxxxxxxx; >>> Peng, Tao >>> Subject: Re: [PATCH-v2 7/7] pnfsblock: alloc short extent before submit bio >>> >>> On 11/10/2011 10:25 AM, Peng Tao wrote: >>>> As discussed earlier, it is better for block client to allocate memory for >>>> tracking extents state before submitting bio. So the patch does it by allocating >>>> a short_extent for every INVALID extent touched by write pagelist and for >>>> every zeroing page we created, saving them in layout header. Then in end_io we >>>> can just use them to create commit list items and avoid memory allocation there. >>>> >>>> Signed-off-by: Peng Tao <peng_tao@xxxxxxx> >>>> --- >>>> fs/nfs/blocklayout/blocklayout.c | 74 >>> +++++++++++++++++++++++++++++-------- >>>> fs/nfs/blocklayout/blocklayout.h | 9 ++++- >>>> fs/nfs/blocklayout/extents.c | 62 +++++++++++++++++++++++++++----- >>>> 3 files changed, 119 insertions(+), 26 deletions(-) >>>> >>>> diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c >>>> index 3eaea2b..3fdfb29 100644 >>>> --- a/fs/nfs/blocklayout/blocklayout.c >>>> +++ b/fs/nfs/blocklayout/blocklayout.c >>>> @@ -90,8 +90,9 @@ static int is_writable(struct pnfs_block_extent *be, sector_t >>> isect) >>>> */ >>>> struct parallel_io { >>>> struct kref refcnt; >>>> - void (*pnfs_callback) (void *data); >>>> + void (*pnfs_callback) (void *data, int num_se); >>>> void *data; >>>> + int bse_count; >>>> }; >>>> >>>> static inline struct parallel_io *alloc_parallel(void *data) >>>> @@ -102,6 +103,7 @@ static inline struct parallel_io *alloc_parallel(void *data) >>>> if (rv) { >>>> rv->data = data; >>>> kref_init(&rv->refcnt); >>>> + rv->bse_count = 0; >>>> } >>>> return rv; >>>> } >>>> @@ -116,7 +118,7 @@ static void destroy_parallel(struct kref *kref) >>>> struct parallel_io *p = container_of(kref, struct parallel_io, refcnt); >>>> >>>> dprintk("%s enter\n", __func__); >>>> - p->pnfs_callback(p->data); >>>> + p->pnfs_callback(p->data, p->bse_count); >>>> kfree(p); >>>> } >>>> >>>> @@ -211,7 +213,7 @@ static void bl_read_cleanup(struct work_struct *work) >>>> } >>>> >>>> static void >>>> -bl_end_par_io_read(void *data) >>>> +bl_end_par_io_read(void *data, int unused) >>>> { >>>> struct nfs_read_data *rdata = data; >>>> >>>> @@ -312,6 +314,7 @@ static void mark_extents_written(struct pnfs_block_layout >>> *bl, >>>> { >>>> sector_t isect, end; >>>> struct pnfs_block_extent *be; >>>> + struct pnfs_block_short_extent *se; >>>> >>>> dprintk("%s(%llu, %u)\n", __func__, offset, count); >>>> if (count == 0) >>>> @@ -324,8 +327,11 @@ static void mark_extents_written(struct >>> pnfs_block_layout *bl, >>>> be = bl_find_get_extent(bl, isect, NULL); >>>> BUG_ON(!be); /* FIXME */ >>>> len = min(end, be->be_f_offset + be->be_length) - isect; >>>> - if (be->be_state == PNFS_BLOCK_INVALID_DATA) >>>> - bl_mark_for_commit(be, isect, len); /* What if fails? */ >>>> + if (be->be_state == PNFS_BLOCK_INVALID_DATA) { >>>> + se = bl_pop_one_short_extent(be->be_inval); >>>> + BUG_ON(!se); >>>> + bl_mark_for_commit(be, isect, len, se); >>>> + } >>>> isect += len; >>>> bl_put_extent(be); >>>> } >>>> @@ -347,7 +353,8 @@ static void bl_end_io_write_zero(struct bio *bio, int err) >>>> end_page_writeback(page); >>>> page_cache_release(page); >>>> } while (bvec >= bio->bi_io_vec); >>>> - if (!uptodate) { >>>> + >>>> + if (unlikely(!uptodate)) { >>>> if (!wdata->pnfs_error) >>>> wdata->pnfs_error = -EIO; >>>> pnfs_set_lo_fail(wdata->lseg); >>>> @@ -356,7 +363,6 @@ static void bl_end_io_write_zero(struct bio *bio, int err) >>>> put_parallel(par); >>>> } >>>> >>>> -/* This is basically copied from mpage_end_io_read */ >>>> static void bl_end_io_write(struct bio *bio, int err) >>>> { >>>> struct parallel_io *par = bio->bi_private; >>>> @@ -382,7 +388,7 @@ static void bl_write_cleanup(struct work_struct *work) >>>> dprintk("%s enter\n", __func__); >>>> task = container_of(work, struct rpc_task, u.tk_work); >>>> wdata = container_of(task, struct nfs_write_data, task); >>>> - if (!wdata->pnfs_error) { >>>> + if (likely(!wdata->pnfs_error)) { >>>> /* Marks for LAYOUTCOMMIT */ >>>> mark_extents_written(BLK_LSEG2EXT(wdata->lseg), >>>> wdata->args.offset, wdata->args.count); >>>> @@ -391,10 +397,15 @@ static void bl_write_cleanup(struct work_struct *work) >>>> } >>>> >>>> /* Called when last of bios associated with a bl_write_pagelist call finishes */ >>>> -static void bl_end_par_io_write(void *data) >>>> +static void bl_end_par_io_write(void *data, int num_se) >>>> { >>>> struct nfs_write_data *wdata = data; >>>> >>>> + if (unlikely(wdata->pnfs_error)) { >>>> + bl_free_short_extents(&BLK_LSEG2EXT(wdata->lseg)->bl_inval, >>>> + num_se); >>>> + } >>>> + >>>> wdata->task.tk_status = wdata->pnfs_error; >>>> wdata->verf.committed = NFS_FILE_SYNC; >>>> INIT_WORK(&wdata->task.u.tk_work, bl_write_cleanup); >>>> @@ -547,7 +558,7 @@ bl_write_pagelist(struct nfs_write_data *wdata, int sync) >>>> */ >>>> par = alloc_parallel(wdata); >>>> if (!par) >>>> - return PNFS_NOT_ATTEMPTED; >>>> + goto out_mds; >>>> par->pnfs_callback = bl_end_par_io_write; >>>> /* At this point, have to be more careful with error handling */ >>>> >>>> @@ -555,12 +566,15 @@ bl_write_pagelist(struct nfs_write_data *wdata, int >>> sync) >>>> be = bl_find_get_extent(BLK_LSEG2EXT(wdata->lseg), isect, &cow_read); >>>> if (!be || !is_writable(be, isect)) { >>>> dprintk("%s no matching extents!\n", __func__); >>>> - wdata->pnfs_error = -EINVAL; >>>> - goto out; >>>> + goto out_mds; >>>> } >>>> >>>> /* First page inside INVALID extent */ >>>> if (be->be_state == PNFS_BLOCK_INVALID_DATA) { >>>> + if (likely(!bl_push_one_short_extent(be->be_inval))) >>>> + par->bse_count++; >>>> + else >>>> + goto out_mds; >>>> temp = offset >> PAGE_CACHE_SHIFT; >>>> npg_zero = do_div(temp, npg_per_block); >>>> isect = (sector_t) (((offset - npg_zero * PAGE_CACHE_SIZE) & >>>> @@ -598,6 +612,19 @@ fill_invalid_ext: >>>> wdata->pnfs_error = ret; >>>> goto out; >>>> } >>>> + if (likely(!bl_push_one_short_extent(be->be_inval))) >>>> + par->bse_count++; >>>> + else { >>>> + end_page_writeback(page); >>>> + page_cache_release(page); >>>> + wdata->pnfs_error = -ENOMEM; >>>> + goto out; >>>> + } >>>> + /* FIXME: This should be done in bi_end_io */ >>>> + mark_extents_written(BLK_LSEG2EXT(wdata->lseg), >>>> + page->index << PAGE_CACHE_SHIFT, >>>> + PAGE_CACHE_SIZE); >>>> + >>>> bio = bl_add_page_to_bio(bio, npg_zero, WRITE, >>>> isect, page, be, >>>> bl_end_io_write_zero, par); >>>> @@ -606,10 +633,6 @@ fill_invalid_ext: >>>> bio = NULL; >>>> goto out; >>>> } >>>> - /* FIXME: This should be done in bi_end_io */ >>>> - mark_extents_written(BLK_LSEG2EXT(wdata->lseg), >>>> - page->index << PAGE_CACHE_SHIFT, >>>> - PAGE_CACHE_SIZE); >>>> next_page: >>>> isect += PAGE_CACHE_SECTORS; >>>> extent_length -= PAGE_CACHE_SECTORS; >>>> @@ -633,6 +656,15 @@ next_page: >>>> wdata->pnfs_error = -EINVAL; >>>> goto out; >>>> } >>>> + if (be->be_state == PNFS_BLOCK_INVALID_DATA) { >>>> + if (likely(!bl_push_one_short_extent( >>>> + be->be_inval))) >>>> + par->bse_count++; >>>> + else { >>>> + wdata->pnfs_error = -ENOMEM; >>>> + goto out; >>>> + } >>>> + } >>>> extent_length = be->be_length - >>>> (isect - be->be_f_offset); >>>> } >>>> @@ -680,6 +712,10 @@ out: >>>> bl_submit_bio(WRITE, bio); >>>> put_parallel(par); >>>> return PNFS_ATTEMPTED; >>>> +out_mds: >>>> + bl_put_extent(be); >>>> + kfree(par); >>>> + return PNFS_NOT_ATTEMPTED; >>>> } >>>> >>>> /* FIXME - range ignored */ >>>> @@ -706,11 +742,17 @@ static void >>>> release_inval_marks(struct pnfs_inval_markings *marks) >>>> { >>>> struct pnfs_inval_tracking *pos, *temp; >>>> + struct pnfs_block_short_extent *se, *stemp; >>>> >>>> list_for_each_entry_safe(pos, temp, &marks->im_tree.mtt_stub, it_link) { >>>> list_del(&pos->it_link); >>>> kfree(pos); >>>> } >>>> + >>>> + list_for_each_entry_safe(se, stemp, &marks->im_extents, bse_node) { >>>> + list_del(&se->bse_node); >>>> + kfree(se); >>>> + } >>>> return; >>>> } >>>> >>>> diff --git a/fs/nfs/blocklayout/blocklayout.h b/fs/nfs/blocklayout/blocklayout.h >>>> index 60728ac..e31a2df 100644 >>>> --- a/fs/nfs/blocklayout/blocklayout.h >>>> +++ b/fs/nfs/blocklayout/blocklayout.h >>>> @@ -70,6 +70,7 @@ struct pnfs_inval_markings { >>>> spinlock_t im_lock; >>>> struct my_tree im_tree; /* Sectors that need LAYOUTCOMMIT */ >>>> sector_t im_block_size; /* Server blocksize in sectors */ >>>> + struct list_head im_extents; /* Short extents for INVAL->RW conversion */ >>>> }; >>>> >>>> struct pnfs_inval_tracking { >>>> @@ -105,6 +106,7 @@ BL_INIT_INVAL_MARKS(struct pnfs_inval_markings >>> *marks, sector_t blocksize) >>>> { >>>> spin_lock_init(&marks->im_lock); >>>> INIT_LIST_HEAD(&marks->im_tree.mtt_stub); >>>> + INIT_LIST_HEAD(&marks->im_extents); >>>> marks->im_block_size = blocksize; >>>> marks->im_tree.mtt_step_size = min((sector_t)PAGE_CACHE_SECTORS, >>>> blocksize); >>>> @@ -199,6 +201,11 @@ void clean_pnfs_block_layoutupdate(struct >>> pnfs_block_layout *bl, >>>> int bl_add_merge_extent(struct pnfs_block_layout *bl, >>>> struct pnfs_block_extent *new); >>>> int bl_mark_for_commit(struct pnfs_block_extent *be, >>>> - sector_t offset, sector_t length); >>>> + sector_t offset, sector_t length, >>>> + struct pnfs_block_short_extent *new); >>>> +int bl_push_one_short_extent(struct pnfs_inval_markings *marks); >>>> +struct pnfs_block_short_extent * >>>> +bl_pop_one_short_extent(struct pnfs_inval_markings *marks); >>>> +void bl_free_short_extents(struct pnfs_inval_markings *marks, int num_to_free); >>>> >>>> #endif /* FS_NFS_NFS4BLOCKLAYOUT_H */ >>>> diff --git a/fs/nfs/blocklayout/extents.c b/fs/nfs/blocklayout/extents.c >>>> index d0f52ed..db66e68 100644 >>>> --- a/fs/nfs/blocklayout/extents.c >>>> +++ b/fs/nfs/blocklayout/extents.c >>>> @@ -369,20 +369,18 @@ static void add_to_commitlist(struct pnfs_block_layout >>> *bl, >>>> >>>> /* Note the range described by offset, length is guaranteed to be contained >>>> * within be. >>>> + * new will be freed, either by this function or add_to_commitlist if they >>>> + * decide not to use it, or after LAYOUTCOMMIT uses it in the commitlist. >>>> */ >>>> int bl_mark_for_commit(struct pnfs_block_extent *be, >>>> - sector_t offset, sector_t length) >>>> + sector_t offset, sector_t length, >>>> + struct pnfs_block_short_extent *new) >>>> { >>>> sector_t new_end, end = offset + length; >>>> - struct pnfs_block_short_extent *new; >>>> struct pnfs_block_layout *bl = container_of(be->be_inval, >>>> struct pnfs_block_layout, >>>> bl_inval); >>>> >>>> - new = kmalloc(sizeof(*new), GFP_NOFS); >>>> - if (!new) >>>> - return -ENOMEM; >>>> - >>>> mark_written_sectors(be->be_inval, offset, length); >>>> /* We want to add the range to commit list, but it must be >>>> * block-normalized, and verified that the normalized range has >>>> @@ -412,9 +410,6 @@ int bl_mark_for_commit(struct pnfs_block_extent *be, >>>> new->bse_mdev = be->be_mdev; >>>> >>>> spin_lock(&bl->bl_ext_lock); >>>> - /* new will be freed, either by add_to_commitlist if it decides not >>>> - * to use it, or after LAYOUTCOMMIT uses it in the commitlist. >>>> - */ >>>> add_to_commitlist(bl, new); >>>> spin_unlock(&bl->bl_ext_lock); >>>> return 0; >>>> @@ -862,3 +857,52 @@ clean_pnfs_block_layoutupdate(struct >>> pnfs_block_layout *bl, >>>> } >>>> } >>>> } >>>> + >>>> +int bl_push_one_short_extent(struct pnfs_inval_markings *marks) >>>> +{ >>>> + struct pnfs_block_short_extent *new; >>>> + >>>> + new = kmalloc(sizeof(*new), GFP_NOFS); >>>> + if (unlikely(!new)) >>>> + return -ENOMEM; >>>> + >>>> + spin_lock(&marks->im_lock); >>>> + list_add(&new->bse_node, &marks->im_extents); >>>> + spin_unlock(&marks->im_lock); >>>> + >>>> + return 0; >>>> +} >>>> + >>>> +struct pnfs_block_short_extent * >>>> +bl_pop_one_short_extent(struct pnfs_inval_markings *marks) >>>> +{ >>>> + struct pnfs_block_short_extent *rv = NULL; >>>> + >>>> + spin_lock(&marks->im_lock); >>>> + if (!list_empty(&marks->im_extents)) { >>>> + rv = list_entry((&marks->im_extents)->next, >>>> + struct pnfs_block_short_extent, bse_node); >>>> + list_del_init(&rv->bse_node); >>>> + } >>>> + spin_unlock(&marks->im_lock); >>>> + >>>> + return rv; >>>> +} >>>> + >>>> +void bl_free_short_extents(struct pnfs_inval_markings *marks, int num_to_free) >>>> +{ >>>> + struct pnfs_block_short_extent *se = NULL, *tmp; >>>> + >>>> + BUG_ON(num_to_free <= 0); >>> >>> Why is it a bug if num_to_free is <= 0? Couldn't you do: >>> >>> if (num_to_free <= 0) >>> return; >>> >>> instead? >> It used to be a sanity check but was changed to BUG_ON in the second version. >> Yeah, after giving it a second thought, I think it necessary to keep the sanity check. There are cases that num_to_free can be zero. E.g., a write_pagelist touches no INVALID extents, and one bio fails with disk failure. Then we will come to calling bl_free_short_extents() with num_to_free == 0. So the BUG_ON is really a bug itself... I will change it back to a normal sanity check. >> >> Thanks, >> Tao >> >> From d4f49b042411da140c95559895c57a00a1920fdd Mon Sep 17 00:00:00 2001 >> From: Peng Tao <bergwolf@xxxxxxxxx> >> Date: Thu, 10 Nov 2011 03:57:00 -0500 >> Subject: [PATCH 7/7] pnfsblock: alloc short extent before submit bio >> >> As discussed earlier, it is better for block client to allocate memory for >> tracking extents state before submitting bio. So the patch does it by allocating >> a short_extent for every INVALID extent touched by write pagelist and for >> every zeroing page we created, saving them in layout header. Then in end_io we >> can just use them to create commit list items and avoid memory allocation there. >> >> Signed-off-by: Peng Tao <peng_tao@xxxxxxx> >> --- >> fs/nfs/blocklayout/blocklayout.c | 74 +++++++++++++++++++++++++++++-------- >> fs/nfs/blocklayout/blocklayout.h | 9 ++++- >> fs/nfs/blocklayout/extents.c | 63 +++++++++++++++++++++++++++----- >> 3 files changed, 120 insertions(+), 26 deletions(-) >> >> diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c >> index 7fc69c9..1dd2983 100644 >> --- a/fs/nfs/blocklayout/blocklayout.c >> +++ b/fs/nfs/blocklayout/blocklayout.c >> @@ -90,8 +90,9 @@ static int is_writable(struct pnfs_block_extent *be, sector_t isect) >> */ >> struct parallel_io { >> struct kref refcnt; >> - void (*pnfs_callback) (void *data); >> + void (*pnfs_callback) (void *data, int num_se); >> void *data; >> + int bse_count; >> }; >> >> static inline struct parallel_io *alloc_parallel(void *data) >> @@ -102,6 +103,7 @@ static inline struct parallel_io *alloc_parallel(void *data) >> if (rv) { >> rv->data = data; >> kref_init(&rv->refcnt); >> + rv->bse_count = 0; >> } >> return rv; >> } >> @@ -116,7 +118,7 @@ static void destroy_parallel(struct kref *kref) >> struct parallel_io *p = container_of(kref, struct parallel_io, refcnt); >> >> dprintk("%s enter\n", __func__); >> - p->pnfs_callback(p->data); >> + p->pnfs_callback(p->data, p->bse_count); >> kfree(p); >> } >> >> @@ -211,7 +213,7 @@ static void bl_read_cleanup(struct work_struct *work) >> } >> >> static void >> -bl_end_par_io_read(void *data) >> +bl_end_par_io_read(void *data, int unused) >> { >> struct nfs_read_data *rdata = data; >> >> @@ -312,6 +314,7 @@ static void mark_extents_written(struct pnfs_block_layout *bl, >> { >> sector_t isect, end; >> struct pnfs_block_extent *be; >> + struct pnfs_block_short_extent *se; >> >> dprintk("%s(%llu, %u)\n", __func__, offset, count); >> if (count == 0) >> @@ -324,8 +327,11 @@ static void mark_extents_written(struct pnfs_block_layout *bl, >> be = bl_find_get_extent(bl, isect, NULL); >> BUG_ON(!be); /* FIXME */ >> len = min(end, be->be_f_offset + be->be_length) - isect; >> - if (be->be_state == PNFS_BLOCK_INVALID_DATA) >> - bl_mark_for_commit(be, isect, len); /* What if fails? */ >> + if (be->be_state == PNFS_BLOCK_INVALID_DATA) { >> + se = bl_pop_one_short_extent(be->be_inval); >> + BUG_ON(!se); >> + bl_mark_for_commit(be, isect, len, se); >> + } >> isect += len; >> bl_put_extent(be); >> } >> @@ -347,7 +353,8 @@ static void bl_end_io_write_zero(struct bio *bio, int err) >> end_page_writeback(page); >> page_cache_release(page); >> } while (bvec >= bio->bi_io_vec); >> - if (!uptodate) { >> + >> + if (unlikely(!uptodate)) { >> if (!wdata->pnfs_error) >> wdata->pnfs_error = -EIO; >> pnfs_set_lo_fail(wdata->lseg); >> @@ -356,7 +363,6 @@ static void bl_end_io_write_zero(struct bio *bio, int err) >> put_parallel(par); >> } >> >> -/* This is basically copied from mpage_end_io_read */ >> static void bl_end_io_write(struct bio *bio, int err) >> { >> struct parallel_io *par = bio->bi_private; >> @@ -382,7 +388,7 @@ static void bl_write_cleanup(struct work_struct *work) >> dprintk("%s enter\n", __func__); >> task = container_of(work, struct rpc_task, u.tk_work); >> wdata = container_of(task, struct nfs_write_data, task); >> - if (!wdata->pnfs_error) { >> + if (likely(!wdata->pnfs_error)) { >> /* Marks for LAYOUTCOMMIT */ >> mark_extents_written(BLK_LSEG2EXT(wdata->lseg), >> wdata->args.offset, wdata->args.count); >> @@ -391,10 +397,15 @@ static void bl_write_cleanup(struct work_struct *work) >> } >> >> /* Called when last of bios associated with a bl_write_pagelist call finishes */ >> -static void bl_end_par_io_write(void *data) >> +static void bl_end_par_io_write(void *data, int num_se) >> { >> struct nfs_write_data *wdata = data; >> >> + if (unlikely(wdata->pnfs_error)) { >> + bl_free_short_extents(&BLK_LSEG2EXT(wdata->lseg)->bl_inval, >> + num_se); >> + } >> + >> wdata->task.tk_status = wdata->pnfs_error; >> wdata->verf.committed = NFS_FILE_SYNC; >> INIT_WORK(&wdata->task.u.tk_work, bl_write_cleanup); >> @@ -547,7 +558,7 @@ bl_write_pagelist(struct nfs_write_data *wdata, int sync) >> */ >> par = alloc_parallel(wdata); >> if (!par) >> - return PNFS_NOT_ATTEMPTED; >> + goto out_mds; >> par->pnfs_callback = bl_end_par_io_write; >> /* At this point, have to be more careful with error handling */ >> >> @@ -555,12 +566,15 @@ bl_write_pagelist(struct nfs_write_data *wdata, int sync) >> be = bl_find_get_extent(BLK_LSEG2EXT(wdata->lseg), isect, &cow_read); >> if (!be || !is_writable(be, isect)) { >> dprintk("%s no matching extents!\n", __func__); >> - wdata->pnfs_error = -EINVAL; >> - goto out; >> + goto out_mds; >> } >> >> /* First page inside INVALID extent */ >> if (be->be_state == PNFS_BLOCK_INVALID_DATA) { >> + if (likely(!bl_push_one_short_extent(be->be_inval))) >> + par->bse_count++; >> + else >> + goto out_mds; >> temp = offset >> PAGE_CACHE_SHIFT; >> npg_zero = do_div(temp, npg_per_block); >> isect = (sector_t) (((offset - npg_zero * PAGE_CACHE_SIZE) & >> @@ -598,6 +612,19 @@ fill_invalid_ext: >> wdata->pnfs_error = ret; >> goto out; >> } >> + if (likely(!bl_push_one_short_extent(be->be_inval))) >> + par->bse_count++; >> + else { >> + end_page_writeback(page); >> + page_cache_release(page); >> + wdata->pnfs_error = -ENOMEM; >> + goto out; >> + } >> + /* FIXME: This should be done in bi_end_io */ >> + mark_extents_written(BLK_LSEG2EXT(wdata->lseg), >> + page->index << PAGE_CACHE_SHIFT, >> + PAGE_CACHE_SIZE); >> + >> bio = bl_add_page_to_bio(bio, npg_zero, WRITE, >> isect, page, be, >> bl_end_io_write_zero, par); >> @@ -606,10 +633,6 @@ fill_invalid_ext: >> bio = NULL; >> goto out; >> } >> - /* FIXME: This should be done in bi_end_io */ >> - mark_extents_written(BLK_LSEG2EXT(wdata->lseg), >> - page->index << PAGE_CACHE_SHIFT, >> - PAGE_CACHE_SIZE); >> next_page: >> isect += PAGE_CACHE_SECTORS; >> extent_length -= PAGE_CACHE_SECTORS; >> @@ -633,6 +656,15 @@ next_page: >> wdata->pnfs_error = -EINVAL; >> goto out; >> } >> + if (be->be_state == PNFS_BLOCK_INVALID_DATA) { >> + if (likely(!bl_push_one_short_extent( >> + be->be_inval))) >> + par->bse_count++; >> + else { >> + wdata->pnfs_error = -ENOMEM; >> + goto out; >> + } >> + } >> extent_length = be->be_length - >> (isect - be->be_f_offset); >> } >> @@ -680,6 +712,10 @@ out: >> bl_submit_bio(WRITE, bio); >> put_parallel(par); >> return PNFS_ATTEMPTED; >> +out_mds: >> + bl_put_extent(be); >> + kfree(par); >> + return PNFS_NOT_ATTEMPTED; >> } >> >> /* FIXME - range ignored */ >> @@ -706,11 +742,17 @@ static void >> release_inval_marks(struct pnfs_inval_markings *marks) >> { >> struct pnfs_inval_tracking *pos, *temp; >> + struct pnfs_block_short_extent *se, *stemp; >> >> list_for_each_entry_safe(pos, temp, &marks->im_tree.mtt_stub, it_link) { >> list_del(&pos->it_link); >> kfree(pos); >> } >> + >> + list_for_each_entry_safe(se, stemp, &marks->im_extents, bse_node) { >> + list_del(&se->bse_node); >> + kfree(se); >> + } >> return; >> } >> >> diff --git a/fs/nfs/blocklayout/blocklayout.h b/fs/nfs/blocklayout/blocklayout.h >> index 60728ac..e31a2df 100644 >> --- a/fs/nfs/blocklayout/blocklayout.h >> +++ b/fs/nfs/blocklayout/blocklayout.h >> @@ -70,6 +70,7 @@ struct pnfs_inval_markings { >> spinlock_t im_lock; >> struct my_tree im_tree; /* Sectors that need LAYOUTCOMMIT */ >> sector_t im_block_size; /* Server blocksize in sectors */ >> + struct list_head im_extents; /* Short extents for INVAL->RW conversion */ >> }; >> >> struct pnfs_inval_tracking { >> @@ -105,6 +106,7 @@ BL_INIT_INVAL_MARKS(struct pnfs_inval_markings *marks, sector_t blocksize) >> { >> spin_lock_init(&marks->im_lock); >> INIT_LIST_HEAD(&marks->im_tree.mtt_stub); >> + INIT_LIST_HEAD(&marks->im_extents); >> marks->im_block_size = blocksize; >> marks->im_tree.mtt_step_size = min((sector_t)PAGE_CACHE_SECTORS, >> blocksize); >> @@ -199,6 +201,11 @@ void clean_pnfs_block_layoutupdate(struct pnfs_block_layout *bl, >> int bl_add_merge_extent(struct pnfs_block_layout *bl, >> struct pnfs_block_extent *new); >> int bl_mark_for_commit(struct pnfs_block_extent *be, >> - sector_t offset, sector_t length); >> + sector_t offset, sector_t length, >> + struct pnfs_block_short_extent *new); >> +int bl_push_one_short_extent(struct pnfs_inval_markings *marks); >> +struct pnfs_block_short_extent * >> +bl_pop_one_short_extent(struct pnfs_inval_markings *marks); >> +void bl_free_short_extents(struct pnfs_inval_markings *marks, int num_to_free); >> >> #endif /* FS_NFS_NFS4BLOCKLAYOUT_H */ >> diff --git a/fs/nfs/blocklayout/extents.c b/fs/nfs/blocklayout/extents.c >> index d0f52ed..6ce5a8d 100644 >> --- a/fs/nfs/blocklayout/extents.c >> +++ b/fs/nfs/blocklayout/extents.c >> @@ -369,20 +369,18 @@ static void add_to_commitlist(struct pnfs_block_layout *bl, >> >> /* Note the range described by offset, length is guaranteed to be contained >> * within be. >> + * new will be freed, either by this function or add_to_commitlist if they >> + * decide not to use it, or after LAYOUTCOMMIT uses it in the commitlist. >> */ >> int bl_mark_for_commit(struct pnfs_block_extent *be, >> - sector_t offset, sector_t length) >> + sector_t offset, sector_t length, >> + struct pnfs_block_short_extent *new) >> { >> sector_t new_end, end = offset + length; >> - struct pnfs_block_short_extent *new; >> struct pnfs_block_layout *bl = container_of(be->be_inval, >> struct pnfs_block_layout, >> bl_inval); >> >> - new = kmalloc(sizeof(*new), GFP_NOFS); >> - if (!new) >> - return -ENOMEM; >> - >> mark_written_sectors(be->be_inval, offset, length); >> /* We want to add the range to commit list, but it must be >> * block-normalized, and verified that the normalized range has >> @@ -412,9 +410,6 @@ int bl_mark_for_commit(struct pnfs_block_extent *be, >> new->bse_mdev = be->be_mdev; >> >> spin_lock(&bl->bl_ext_lock); >> - /* new will be freed, either by add_to_commitlist if it decides not >> - * to use it, or after LAYOUTCOMMIT uses it in the commitlist. >> - */ >> add_to_commitlist(bl, new); >> spin_unlock(&bl->bl_ext_lock); >> return 0; >> @@ -862,3 +857,53 @@ clean_pnfs_block_layoutupdate(struct pnfs_block_layout *bl, >> } >> } >> } >> + >> +int bl_push_one_short_extent(struct pnfs_inval_markings *marks) >> +{ >> + struct pnfs_block_short_extent *new; >> + >> + new = kmalloc(sizeof(*new), GFP_NOFS); >> + if (unlikely(!new)) >> + return -ENOMEM; >> + >> + spin_lock(&marks->im_lock); >> + list_add(&new->bse_node, &marks->im_extents); >> + spin_unlock(&marks->im_lock); >> + >> + return 0; >> +} >> + >> +struct pnfs_block_short_extent * >> +bl_pop_one_short_extent(struct pnfs_inval_markings *marks) >> +{ >> + struct pnfs_block_short_extent *rv = NULL; >> + >> + spin_lock(&marks->im_lock); >> + if (!list_empty(&marks->im_extents)) { >> + rv = list_entry((&marks->im_extents)->next, >> + struct pnfs_block_short_extent, bse_node); >> + list_del_init(&rv->bse_node); >> + } >> + spin_unlock(&marks->im_lock); >> + >> + return rv; >> +} >> + >> +void bl_free_short_extents(struct pnfs_inval_markings *marks, int num_to_free) >> +{ >> + struct pnfs_block_short_extent *se = NULL, *tmp; >> + >> + if (num_to_free <= 0) >> + return; >> + >> + spin_lock(&marks->im_lock); >> + list_for_each_entry_safe(se, tmp, &marks->im_extents, bse_node) { >> + list_del(&se->bse_node); >> + kfree(se); >> + if (--num_to_free == 0) >> + break; >> + } >> + spin_unlock(&marks->im_lock); >> + >> + BUG_ON(num_to_free > 0); >> +} >> -- >> 1.7.4.2 >> >> -- >> To unsubscribe from this list: send the line "unsubscribe linux-nfs" in >> the body of a message to majordomo@xxxxxxxxxxxxxxx >> More majordomo info at http://vger.kernel.org/majordomo-info.html > -- Thanks, -Bergwolf -- To unsubscribe from this list: send the line "unsubscribe linux-nfs" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html