Re: [PATCH-v2 7/7] pnfsblock: alloc short extent before submit bio

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Mon, Nov 14, 2011 at 6:26 PM, Benny Halevy <bhalevy@xxxxxxxxxx> wrote:
> Tao, The patch below doesn't apply.
> Can you please re-send a clean patch, with tab indents (vs. spaces).
oops, I will resend it.

Thanks,
Tao
>
> Thanks,
>
> Benny
>
> On 2011-11-11 04:05, tao.peng@xxxxxxx wrote:
>> Hi, Bryan,
>>
>>> -----Original Message-----
>>> From: Bryan Schumaker [mailto:bjschuma@xxxxxxxxxx]
>>> Sent: Friday, November 11, 2011 12:44 AM
>>> To: Peng Tao
>>> Cc: linux-nfs@xxxxxxxxxxxxxxx; Trond.Myklebust@xxxxxxxxxx; bhalevy@xxxxxxxxxx;
>>> Peng, Tao
>>> Subject: Re: [PATCH-v2 7/7] pnfsblock: alloc short extent before submit bio
>>>
>>> On 11/10/2011 10:25 AM, Peng Tao wrote:
>>>> As discussed earlier, it is better for block client to allocate memory for
>>>> tracking extents state before submitting bio. So the patch does it by allocating
>>>> a short_extent for every INVALID extent touched by write pagelist and for
>>>> every zeroing page we created, saving them in layout header. Then in end_io we
>>>> can just use them to create commit list items and avoid memory allocation there.
>>>>
>>>> Signed-off-by: Peng Tao <peng_tao@xxxxxxx>
>>>> ---
>>>>  fs/nfs/blocklayout/blocklayout.c |   74
>>> +++++++++++++++++++++++++++++--------
>>>>  fs/nfs/blocklayout/blocklayout.h |    9 ++++-
>>>>  fs/nfs/blocklayout/extents.c     |   62 +++++++++++++++++++++++++++-----
>>>>  3 files changed, 119 insertions(+), 26 deletions(-)
>>>>
>>>> diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c
>>>> index 3eaea2b..3fdfb29 100644
>>>> --- a/fs/nfs/blocklayout/blocklayout.c
>>>> +++ b/fs/nfs/blocklayout/blocklayout.c
>>>> @@ -90,8 +90,9 @@ static int is_writable(struct pnfs_block_extent *be, sector_t
>>> isect)
>>>>   */
>>>>  struct parallel_io {
>>>>     struct kref refcnt;
>>>> -   void (*pnfs_callback) (void *data);
>>>> +   void (*pnfs_callback) (void *data, int num_se);
>>>>     void *data;
>>>> +   int bse_count;
>>>>  };
>>>>
>>>>  static inline struct parallel_io *alloc_parallel(void *data)
>>>> @@ -102,6 +103,7 @@ static inline struct parallel_io *alloc_parallel(void *data)
>>>>     if (rv) {
>>>>             rv->data = data;
>>>>             kref_init(&rv->refcnt);
>>>> +           rv->bse_count = 0;
>>>>     }
>>>>     return rv;
>>>>  }
>>>> @@ -116,7 +118,7 @@ static void destroy_parallel(struct kref *kref)
>>>>     struct parallel_io *p = container_of(kref, struct parallel_io, refcnt);
>>>>
>>>>     dprintk("%s enter\n", __func__);
>>>> -   p->pnfs_callback(p->data);
>>>> +   p->pnfs_callback(p->data, p->bse_count);
>>>>     kfree(p);
>>>>  }
>>>>
>>>> @@ -211,7 +213,7 @@ static void bl_read_cleanup(struct work_struct *work)
>>>>  }
>>>>
>>>>  static void
>>>> -bl_end_par_io_read(void *data)
>>>> +bl_end_par_io_read(void *data, int unused)
>>>>  {
>>>>     struct nfs_read_data *rdata = data;
>>>>
>>>> @@ -312,6 +314,7 @@ static void mark_extents_written(struct pnfs_block_layout
>>> *bl,
>>>>  {
>>>>     sector_t isect, end;
>>>>     struct pnfs_block_extent *be;
>>>> +   struct pnfs_block_short_extent *se;
>>>>
>>>>     dprintk("%s(%llu, %u)\n", __func__, offset, count);
>>>>     if (count == 0)
>>>> @@ -324,8 +327,11 @@ static void mark_extents_written(struct
>>> pnfs_block_layout *bl,
>>>>             be = bl_find_get_extent(bl, isect, NULL);
>>>>             BUG_ON(!be); /* FIXME */
>>>>             len = min(end, be->be_f_offset + be->be_length) - isect;
>>>> -           if (be->be_state == PNFS_BLOCK_INVALID_DATA)
>>>> -                   bl_mark_for_commit(be, isect, len); /* What if fails? */
>>>> +           if (be->be_state == PNFS_BLOCK_INVALID_DATA) {
>>>> +                   se = bl_pop_one_short_extent(be->be_inval);
>>>> +                   BUG_ON(!se);
>>>> +                   bl_mark_for_commit(be, isect, len, se);
>>>> +           }
>>>>             isect += len;
>>>>             bl_put_extent(be);
>>>>     }
>>>> @@ -347,7 +353,8 @@ static void bl_end_io_write_zero(struct bio *bio, int err)
>>>>             end_page_writeback(page);
>>>>             page_cache_release(page);
>>>>     } while (bvec >= bio->bi_io_vec);
>>>> -   if (!uptodate) {
>>>> +
>>>> +   if (unlikely(!uptodate)) {
>>>>             if (!wdata->pnfs_error)
>>>>                     wdata->pnfs_error = -EIO;
>>>>             pnfs_set_lo_fail(wdata->lseg);
>>>> @@ -356,7 +363,6 @@ static void bl_end_io_write_zero(struct bio *bio, int err)
>>>>     put_parallel(par);
>>>>  }
>>>>
>>>> -/* This is basically copied from mpage_end_io_read */
>>>>  static void bl_end_io_write(struct bio *bio, int err)
>>>>  {
>>>>     struct parallel_io *par = bio->bi_private;
>>>> @@ -382,7 +388,7 @@ static void bl_write_cleanup(struct work_struct *work)
>>>>     dprintk("%s enter\n", __func__);
>>>>     task = container_of(work, struct rpc_task, u.tk_work);
>>>>     wdata = container_of(task, struct nfs_write_data, task);
>>>> -   if (!wdata->pnfs_error) {
>>>> +   if (likely(!wdata->pnfs_error)) {
>>>>             /* Marks for LAYOUTCOMMIT */
>>>>             mark_extents_written(BLK_LSEG2EXT(wdata->lseg),
>>>>                                  wdata->args.offset, wdata->args.count);
>>>> @@ -391,10 +397,15 @@ static void bl_write_cleanup(struct work_struct *work)
>>>>  }
>>>>
>>>>  /* Called when last of bios associated with a bl_write_pagelist call finishes */
>>>> -static void bl_end_par_io_write(void *data)
>>>> +static void bl_end_par_io_write(void *data, int num_se)
>>>>  {
>>>>     struct nfs_write_data *wdata = data;
>>>>
>>>> +   if (unlikely(wdata->pnfs_error)) {
>>>> +           bl_free_short_extents(&BLK_LSEG2EXT(wdata->lseg)->bl_inval,
>>>> +                                   num_se);
>>>> +   }
>>>> +
>>>>     wdata->task.tk_status = wdata->pnfs_error;
>>>>     wdata->verf.committed = NFS_FILE_SYNC;
>>>>     INIT_WORK(&wdata->task.u.tk_work, bl_write_cleanup);
>>>> @@ -547,7 +558,7 @@ bl_write_pagelist(struct nfs_write_data *wdata, int sync)
>>>>      */
>>>>     par = alloc_parallel(wdata);
>>>>     if (!par)
>>>> -           return PNFS_NOT_ATTEMPTED;
>>>> +           goto out_mds;
>>>>     par->pnfs_callback = bl_end_par_io_write;
>>>>     /* At this point, have to be more careful with error handling */
>>>>
>>>> @@ -555,12 +566,15 @@ bl_write_pagelist(struct nfs_write_data *wdata, int
>>> sync)
>>>>     be = bl_find_get_extent(BLK_LSEG2EXT(wdata->lseg), isect, &cow_read);
>>>>     if (!be || !is_writable(be, isect)) {
>>>>             dprintk("%s no matching extents!\n", __func__);
>>>> -           wdata->pnfs_error = -EINVAL;
>>>> -           goto out;
>>>> +           goto out_mds;
>>>>     }
>>>>
>>>>     /* First page inside INVALID extent */
>>>>     if (be->be_state == PNFS_BLOCK_INVALID_DATA) {
>>>> +           if (likely(!bl_push_one_short_extent(be->be_inval)))
>>>> +                   par->bse_count++;
>>>> +           else
>>>> +                   goto out_mds;
>>>>             temp = offset >> PAGE_CACHE_SHIFT;
>>>>             npg_zero = do_div(temp, npg_per_block);
>>>>             isect = (sector_t) (((offset - npg_zero * PAGE_CACHE_SIZE) &
>>>> @@ -598,6 +612,19 @@ fill_invalid_ext:
>>>>                             wdata->pnfs_error = ret;
>>>>                             goto out;
>>>>                     }
>>>> +                   if (likely(!bl_push_one_short_extent(be->be_inval)))
>>>> +                           par->bse_count++;
>>>> +                   else {
>>>> +                           end_page_writeback(page);
>>>> +                           page_cache_release(page);
>>>> +                           wdata->pnfs_error = -ENOMEM;
>>>> +                           goto out;
>>>> +                   }
>>>> +                   /* FIXME: This should be done in bi_end_io */
>>>> +                   mark_extents_written(BLK_LSEG2EXT(wdata->lseg),
>>>> +                                        page->index << PAGE_CACHE_SHIFT,
>>>> +                                        PAGE_CACHE_SIZE);
>>>> +
>>>>                     bio = bl_add_page_to_bio(bio, npg_zero, WRITE,
>>>>                                              isect, page, be,
>>>>                                              bl_end_io_write_zero, par);
>>>> @@ -606,10 +633,6 @@ fill_invalid_ext:
>>>>                             bio = NULL;
>>>>                             goto out;
>>>>                     }
>>>> -                   /* FIXME: This should be done in bi_end_io */
>>>> -                   mark_extents_written(BLK_LSEG2EXT(wdata->lseg),
>>>> -                                        page->index << PAGE_CACHE_SHIFT,
>>>> -                                        PAGE_CACHE_SIZE);
>>>>  next_page:
>>>>                     isect += PAGE_CACHE_SECTORS;
>>>>                     extent_length -= PAGE_CACHE_SECTORS;
>>>> @@ -633,6 +656,15 @@ next_page:
>>>>                             wdata->pnfs_error = -EINVAL;
>>>>                             goto out;
>>>>                     }
>>>> +                   if (be->be_state == PNFS_BLOCK_INVALID_DATA) {
>>>> +                           if (likely(!bl_push_one_short_extent(
>>>> +                                                           be->be_inval)))
>>>> +                                   par->bse_count++;
>>>> +                           else {
>>>> +                                   wdata->pnfs_error = -ENOMEM;
>>>> +                                   goto out;
>>>> +                           }
>>>> +                   }
>>>>                     extent_length = be->be_length -
>>>>                         (isect - be->be_f_offset);
>>>>             }
>>>> @@ -680,6 +712,10 @@ out:
>>>>     bl_submit_bio(WRITE, bio);
>>>>     put_parallel(par);
>>>>     return PNFS_ATTEMPTED;
>>>> +out_mds:
>>>> +   bl_put_extent(be);
>>>> +   kfree(par);
>>>> +   return PNFS_NOT_ATTEMPTED;
>>>>  }
>>>>
>>>>  /* FIXME - range ignored */
>>>> @@ -706,11 +742,17 @@ static void
>>>>  release_inval_marks(struct pnfs_inval_markings *marks)
>>>>  {
>>>>     struct pnfs_inval_tracking *pos, *temp;
>>>> +   struct pnfs_block_short_extent *se, *stemp;
>>>>
>>>>     list_for_each_entry_safe(pos, temp, &marks->im_tree.mtt_stub, it_link) {
>>>>             list_del(&pos->it_link);
>>>>             kfree(pos);
>>>>     }
>>>> +
>>>> +   list_for_each_entry_safe(se, stemp, &marks->im_extents, bse_node) {
>>>> +           list_del(&se->bse_node);
>>>> +           kfree(se);
>>>> +   }
>>>>     return;
>>>>  }
>>>>
>>>> diff --git a/fs/nfs/blocklayout/blocklayout.h b/fs/nfs/blocklayout/blocklayout.h
>>>> index 60728ac..e31a2df 100644
>>>> --- a/fs/nfs/blocklayout/blocklayout.h
>>>> +++ b/fs/nfs/blocklayout/blocklayout.h
>>>> @@ -70,6 +70,7 @@ struct pnfs_inval_markings {
>>>>     spinlock_t      im_lock;
>>>>     struct my_tree  im_tree;        /* Sectors that need LAYOUTCOMMIT */
>>>>     sector_t        im_block_size;  /* Server blocksize in sectors */
>>>> +   struct list_head im_extents;    /* Short extents for INVAL->RW conversion */
>>>>  };
>>>>
>>>>  struct pnfs_inval_tracking {
>>>> @@ -105,6 +106,7 @@ BL_INIT_INVAL_MARKS(struct pnfs_inval_markings
>>> *marks, sector_t blocksize)
>>>>  {
>>>>     spin_lock_init(&marks->im_lock);
>>>>     INIT_LIST_HEAD(&marks->im_tree.mtt_stub);
>>>> +   INIT_LIST_HEAD(&marks->im_extents);
>>>>     marks->im_block_size = blocksize;
>>>>     marks->im_tree.mtt_step_size = min((sector_t)PAGE_CACHE_SECTORS,
>>>>                                        blocksize);
>>>> @@ -199,6 +201,11 @@ void clean_pnfs_block_layoutupdate(struct
>>> pnfs_block_layout *bl,
>>>>  int bl_add_merge_extent(struct pnfs_block_layout *bl,
>>>>                      struct pnfs_block_extent *new);
>>>>  int bl_mark_for_commit(struct pnfs_block_extent *be,
>>>> -                   sector_t offset, sector_t length);
>>>> +                   sector_t offset, sector_t length,
>>>> +                   struct pnfs_block_short_extent *new);
>>>> +int bl_push_one_short_extent(struct pnfs_inval_markings *marks);
>>>> +struct pnfs_block_short_extent *
>>>> +bl_pop_one_short_extent(struct pnfs_inval_markings *marks);
>>>> +void bl_free_short_extents(struct pnfs_inval_markings *marks, int num_to_free);
>>>>
>>>>  #endif /* FS_NFS_NFS4BLOCKLAYOUT_H */
>>>> diff --git a/fs/nfs/blocklayout/extents.c b/fs/nfs/blocklayout/extents.c
>>>> index d0f52ed..db66e68 100644
>>>> --- a/fs/nfs/blocklayout/extents.c
>>>> +++ b/fs/nfs/blocklayout/extents.c
>>>> @@ -369,20 +369,18 @@ static void add_to_commitlist(struct pnfs_block_layout
>>> *bl,
>>>>
>>>>  /* Note the range described by offset, length is guaranteed to be contained
>>>>   * within be.
>>>> + * new will be freed, either by this function or add_to_commitlist if they
>>>> + * decide not to use it, or after LAYOUTCOMMIT uses it in the commitlist.
>>>>   */
>>>>  int bl_mark_for_commit(struct pnfs_block_extent *be,
>>>> -               sector_t offset, sector_t length)
>>>> +               sector_t offset, sector_t length,
>>>> +               struct pnfs_block_short_extent *new)
>>>>  {
>>>>     sector_t new_end, end = offset + length;
>>>> -   struct pnfs_block_short_extent *new;
>>>>     struct pnfs_block_layout *bl = container_of(be->be_inval,
>>>>                                                 struct pnfs_block_layout,
>>>>                                                 bl_inval);
>>>>
>>>> -   new = kmalloc(sizeof(*new), GFP_NOFS);
>>>> -   if (!new)
>>>> -           return -ENOMEM;
>>>> -
>>>>     mark_written_sectors(be->be_inval, offset, length);
>>>>     /* We want to add the range to commit list, but it must be
>>>>      * block-normalized, and verified that the normalized range has
>>>> @@ -412,9 +410,6 @@ int bl_mark_for_commit(struct pnfs_block_extent *be,
>>>>     new->bse_mdev = be->be_mdev;
>>>>
>>>>     spin_lock(&bl->bl_ext_lock);
>>>> -   /* new will be freed, either by add_to_commitlist if it decides not
>>>> -    * to use it, or after LAYOUTCOMMIT uses it in the commitlist.
>>>> -    */
>>>>     add_to_commitlist(bl, new);
>>>>     spin_unlock(&bl->bl_ext_lock);
>>>>     return 0;
>>>> @@ -862,3 +857,52 @@ clean_pnfs_block_layoutupdate(struct
>>> pnfs_block_layout *bl,
>>>>             }
>>>>     }
>>>>  }
>>>> +
>>>> +int bl_push_one_short_extent(struct pnfs_inval_markings *marks)
>>>> +{
>>>> +   struct pnfs_block_short_extent *new;
>>>> +
>>>> +   new = kmalloc(sizeof(*new), GFP_NOFS);
>>>> +   if (unlikely(!new))
>>>> +           return -ENOMEM;
>>>> +
>>>> +   spin_lock(&marks->im_lock);
>>>> +   list_add(&new->bse_node, &marks->im_extents);
>>>> +   spin_unlock(&marks->im_lock);
>>>> +
>>>> +   return 0;
>>>> +}
>>>> +
>>>> +struct pnfs_block_short_extent *
>>>> +bl_pop_one_short_extent(struct pnfs_inval_markings *marks)
>>>> +{
>>>> +   struct pnfs_block_short_extent *rv = NULL;
>>>> +
>>>> +   spin_lock(&marks->im_lock);
>>>> +   if (!list_empty(&marks->im_extents)) {
>>>> +           rv = list_entry((&marks->im_extents)->next,
>>>> +                           struct pnfs_block_short_extent, bse_node);
>>>> +           list_del_init(&rv->bse_node);
>>>> +   }
>>>> +   spin_unlock(&marks->im_lock);
>>>> +
>>>> +   return rv;
>>>> +}
>>>> +
>>>> +void bl_free_short_extents(struct pnfs_inval_markings *marks, int num_to_free)
>>>> +{
>>>> +   struct pnfs_block_short_extent *se = NULL, *tmp;
>>>> +
>>>> +   BUG_ON(num_to_free <= 0);
>>>
>>> Why is it a bug if num_to_free is <= 0?  Couldn't you do:
>>>
>>> if (num_to_free <= 0)
>>>       return;
>>>
>>> instead?
>> It used to be a sanity check but was changed to BUG_ON in the second version.
>> Yeah, after giving it a second thought, I think it necessary to keep the sanity check. There are cases that num_to_free can be zero. E.g., a write_pagelist touches no INVALID extents, and one bio fails with disk failure. Then we will come to calling bl_free_short_extents() with num_to_free == 0. So the BUG_ON is really a bug itself... I will change it back to a normal sanity check.
>>
>> Thanks,
>> Tao
>>
>> From d4f49b042411da140c95559895c57a00a1920fdd Mon Sep 17 00:00:00 2001
>> From: Peng Tao <bergwolf@xxxxxxxxx>
>> Date: Thu, 10 Nov 2011 03:57:00 -0500
>> Subject: [PATCH 7/7] pnfsblock: alloc short extent before submit bio
>>
>> As discussed earlier, it is better for block client to allocate memory for
>> tracking extents state before submitting bio. So the patch does it by allocating
>> a short_extent for every INVALID extent touched by write pagelist and for
>> every zeroing page we created, saving them in layout header. Then in end_io we
>> can just use them to create commit list items and avoid memory allocation there.
>>
>> Signed-off-by: Peng Tao <peng_tao@xxxxxxx>
>> ---
>>  fs/nfs/blocklayout/blocklayout.c |   74 +++++++++++++++++++++++++++++--------
>>  fs/nfs/blocklayout/blocklayout.h |    9 ++++-
>>  fs/nfs/blocklayout/extents.c     |   63 +++++++++++++++++++++++++++-----
>>  3 files changed, 120 insertions(+), 26 deletions(-)
>>
>> diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c
>> index 7fc69c9..1dd2983 100644
>> --- a/fs/nfs/blocklayout/blocklayout.c
>> +++ b/fs/nfs/blocklayout/blocklayout.c
>> @@ -90,8 +90,9 @@ static int is_writable(struct pnfs_block_extent *be, sector_t isect)
>>   */
>>  struct parallel_io {
>>         struct kref refcnt;
>> -       void (*pnfs_callback) (void *data);
>> +       void (*pnfs_callback) (void *data, int num_se);
>>         void *data;
>> +       int bse_count;
>>  };
>>
>>  static inline struct parallel_io *alloc_parallel(void *data)
>> @@ -102,6 +103,7 @@ static inline struct parallel_io *alloc_parallel(void *data)
>>         if (rv) {
>>                 rv->data = data;
>>                 kref_init(&rv->refcnt);
>> +               rv->bse_count = 0;
>>         }
>>         return rv;
>>  }
>> @@ -116,7 +118,7 @@ static void destroy_parallel(struct kref *kref)
>>         struct parallel_io *p = container_of(kref, struct parallel_io, refcnt);
>>
>>         dprintk("%s enter\n", __func__);
>> -       p->pnfs_callback(p->data);
>> +       p->pnfs_callback(p->data, p->bse_count);
>>         kfree(p);
>>  }
>>
>> @@ -211,7 +213,7 @@ static void bl_read_cleanup(struct work_struct *work)
>>  }
>>
>>  static void
>> -bl_end_par_io_read(void *data)
>> +bl_end_par_io_read(void *data, int unused)
>>  {
>>         struct nfs_read_data *rdata = data;
>>
>> @@ -312,6 +314,7 @@ static void mark_extents_written(struct pnfs_block_layout *bl,
>>  {
>>         sector_t isect, end;
>>         struct pnfs_block_extent *be;
>> +       struct pnfs_block_short_extent *se;
>>
>>         dprintk("%s(%llu, %u)\n", __func__, offset, count);
>>         if (count == 0)
>> @@ -324,8 +327,11 @@ static void mark_extents_written(struct pnfs_block_layout *bl,
>>                 be = bl_find_get_extent(bl, isect, NULL);
>>                 BUG_ON(!be); /* FIXME */
>>                 len = min(end, be->be_f_offset + be->be_length) - isect;
>> -               if (be->be_state == PNFS_BLOCK_INVALID_DATA)
>> -                       bl_mark_for_commit(be, isect, len); /* What if fails? */
>> +               if (be->be_state == PNFS_BLOCK_INVALID_DATA) {
>> +                       se = bl_pop_one_short_extent(be->be_inval);
>> +                       BUG_ON(!se);
>> +                       bl_mark_for_commit(be, isect, len, se);
>> +               }
>>                 isect += len;
>>                 bl_put_extent(be);
>>         }
>> @@ -347,7 +353,8 @@ static void bl_end_io_write_zero(struct bio *bio, int err)
>>                 end_page_writeback(page);
>>                 page_cache_release(page);
>>         } while (bvec >= bio->bi_io_vec);
>> -       if (!uptodate) {
>> +
>> +       if (unlikely(!uptodate)) {
>>                 if (!wdata->pnfs_error)
>>                         wdata->pnfs_error = -EIO;
>>                 pnfs_set_lo_fail(wdata->lseg);
>> @@ -356,7 +363,6 @@ static void bl_end_io_write_zero(struct bio *bio, int err)
>>         put_parallel(par);
>>  }
>>
>> -/* This is basically copied from mpage_end_io_read */
>>  static void bl_end_io_write(struct bio *bio, int err)
>>  {
>>         struct parallel_io *par = bio->bi_private;
>> @@ -382,7 +388,7 @@ static void bl_write_cleanup(struct work_struct *work)
>>         dprintk("%s enter\n", __func__);
>>         task = container_of(work, struct rpc_task, u.tk_work);
>>         wdata = container_of(task, struct nfs_write_data, task);
>> -       if (!wdata->pnfs_error) {
>> +       if (likely(!wdata->pnfs_error)) {
>>                 /* Marks for LAYOUTCOMMIT */
>>                 mark_extents_written(BLK_LSEG2EXT(wdata->lseg),
>>                                      wdata->args.offset, wdata->args.count);
>> @@ -391,10 +397,15 @@ static void bl_write_cleanup(struct work_struct *work)
>>  }
>>
>>  /* Called when last of bios associated with a bl_write_pagelist call finishes */
>> -static void bl_end_par_io_write(void *data)
>> +static void bl_end_par_io_write(void *data, int num_se)
>>  {
>>         struct nfs_write_data *wdata = data;
>>
>> +       if (unlikely(wdata->pnfs_error)) {
>> +               bl_free_short_extents(&BLK_LSEG2EXT(wdata->lseg)->bl_inval,
>> +                                       num_se);
>> +       }
>> +
>>         wdata->task.tk_status = wdata->pnfs_error;
>>         wdata->verf.committed = NFS_FILE_SYNC;
>>         INIT_WORK(&wdata->task.u.tk_work, bl_write_cleanup);
>> @@ -547,7 +558,7 @@ bl_write_pagelist(struct nfs_write_data *wdata, int sync)
>>          */
>>         par = alloc_parallel(wdata);
>>         if (!par)
>> -               return PNFS_NOT_ATTEMPTED;
>> +               goto out_mds;
>>         par->pnfs_callback = bl_end_par_io_write;
>>         /* At this point, have to be more careful with error handling */
>>
>> @@ -555,12 +566,15 @@ bl_write_pagelist(struct nfs_write_data *wdata, int sync)
>>         be = bl_find_get_extent(BLK_LSEG2EXT(wdata->lseg), isect, &cow_read);
>>         if (!be || !is_writable(be, isect)) {
>>                 dprintk("%s no matching extents!\n", __func__);
>> -               wdata->pnfs_error = -EINVAL;
>> -               goto out;
>> +               goto out_mds;
>>         }
>>
>>         /* First page inside INVALID extent */
>>         if (be->be_state == PNFS_BLOCK_INVALID_DATA) {
>> +               if (likely(!bl_push_one_short_extent(be->be_inval)))
>> +                       par->bse_count++;
>> +               else
>> +                       goto out_mds;
>>                 temp = offset >> PAGE_CACHE_SHIFT;
>>                 npg_zero = do_div(temp, npg_per_block);
>>                 isect = (sector_t) (((offset - npg_zero * PAGE_CACHE_SIZE) &
>> @@ -598,6 +612,19 @@ fill_invalid_ext:
>>                                 wdata->pnfs_error = ret;
>>                                 goto out;
>>                         }
>> +                       if (likely(!bl_push_one_short_extent(be->be_inval)))
>> +                               par->bse_count++;
>> +                       else {
>> +                               end_page_writeback(page);
>> +                               page_cache_release(page);
>> +                               wdata->pnfs_error = -ENOMEM;
>> +                               goto out;
>> +                       }
>> +                       /* FIXME: This should be done in bi_end_io */
>> +                       mark_extents_written(BLK_LSEG2EXT(wdata->lseg),
>> +                                            page->index << PAGE_CACHE_SHIFT,
>> +                                            PAGE_CACHE_SIZE);
>> +
>>                         bio = bl_add_page_to_bio(bio, npg_zero, WRITE,
>>                                                  isect, page, be,
>>                                                  bl_end_io_write_zero, par);
>> @@ -606,10 +633,6 @@ fill_invalid_ext:
>>                                 bio = NULL;
>>                                 goto out;
>>                         }
>> -                       /* FIXME: This should be done in bi_end_io */
>> -                       mark_extents_written(BLK_LSEG2EXT(wdata->lseg),
>> -                                            page->index << PAGE_CACHE_SHIFT,
>> -                                            PAGE_CACHE_SIZE);
>>  next_page:
>>                         isect += PAGE_CACHE_SECTORS;
>>                         extent_length -= PAGE_CACHE_SECTORS;
>> @@ -633,6 +656,15 @@ next_page:
>>                                 wdata->pnfs_error = -EINVAL;
>>                                 goto out;
>>                         }
>> +                       if (be->be_state == PNFS_BLOCK_INVALID_DATA) {
>> +                               if (likely(!bl_push_one_short_extent(
>> +                                                               be->be_inval)))
>> +                                       par->bse_count++;
>> +                               else {
>> +                                       wdata->pnfs_error = -ENOMEM;
>> +                                       goto out;
>> +                               }
>> +                       }
>>                         extent_length = be->be_length -
>>                             (isect - be->be_f_offset);
>>                 }
>> @@ -680,6 +712,10 @@ out:
>>         bl_submit_bio(WRITE, bio);
>>         put_parallel(par);
>>         return PNFS_ATTEMPTED;
>> +out_mds:
>> +       bl_put_extent(be);
>> +       kfree(par);
>> +       return PNFS_NOT_ATTEMPTED;
>>  }
>>
>>  /* FIXME - range ignored */
>> @@ -706,11 +742,17 @@ static void
>>  release_inval_marks(struct pnfs_inval_markings *marks)
>>  {
>>         struct pnfs_inval_tracking *pos, *temp;
>> +       struct pnfs_block_short_extent *se, *stemp;
>>
>>         list_for_each_entry_safe(pos, temp, &marks->im_tree.mtt_stub, it_link) {
>>                 list_del(&pos->it_link);
>>                 kfree(pos);
>>         }
>> +
>> +       list_for_each_entry_safe(se, stemp, &marks->im_extents, bse_node) {
>> +               list_del(&se->bse_node);
>> +               kfree(se);
>> +       }
>>         return;
>>  }
>>
>> diff --git a/fs/nfs/blocklayout/blocklayout.h b/fs/nfs/blocklayout/blocklayout.h
>> index 60728ac..e31a2df 100644
>> --- a/fs/nfs/blocklayout/blocklayout.h
>> +++ b/fs/nfs/blocklayout/blocklayout.h
>> @@ -70,6 +70,7 @@ struct pnfs_inval_markings {
>>         spinlock_t      im_lock;
>>         struct my_tree  im_tree;        /* Sectors that need LAYOUTCOMMIT */
>>         sector_t        im_block_size;  /* Server blocksize in sectors */
>> +       struct list_head im_extents;    /* Short extents for INVAL->RW conversion */
>>  };
>>
>>  struct pnfs_inval_tracking {
>> @@ -105,6 +106,7 @@ BL_INIT_INVAL_MARKS(struct pnfs_inval_markings *marks, sector_t blocksize)
>>  {
>>         spin_lock_init(&marks->im_lock);
>>         INIT_LIST_HEAD(&marks->im_tree.mtt_stub);
>> +       INIT_LIST_HEAD(&marks->im_extents);
>>         marks->im_block_size = blocksize;
>>         marks->im_tree.mtt_step_size = min((sector_t)PAGE_CACHE_SECTORS,
>>                                            blocksize);
>> @@ -199,6 +201,11 @@ void clean_pnfs_block_layoutupdate(struct pnfs_block_layout *bl,
>>  int bl_add_merge_extent(struct pnfs_block_layout *bl,
>>                          struct pnfs_block_extent *new);
>>  int bl_mark_for_commit(struct pnfs_block_extent *be,
>> -                       sector_t offset, sector_t length);
>> +                       sector_t offset, sector_t length,
>> +                       struct pnfs_block_short_extent *new);
>> +int bl_push_one_short_extent(struct pnfs_inval_markings *marks);
>> +struct pnfs_block_short_extent *
>> +bl_pop_one_short_extent(struct pnfs_inval_markings *marks);
>> +void bl_free_short_extents(struct pnfs_inval_markings *marks, int num_to_free);
>>
>>  #endif /* FS_NFS_NFS4BLOCKLAYOUT_H */
>> diff --git a/fs/nfs/blocklayout/extents.c b/fs/nfs/blocklayout/extents.c
>> index d0f52ed..6ce5a8d 100644
>> --- a/fs/nfs/blocklayout/extents.c
>> +++ b/fs/nfs/blocklayout/extents.c
>> @@ -369,20 +369,18 @@ static void add_to_commitlist(struct pnfs_block_layout *bl,
>>
>>  /* Note the range described by offset, length is guaranteed to be contained
>>   * within be.
>> + * new will be freed, either by this function or add_to_commitlist if they
>> + * decide not to use it, or after LAYOUTCOMMIT uses it in the commitlist.
>>   */
>>  int bl_mark_for_commit(struct pnfs_block_extent *be,
>> -                   sector_t offset, sector_t length)
>> +                   sector_t offset, sector_t length,
>> +                   struct pnfs_block_short_extent *new)
>>  {
>>         sector_t new_end, end = offset + length;
>> -       struct pnfs_block_short_extent *new;
>>         struct pnfs_block_layout *bl = container_of(be->be_inval,
>>                                                     struct pnfs_block_layout,
>>                                                     bl_inval);
>>
>> -       new = kmalloc(sizeof(*new), GFP_NOFS);
>> -       if (!new)
>> -               return -ENOMEM;
>> -
>>         mark_written_sectors(be->be_inval, offset, length);
>>         /* We want to add the range to commit list, but it must be
>>          * block-normalized, and verified that the normalized range has
>> @@ -412,9 +410,6 @@ int bl_mark_for_commit(struct pnfs_block_extent *be,
>>         new->bse_mdev = be->be_mdev;
>>
>>         spin_lock(&bl->bl_ext_lock);
>> -       /* new will be freed, either by add_to_commitlist if it decides not
>> -        * to use it, or after LAYOUTCOMMIT uses it in the commitlist.
>> -        */
>>         add_to_commitlist(bl, new);
>>         spin_unlock(&bl->bl_ext_lock);
>>         return 0;
>> @@ -862,3 +857,53 @@ clean_pnfs_block_layoutupdate(struct pnfs_block_layout *bl,
>>                 }
>>         }
>>  }
>> +
>> +int bl_push_one_short_extent(struct pnfs_inval_markings *marks)
>> +{
>> +       struct pnfs_block_short_extent *new;
>> +
>> +       new = kmalloc(sizeof(*new), GFP_NOFS);
>> +       if (unlikely(!new))
>> +               return -ENOMEM;
>> +
>> +       spin_lock(&marks->im_lock);
>> +       list_add(&new->bse_node, &marks->im_extents);
>> +       spin_unlock(&marks->im_lock);
>> +
>> +       return 0;
>> +}
>> +
>> +struct pnfs_block_short_extent *
>> +bl_pop_one_short_extent(struct pnfs_inval_markings *marks)
>> +{
>> +       struct pnfs_block_short_extent *rv = NULL;
>> +
>> +       spin_lock(&marks->im_lock);
>> +       if (!list_empty(&marks->im_extents)) {
>> +               rv = list_entry((&marks->im_extents)->next,
>> +                               struct pnfs_block_short_extent, bse_node);
>> +               list_del_init(&rv->bse_node);
>> +       }
>> +       spin_unlock(&marks->im_lock);
>> +
>> +       return rv;
>> +}
>> +
>> +void bl_free_short_extents(struct pnfs_inval_markings *marks, int num_to_free)
>> +{
>> +       struct pnfs_block_short_extent *se = NULL, *tmp;
>> +
>> +       if (num_to_free <= 0)
>> +               return;
>> +
>> +       spin_lock(&marks->im_lock);
>> +       list_for_each_entry_safe(se, tmp, &marks->im_extents, bse_node) {
>> +               list_del(&se->bse_node);
>> +               kfree(se);
>> +               if (--num_to_free == 0)
>> +                       break;
>> +       }
>> +       spin_unlock(&marks->im_lock);
>> +
>> +       BUG_ON(num_to_free > 0);
>> +}
>> --
>> 1.7.4.2
>>
>> --
>> To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
>> the body of a message to majordomo@xxxxxxxxxxxxxxx
>> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>



-- 
Thanks,
-Bergwolf
--
To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[Index of Archives]     [Linux Filesystem Development]     [Linux USB Development]     [Linux Media Development]     [Video for Linux]     [Linux NILFS]     [Linux Audio Users]     [Yosemite Info]     [Linux SCSI]

  Powered by Linux