Re: [PATCH-v2 7/7] pnfsblock: alloc short extent before submit bio

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On 11/10/2011 10:25 AM, Peng Tao wrote:
> As discussed earlier, it is better for block client to allocate memory for
> tracking extents state before submitting bio. So the patch does it by allocating
> a short_extent for every INVALID extent touched by write pagelist and for
> every zeroing page we created, saving them in layout header. Then in end_io we
> can just use them to create commit list items and avoid memory allocation there.
> 
> Signed-off-by: Peng Tao <peng_tao@xxxxxxx>
> ---
>  fs/nfs/blocklayout/blocklayout.c |   74 +++++++++++++++++++++++++++++--------
>  fs/nfs/blocklayout/blocklayout.h |    9 ++++-
>  fs/nfs/blocklayout/extents.c     |   62 +++++++++++++++++++++++++++-----
>  3 files changed, 119 insertions(+), 26 deletions(-)
> 
> diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c
> index 3eaea2b..3fdfb29 100644
> --- a/fs/nfs/blocklayout/blocklayout.c
> +++ b/fs/nfs/blocklayout/blocklayout.c
> @@ -90,8 +90,9 @@ static int is_writable(struct pnfs_block_extent *be, sector_t isect)
>   */
>  struct parallel_io {
>  	struct kref refcnt;
> -	void (*pnfs_callback) (void *data);
> +	void (*pnfs_callback) (void *data, int num_se);
>  	void *data;
> +	int bse_count;
>  };
>  
>  static inline struct parallel_io *alloc_parallel(void *data)
> @@ -102,6 +103,7 @@ static inline struct parallel_io *alloc_parallel(void *data)
>  	if (rv) {
>  		rv->data = data;
>  		kref_init(&rv->refcnt);
> +		rv->bse_count = 0;
>  	}
>  	return rv;
>  }
> @@ -116,7 +118,7 @@ static void destroy_parallel(struct kref *kref)
>  	struct parallel_io *p = container_of(kref, struct parallel_io, refcnt);
>  
>  	dprintk("%s enter\n", __func__);
> -	p->pnfs_callback(p->data);
> +	p->pnfs_callback(p->data, p->bse_count);
>  	kfree(p);
>  }
>  
> @@ -211,7 +213,7 @@ static void bl_read_cleanup(struct work_struct *work)
>  }
>  
>  static void
> -bl_end_par_io_read(void *data)
> +bl_end_par_io_read(void *data, int unused)
>  {
>  	struct nfs_read_data *rdata = data;
>  
> @@ -312,6 +314,7 @@ static void mark_extents_written(struct pnfs_block_layout *bl,
>  {
>  	sector_t isect, end;
>  	struct pnfs_block_extent *be;
> +	struct pnfs_block_short_extent *se;
>  
>  	dprintk("%s(%llu, %u)\n", __func__, offset, count);
>  	if (count == 0)
> @@ -324,8 +327,11 @@ static void mark_extents_written(struct pnfs_block_layout *bl,
>  		be = bl_find_get_extent(bl, isect, NULL);
>  		BUG_ON(!be); /* FIXME */
>  		len = min(end, be->be_f_offset + be->be_length) - isect;
> -		if (be->be_state == PNFS_BLOCK_INVALID_DATA)
> -			bl_mark_for_commit(be, isect, len); /* What if fails? */
> +		if (be->be_state == PNFS_BLOCK_INVALID_DATA) {
> +			se = bl_pop_one_short_extent(be->be_inval);
> +			BUG_ON(!se);
> +			bl_mark_for_commit(be, isect, len, se);
> +		}
>  		isect += len;
>  		bl_put_extent(be);
>  	}
> @@ -347,7 +353,8 @@ static void bl_end_io_write_zero(struct bio *bio, int err)
>  		end_page_writeback(page);
>  		page_cache_release(page);
>  	} while (bvec >= bio->bi_io_vec);
> -	if (!uptodate) {
> +
> +	if (unlikely(!uptodate)) {
>  		if (!wdata->pnfs_error)
>  			wdata->pnfs_error = -EIO;
>  		pnfs_set_lo_fail(wdata->lseg);
> @@ -356,7 +363,6 @@ static void bl_end_io_write_zero(struct bio *bio, int err)
>  	put_parallel(par);
>  }
>  
> -/* This is basically copied from mpage_end_io_read */
>  static void bl_end_io_write(struct bio *bio, int err)
>  {
>  	struct parallel_io *par = bio->bi_private;
> @@ -382,7 +388,7 @@ static void bl_write_cleanup(struct work_struct *work)
>  	dprintk("%s enter\n", __func__);
>  	task = container_of(work, struct rpc_task, u.tk_work);
>  	wdata = container_of(task, struct nfs_write_data, task);
> -	if (!wdata->pnfs_error) {
> +	if (likely(!wdata->pnfs_error)) {
>  		/* Marks for LAYOUTCOMMIT */
>  		mark_extents_written(BLK_LSEG2EXT(wdata->lseg),
>  				     wdata->args.offset, wdata->args.count);
> @@ -391,10 +397,15 @@ static void bl_write_cleanup(struct work_struct *work)
>  }
>  
>  /* Called when last of bios associated with a bl_write_pagelist call finishes */
> -static void bl_end_par_io_write(void *data)
> +static void bl_end_par_io_write(void *data, int num_se)
>  {
>  	struct nfs_write_data *wdata = data;
>  
> +	if (unlikely(wdata->pnfs_error)) {
> +		bl_free_short_extents(&BLK_LSEG2EXT(wdata->lseg)->bl_inval,
> +					num_se);
> +	}
> +
>  	wdata->task.tk_status = wdata->pnfs_error;
>  	wdata->verf.committed = NFS_FILE_SYNC;
>  	INIT_WORK(&wdata->task.u.tk_work, bl_write_cleanup);
> @@ -547,7 +558,7 @@ bl_write_pagelist(struct nfs_write_data *wdata, int sync)
>  	 */
>  	par = alloc_parallel(wdata);
>  	if (!par)
> -		return PNFS_NOT_ATTEMPTED;
> +		goto out_mds;
>  	par->pnfs_callback = bl_end_par_io_write;
>  	/* At this point, have to be more careful with error handling */
>  
> @@ -555,12 +566,15 @@ bl_write_pagelist(struct nfs_write_data *wdata, int sync)
>  	be = bl_find_get_extent(BLK_LSEG2EXT(wdata->lseg), isect, &cow_read);
>  	if (!be || !is_writable(be, isect)) {
>  		dprintk("%s no matching extents!\n", __func__);
> -		wdata->pnfs_error = -EINVAL;
> -		goto out;
> +		goto out_mds;
>  	}
>  
>  	/* First page inside INVALID extent */
>  	if (be->be_state == PNFS_BLOCK_INVALID_DATA) {
> +		if (likely(!bl_push_one_short_extent(be->be_inval)))
> +			par->bse_count++;
> +		else
> +			goto out_mds;
>  		temp = offset >> PAGE_CACHE_SHIFT;
>  		npg_zero = do_div(temp, npg_per_block);
>  		isect = (sector_t) (((offset - npg_zero * PAGE_CACHE_SIZE) &
> @@ -598,6 +612,19 @@ fill_invalid_ext:
>  				wdata->pnfs_error = ret;
>  				goto out;
>  			}
> +			if (likely(!bl_push_one_short_extent(be->be_inval)))
> +				par->bse_count++;
> +			else {
> +				end_page_writeback(page);
> +				page_cache_release(page);
> +				wdata->pnfs_error = -ENOMEM;
> +				goto out;
> +			}
> +			/* FIXME: This should be done in bi_end_io */
> +			mark_extents_written(BLK_LSEG2EXT(wdata->lseg),
> +					     page->index << PAGE_CACHE_SHIFT,
> +					     PAGE_CACHE_SIZE);
> +
>  			bio = bl_add_page_to_bio(bio, npg_zero, WRITE,
>  						 isect, page, be,
>  						 bl_end_io_write_zero, par);
> @@ -606,10 +633,6 @@ fill_invalid_ext:
>  				bio = NULL;
>  				goto out;
>  			}
> -			/* FIXME: This should be done in bi_end_io */
> -			mark_extents_written(BLK_LSEG2EXT(wdata->lseg),
> -					     page->index << PAGE_CACHE_SHIFT,
> -					     PAGE_CACHE_SIZE);
>  next_page:
>  			isect += PAGE_CACHE_SECTORS;
>  			extent_length -= PAGE_CACHE_SECTORS;
> @@ -633,6 +656,15 @@ next_page:
>  				wdata->pnfs_error = -EINVAL;
>  				goto out;
>  			}
> +			if (be->be_state == PNFS_BLOCK_INVALID_DATA) {
> +				if (likely(!bl_push_one_short_extent(
> +								be->be_inval)))
> +					par->bse_count++;
> +				else {
> +					wdata->pnfs_error = -ENOMEM;
> +					goto out;
> +				}
> +			}
>  			extent_length = be->be_length -
>  			    (isect - be->be_f_offset);
>  		}
> @@ -680,6 +712,10 @@ out:
>  	bl_submit_bio(WRITE, bio);
>  	put_parallel(par);
>  	return PNFS_ATTEMPTED;
> +out_mds:
> +	bl_put_extent(be);
> +	kfree(par);
> +	return PNFS_NOT_ATTEMPTED;
>  }
>  
>  /* FIXME - range ignored */
> @@ -706,11 +742,17 @@ static void
>  release_inval_marks(struct pnfs_inval_markings *marks)
>  {
>  	struct pnfs_inval_tracking *pos, *temp;
> +	struct pnfs_block_short_extent *se, *stemp;
>  
>  	list_for_each_entry_safe(pos, temp, &marks->im_tree.mtt_stub, it_link) {
>  		list_del(&pos->it_link);
>  		kfree(pos);
>  	}
> +
> +	list_for_each_entry_safe(se, stemp, &marks->im_extents, bse_node) {
> +		list_del(&se->bse_node);
> +		kfree(se);
> +	}
>  	return;
>  }
>  
> diff --git a/fs/nfs/blocklayout/blocklayout.h b/fs/nfs/blocklayout/blocklayout.h
> index 60728ac..e31a2df 100644
> --- a/fs/nfs/blocklayout/blocklayout.h
> +++ b/fs/nfs/blocklayout/blocklayout.h
> @@ -70,6 +70,7 @@ struct pnfs_inval_markings {
>  	spinlock_t	im_lock;
>  	struct my_tree	im_tree;	/* Sectors that need LAYOUTCOMMIT */
>  	sector_t	im_block_size;	/* Server blocksize in sectors */
> +	struct list_head im_extents;	/* Short extents for INVAL->RW conversion */
>  };
>  
>  struct pnfs_inval_tracking {
> @@ -105,6 +106,7 @@ BL_INIT_INVAL_MARKS(struct pnfs_inval_markings *marks, sector_t blocksize)
>  {
>  	spin_lock_init(&marks->im_lock);
>  	INIT_LIST_HEAD(&marks->im_tree.mtt_stub);
> +	INIT_LIST_HEAD(&marks->im_extents);
>  	marks->im_block_size = blocksize;
>  	marks->im_tree.mtt_step_size = min((sector_t)PAGE_CACHE_SECTORS,
>  					   blocksize);
> @@ -199,6 +201,11 @@ void clean_pnfs_block_layoutupdate(struct pnfs_block_layout *bl,
>  int bl_add_merge_extent(struct pnfs_block_layout *bl,
>  			 struct pnfs_block_extent *new);
>  int bl_mark_for_commit(struct pnfs_block_extent *be,
> -			sector_t offset, sector_t length);
> +			sector_t offset, sector_t length,
> +			struct pnfs_block_short_extent *new);
> +int bl_push_one_short_extent(struct pnfs_inval_markings *marks);
> +struct pnfs_block_short_extent *
> +bl_pop_one_short_extent(struct pnfs_inval_markings *marks);
> +void bl_free_short_extents(struct pnfs_inval_markings *marks, int num_to_free);
>  
>  #endif /* FS_NFS_NFS4BLOCKLAYOUT_H */
> diff --git a/fs/nfs/blocklayout/extents.c b/fs/nfs/blocklayout/extents.c
> index d0f52ed..db66e68 100644
> --- a/fs/nfs/blocklayout/extents.c
> +++ b/fs/nfs/blocklayout/extents.c
> @@ -369,20 +369,18 @@ static void add_to_commitlist(struct pnfs_block_layout *bl,
>  
>  /* Note the range described by offset, length is guaranteed to be contained
>   * within be.
> + * new will be freed, either by this function or add_to_commitlist if they
> + * decide not to use it, or after LAYOUTCOMMIT uses it in the commitlist.
>   */
>  int bl_mark_for_commit(struct pnfs_block_extent *be,
> -		    sector_t offset, sector_t length)
> +		    sector_t offset, sector_t length,
> +		    struct pnfs_block_short_extent *new)
>  {
>  	sector_t new_end, end = offset + length;
> -	struct pnfs_block_short_extent *new;
>  	struct pnfs_block_layout *bl = container_of(be->be_inval,
>  						    struct pnfs_block_layout,
>  						    bl_inval);
>  
> -	new = kmalloc(sizeof(*new), GFP_NOFS);
> -	if (!new)
> -		return -ENOMEM;
> -
>  	mark_written_sectors(be->be_inval, offset, length);
>  	/* We want to add the range to commit list, but it must be
>  	 * block-normalized, and verified that the normalized range has
> @@ -412,9 +410,6 @@ int bl_mark_for_commit(struct pnfs_block_extent *be,
>  	new->bse_mdev = be->be_mdev;
>  
>  	spin_lock(&bl->bl_ext_lock);
> -	/* new will be freed, either by add_to_commitlist if it decides not
> -	 * to use it, or after LAYOUTCOMMIT uses it in the commitlist.
> -	 */
>  	add_to_commitlist(bl, new);
>  	spin_unlock(&bl->bl_ext_lock);
>  	return 0;
> @@ -862,3 +857,52 @@ clean_pnfs_block_layoutupdate(struct pnfs_block_layout *bl,
>  		}
>  	}
>  }
> +
> +int bl_push_one_short_extent(struct pnfs_inval_markings *marks)
> +{
> +	struct pnfs_block_short_extent *new;
> +
> +	new = kmalloc(sizeof(*new), GFP_NOFS);
> +	if (unlikely(!new))
> +		return -ENOMEM;
> +
> +	spin_lock(&marks->im_lock);
> +	list_add(&new->bse_node, &marks->im_extents);
> +	spin_unlock(&marks->im_lock);
> +
> +	return 0;
> +}
> +
> +struct pnfs_block_short_extent *
> +bl_pop_one_short_extent(struct pnfs_inval_markings *marks)
> +{
> +	struct pnfs_block_short_extent *rv = NULL;
> +
> +	spin_lock(&marks->im_lock);
> +	if (!list_empty(&marks->im_extents)) {
> +		rv = list_entry((&marks->im_extents)->next,
> +				struct pnfs_block_short_extent, bse_node);
> +		list_del_init(&rv->bse_node);
> +	}
> +	spin_unlock(&marks->im_lock);
> +
> +	return rv;
> +}
> +
> +void bl_free_short_extents(struct pnfs_inval_markings *marks, int num_to_free)
> +{
> +	struct pnfs_block_short_extent *se = NULL, *tmp;
> +
> +	BUG_ON(num_to_free <= 0);

Why is it a bug if num_to_free is <= 0?  Couldn't you do:

if (num_to_free <= 0)
	return;

instead?

- Bryan

> +
> +	spin_lock(&marks->im_lock);
> +	list_for_each_entry_safe(se, tmp, &marks->im_extents, bse_node) {
> +		list_del(&se->bse_node);
> +		kfree(se);
> +		if (--num_to_free == 0)
> +			break;
> +	}
> +	spin_unlock(&marks->im_lock);
> +
> +	BUG_ON(num_to_free > 0);
> +}

--
To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[Index of Archives]     [Linux Filesystem Development]     [Linux USB Development]     [Linux Media Development]     [Video for Linux]     [Linux NILFS]     [Linux Audio Users]     [Yosemite Info]     [Linux SCSI]

  Powered by Linux