Re: [PATCH 8/8] pnfsblock: alloc short extent before submit bio

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Hi Tao,

I like the direction of this patch; however,
there are a couple of nits below.

Thanks,

Benny

On 2011-11-09 17:16, Peng Tao wrote:
> As discussed earlier, it is better for block client to allocate memoroy for

nit: "memory"

> tracking extents state before submitting bio. So the patch does it by allocating
> a short_extent for every INVALID extent touched by write pagelist and for
> every zeroing page we created, saving them in layout header.
> Then in end_io we can just use them to create commit list items and avoid
> memory allocation there.
> 
> Signed-off-by: Peng Tao <peng_tao@xxxxxxx>
> ---
>  fs/nfs/blocklayout/blocklayout.c |   70 +++++++++++++++++++++++++++++---------
>  fs/nfs/blocklayout/blocklayout.h |    3 +-
>  fs/nfs/blocklayout/extents.c     |   13 ++-----
>  3 files changed, 60 insertions(+), 26 deletions(-)
> 
> diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c
> index cb4ff0f..53cd332 100644
> --- a/fs/nfs/blocklayout/blocklayout.c
> +++ b/fs/nfs/blocklayout/blocklayout.c
> @@ -90,8 +90,9 @@ static int is_writable(struct pnfs_block_extent *be, sector_t isect)
>   */
>  struct parallel_io {
>  	struct kref refcnt;
> -	void (*pnfs_callback) (void *data);
> +	void (*pnfs_callback) (void *data, int num_se);
>  	void *data;
> +	int bse_count;
>  };
>  
>  static inline struct parallel_io *alloc_parallel(void *data)
> @@ -102,6 +103,7 @@ static inline struct parallel_io *alloc_parallel(void *data)
>  	if (rv) {
>  		rv->data = data;
>  		kref_init(&rv->refcnt);
> +		rv->bse_count = 0;
>  	}
>  	return rv;
>  }
> @@ -116,7 +118,7 @@ static void destroy_parallel(struct kref *kref)
>  	struct parallel_io *p = container_of(kref, struct parallel_io, refcnt);
>  
>  	dprintk("%s enter\n", __func__);
> -	p->pnfs_callback(p->data);
> +	p->pnfs_callback(p->data, p->bse_count);
>  	kfree(p);
>  }
>  
> @@ -211,7 +213,7 @@ static void bl_read_cleanup(struct work_struct *work)
>  }
>  
>  static void
> -bl_end_par_io_read(void *data)
> +bl_end_par_io_read(void *data, int unused)
>  {
>  	struct nfs_read_data *rdata = data;
>  
> @@ -312,6 +314,7 @@ static void mark_extents_written(struct pnfs_block_layout *bl,
>  {
>  	sector_t isect, end;
>  	struct pnfs_block_extent *be;
> +	struct pnfs_block_short_extent *se;
>  
>  	dprintk("%s(%llu, %u)\n", __func__, offset, count);
>  	if (count == 0)
> @@ -324,8 +327,11 @@ static void mark_extents_written(struct pnfs_block_layout *bl,
>  		be = bl_find_get_extent(bl, isect, NULL);
>  		BUG_ON(!be); /* FIXME */
>  		len = min(end, be->be_f_offset + be->be_length) - isect;
> -		if (be->be_state == PNFS_BLOCK_INVALID_DATA)
> -			bl_mark_for_commit(be, isect, len); /* What if fails? */
> +		if (be->be_state == PNFS_BLOCK_INVALID_DATA) {
> +			se = bl_pop_short_extent(be->be_inval, 1);
> +			BUG_ON(!se);
> +			bl_mark_for_commit(be, isect, len, se);
> +		}
>  		isect += len;
>  		bl_put_extent(be);
>  	}
> @@ -347,7 +353,8 @@ static void bl_end_io_write_zero(struct bio *bio, int err)
>  		end_page_writeback(page);
>  		page_cache_release(page);
>  	} while (bvec >= bio->bi_io_vec);
> -	if (!uptodate) {
> +
> +	if (unlikely(!uptodate)) {
>  		if (!wdata->pnfs_error)
>  			wdata->pnfs_error = -EIO;
>  		pnfs_set_lo_fail(wdata->lseg);
> @@ -356,7 +363,6 @@ static void bl_end_io_write_zero(struct bio *bio, int err)
>  	put_parallel(par);
>  }
>  
> -/* This is basically copied from mpage_end_io_read */
>  static void bl_end_io_write(struct bio *bio, int err)
>  {
>  	struct parallel_io *par = bio->bi_private;
> @@ -382,7 +388,7 @@ static void bl_write_cleanup(struct work_struct *work)
>  	dprintk("%s enter\n", __func__);
>  	task = container_of(work, struct rpc_task, u.tk_work);
>  	wdata = container_of(task, struct nfs_write_data, task);
> -	if (!wdata->pnfs_error) {
> +	if (likely(!wdata->pnfs_error)) {
>  		/* Marks for LAYOUTCOMMIT */
>  		mark_extents_written(BLK_LSEG2EXT(wdata->lseg),
>  				     wdata->args.offset, wdata->args.count);
> @@ -391,9 +397,16 @@ static void bl_write_cleanup(struct work_struct *work)
>  }
>  
>  /* Called when last of bios associated with a bl_write_pagelist call finishes */
> -static void bl_end_par_io_write(void *data)
> +static void bl_end_par_io_write(void *data, int num_se)
>  {
>  	struct nfs_write_data *wdata = data;
> +	struct pnfs_block_short_extent *se;
> +
> +	if (unlikely(wdata->pnfs_error)) {
> +		se = bl_pop_short_extent(&BLK_LSEG2EXT(wdata->lseg)->bl_inval,
> +					num_se);
> +		kfree(se);
> +	}
>  
>  	wdata->task.tk_status = wdata->pnfs_error;
>  	wdata->verf.committed = NFS_FILE_SYNC;
> @@ -548,7 +561,7 @@ bl_write_pagelist(struct nfs_write_data *wdata, int sync)
>  	 */
>  	par = alloc_parallel(wdata);
>  	if (!par)
> -		return PNFS_NOT_ATTEMPTED;
> +		goto out_mds;
>  	par->pnfs_callback = bl_end_par_io_write;
>  	/* At this point, have to be more careful with error handling */
>  
> @@ -556,12 +569,15 @@ bl_write_pagelist(struct nfs_write_data *wdata, int sync)
>  	be = bl_find_get_extent(BLK_LSEG2EXT(wdata->lseg), isect, &cow_read);
>  	if (!be || !is_writable(be, isect)) {
>  		dprintk("%s no matching extents!\n", __func__);
> -		wdata->pnfs_error = -EINVAL;
> -		goto out;
> +		goto out_mds;
>  	}
>  
>  	/* First page inside INVALID extent */
>  	if (be->be_state == PNFS_BLOCK_INVALID_DATA) {
> +		if (likely(!bl_push_one_short_extent(be->be_inval)))
> +			par->bse_count++;
> +		else
> +			goto out_mds;
>  		temp = offset >> PAGE_CACHE_SHIFT;
>  		npg_zero = do_div(temp, npg_per_block);
>  		isect = (sector_t) (((offset - npg_zero * PAGE_CACHE_SIZE) &
> @@ -598,6 +614,19 @@ fill_invalid_ext:
>  				wdata->pnfs_error = ret;
>  				goto out;
>  			}
> +			if (likely(!bl_push_one_short_extent(be->be_inval)))
> +				par->bse_count++;
> +			else {
> +				end_page_writeback(page);
> +				page_cache_release(page);
> +				wdata->pnfs_error = -ENOMEM;
> +				goto out;
> +			}
> +			/* FIXME: This should be done in bi_end_io */
> +			mark_extents_written(BLK_LSEG2EXT(wdata->lseg),
> +					     page->index << PAGE_CACHE_SHIFT,
> +					     PAGE_CACHE_SIZE);
> +
>  			bio = bl_add_page_to_bio(bio, npg_zero, WRITE,
>  						 isect, page, be,
>  						 bl_end_io_write_zero, par);
> @@ -606,10 +635,6 @@ fill_invalid_ext:
>  				bio = NULL;
>  				goto out;
>  			}
> -			/* FIXME: This should be done in bi_end_io */
> -			mark_extents_written(BLK_LSEG2EXT(wdata->lseg),
> -					     page->index << PAGE_CACHE_SHIFT,
> -					     PAGE_CACHE_SIZE);
>  next_page:
>  			isect += PAGE_CACHE_SECTORS;
>  			extent_length -= PAGE_CACHE_SECTORS;
> @@ -633,6 +658,14 @@ next_page:
>  				wdata->pnfs_error = -EINVAL;
>  				goto out;
>  			}
> +			if (be->be_state == PNFS_BLOCK_INVALID_DATA) {
> +			    if (likely(!bl_push_one_short_extent(be->be_inval)))

checkpatch nit: please use tabs, not spaces, for indent...

Benny

> +				par->bse_count++;
> +			    else {
> +				wdata->pnfs_error = -ENOMEM;
> +				goto out;
> +			    }
> +			}
>  			extent_length = be->be_length -
>  			    (isect - be->be_f_offset);
>  		}
> @@ -680,6 +713,11 @@ out:
>  	bl_submit_bio(WRITE, bio);
>  	put_parallel(par);
>  	return PNFS_ATTEMPTED;
> +out_mds:
> +	bl_put_extent(be);
> +	if (par)
> +		kfree(par);
> +	return PNFS_NOT_ATTEMPTED;
>  }
>  
>  /* FIXME - range ignored */
> diff --git a/fs/nfs/blocklayout/blocklayout.h b/fs/nfs/blocklayout/blocklayout.h
> index df0e0fb..4986c23 100644
> --- a/fs/nfs/blocklayout/blocklayout.h
> +++ b/fs/nfs/blocklayout/blocklayout.h
> @@ -201,7 +201,8 @@ void clean_pnfs_block_layoutupdate(struct pnfs_block_layout *bl,
>  int bl_add_merge_extent(struct pnfs_block_layout *bl,
>  			 struct pnfs_block_extent *new);
>  int bl_mark_for_commit(struct pnfs_block_extent *be,
> -			sector_t offset, sector_t length);
> +			sector_t offset, sector_t length,
> +			struct pnfs_block_short_extent *new);
>  int bl_push_one_short_extent(struct pnfs_inval_markings *marks);
>  struct pnfs_block_short_extent*
>  bl_pop_short_extent(struct pnfs_inval_markings *marks, int num_to_pop);
> diff --git a/fs/nfs/blocklayout/extents.c b/fs/nfs/blocklayout/extents.c
> index 72c7fa1..21da3a3 100644
> --- a/fs/nfs/blocklayout/extents.c
> +++ b/fs/nfs/blocklayout/extents.c
> @@ -370,20 +370,18 @@ static void add_to_commitlist(struct pnfs_block_layout *bl,
>  
>  /* Note the range described by offset, length is guaranteed to be contained
>   * within be.
> + * new will be freed, either by this function or add_to_commitlist if they
> + * decide not to use it, or after LAYOUTCOMMIT uses it in the commitlist.
>   */
>  int bl_mark_for_commit(struct pnfs_block_extent *be,
> -		    sector_t offset, sector_t length)
> +		    sector_t offset, sector_t length,
> +		    struct pnfs_block_short_extent *new)
>  {
>  	sector_t new_end, end = offset + length;
> -	struct pnfs_block_short_extent *new;
>  	struct pnfs_block_layout *bl = container_of(be->be_inval,
>  						    struct pnfs_block_layout,
>  						    bl_inval);
>  
> -	new = kmalloc(sizeof(*new), GFP_NOFS);
> -	if (!new)
> -		return -ENOMEM;
> -
>  	mark_written_sectors(be->be_inval, offset, length);
>  	/* We want to add the range to commit list, but it must be
>  	 * block-normalized, and verified that the normalized range has
> @@ -413,9 +411,6 @@ int bl_mark_for_commit(struct pnfs_block_extent *be,
>  	new->bse_mdev = be->be_mdev;
>  
>  	spin_lock(&bl->bl_ext_lock);
> -	/* new will be freed, either by add_to_commitlist if it decides not
> -	 * to use it, or after LAYOUTCOMMIT uses it in the commitlist.
> -	 */
>  	add_to_commitlist(bl, new);
>  	spin_unlock(&bl->bl_ext_lock);
>  	return 0;
--
To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[Index of Archives]     [Linux Filesystem Development]     [Linux USB Development]     [Linux Media Development]     [Video for Linux]     [Linux NILFS]     [Linux Audio Users]     [Yosemite Info]     [Linux SCSI]

  Powered by Linux