Re: [PATCH v5] rbd: fix the memory leak of bio_chain_clone

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On 09/06/2012 06:30 AM, Guangliang Zhao wrote:
> The bio_pair alloced in bio_chain_clone would not be freed,
> this will cause a memory leak. It could be freed actually only
> after 3 times release, because the reference count of bio_pair
> is initialized to 3 when bio_split and bio_pair_release only
> drops the reference count.
> 
> The function bio_pair_release must be called three times for
> releasing bio_pair, and the callback functions of bios on the
> requests will be called when the last release time in bio_pair_release,
> however, these functions will also be called in rbd_req_cb. In
> other words, they will be called twice, and it may cause serious
> consequences.


I just want you to know I'm looking at this patch now.  This is a
pretty complex bit of code though, so it may take me a bit to get
back to you.

					-Alex

> This patch clones bio chain from the origin directly instead of
> bio_split. The old bios which will be split may be modified by
> the callback fn, so their copys need to be saved(called split_bio).
> The new bio chain can be released whenever we don't need it.
> 
> This patch can just handle the split of *single page* bios, but
> it's enough here for the following reasons:
> 
> Only bios span across multiple osds need to be split, and these bios
> *must* be single page because of rbd_merge_bvec. With the function,
> the new bvec will not permitted to merge, if it make the bio cross
> the osd boundary, except it is the first one. In other words, there
> are two types of bio:
> 
> 	- the bios don't cross the osd boundary
> 	  They have one or more pages. The value of offset will
> 	  always be 0 in this case, so nothing will be changed, and
> 	  the code changes tmp bios doesn't take effact at all.
> 
> 	- the bios cross the osd boundary
> 	  Each one have only one page. These bios need to be split,
> 	  and the offset is used to indicate the next bio, it makes
> 	  sense only in this instance.
> 
> The original bios may be modifid by the callback fn before the next 
> bio_chain_clone() called, when a bio need to be split, so its copy 
> will be saved.
> 
> Signed-off-by: Guangliang Zhao <gzhao@xxxxxxxx>
> ---
> 
>  drivers/block/rbd.c |  102 ++++++++++++++++++++++++++++++---------------------
>  1 file changed, 60 insertions(+), 42 deletions(-)
> 
> diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
> index 9917943..a605e1c 100644
> --- a/drivers/block/rbd.c
> +++ b/drivers/block/rbd.c
> @@ -717,50 +717,70 @@ static void zero_bio_chain(struct bio *chain, int start_ofs)
>  	}
>  }
>  
> -/*
> +/**
>   * bio_chain_clone - clone a chain of bios up to a certain length.
> - * might return a bio_pair that will need to be released.
> + * @old: bio to clone
> + * @split_bio: bio which will be split
> + * @offset: start point for bio clone
> + * @len: length of bio chain
> + * @gfp_mask: allocation priority
> + *
> + * Value of split_bio will be !NULL only when there is a bio need to be
> + * split. NULL otherwise.
> + *
> + * RETURNS:
> + * Pointer to new bio chain on success, NULL on failure.
>   */
> -static struct bio *bio_chain_clone(struct bio **old, struct bio **next,
> -				   struct bio_pair **bp,
> -				   int len, gfp_t gfpmask)
> +static struct bio *bio_chain_clone(struct bio **old, struct bio **split_bio,
> +				   int *offset, int len, gfp_t gfpmask)
>  {
> -	struct bio *tmp, *old_chain = *old, *new_chain = NULL, *tail = NULL;
> -	int total = 0;
> -
> -	if (*bp) {
> -		bio_pair_release(*bp);
> -		*bp = NULL;
> -	}
> +	struct bio *tmp, *old_chain, *split, *new_chain = NULL, *tail = NULL;
> +	int total = 0, need = len;
>  
> +	split = *split_bio;
> +	old_chain = split ? split : *old;
>  	while (old_chain && (total < len)) {
>  		tmp = bio_kmalloc(gfpmask, old_chain->bi_max_vecs);
>  		if (!tmp)
>  			goto err_out;
>  
> -		if (total + old_chain->bi_size > len) {
> -			struct bio_pair *bp;
> -
> -			/*
> -			 * this split can only happen with a single paged bio,
> -			 * split_bio will BUG_ON if this is not the case
> -			 */
> -			dout("bio_chain_clone split! total=%d remaining=%d"
> -			     "bi_size=%u\n",
> -			     total, len - total, old_chain->bi_size);
> -
> -			/* split the bio. We'll release it either in the next
> -			   call, or it will have to be released outside */
> -			bp = bio_split(old_chain, (len - total) / SECTOR_SIZE);
> -			if (!bp)
> -				goto err_out;
> -
> -			__bio_clone(tmp, &bp->bio1);
> -
> -			*next = &bp->bio2;
> +		__bio_clone(tmp, old_chain);
> +		tmp->bi_sector += *offset >> SECTOR_SHIFT;
> +		tmp->bi_io_vec->bv_offset += *offset >> SECTOR_SHIFT;
> +		/*
> +		 * The bios span across multiple osd objects must be
> +		 * single paged, rbd_merge_bvec would guarantee it.
> +		 * So we needn't worry about other things.
> +		 */
> +		if (tmp->bi_size - *offset > need) {
> +			tmp->bi_size = need;
> +			tmp->bi_io_vec->bv_len = need;
> +			*offset += need;
> +			if (!split) {
> +				/*
> +				 * Because the old bio may be modified by the
> +				 * callback function, its copy should be saved.
> +				 */
> +				split = bio_clone(old_chain, gfpmask);
> +				/*
> +				 * This is always the last allocation in this
> +				 * loop, so we only need release the bio chain
> +				 * when failed.
> +				 */
> +				if (!split)
> +					goto err_out;
> +
> +				split->bi_next = old_chain->bi_next;
> +			}
>  		} else {
> -			__bio_clone(tmp, old_chain);
> -			*next = old_chain->bi_next;
> +			old_chain = old_chain->bi_next;
> +			tmp->bi_size -= *offset;
> +			tmp->bi_io_vec->bv_len -= *offset;
> +			*offset = 0;
> +			if (split) {
> +				bio_put(split);
> +				split = NULL;
> +			}
>  		}
>  
>  		tmp->bi_bdev = NULL;
> @@ -773,9 +793,9 @@ static struct bio *bio_chain_clone(struct bio **old, struct bio **next,
>  			tail->bi_next = tmp;
>  			tail = tmp;
>  		}
> -		old_chain = old_chain->bi_next;
>  
>  		total += tmp->bi_size;
> +		need -= tmp->bi_size;
>  	}
>  
>  	BUG_ON(total < len);
> @@ -784,6 +804,7 @@ static struct bio *bio_chain_clone(struct bio **old, struct bio **next,
>  		tail->bi_next = NULL;
>  
>  	*old = old_chain;
> +	*split_bio = split;
>  
>  	return new_chain;
>  
> @@ -1434,16 +1455,15 @@ static void rbd_rq_fn(struct request_queue *q)
>  {
>  	struct rbd_device *rbd_dev = q->queuedata;
>  	struct request *rq;
> -	struct bio_pair *bp = NULL;
>  
>  	while ((rq = blk_fetch_request(q))) {
>  		struct bio *bio;
> -		struct bio *rq_bio, *next_bio = NULL;
> +		struct bio *rq_bio, *split_bio = NULL;
>  		bool do_write;
>  		unsigned int size;
>  		u64 op_size = 0;
>  		u64 ofs;
> -		int num_segs, cur_seg = 0;
> +		int num_segs, cur_seg = 0, offset = 0;
>  		struct rbd_req_coll *coll;
>  		struct ceph_snap_context *snapc;
>  
> @@ -1507,7 +1527,7 @@ static void rbd_rq_fn(struct request_queue *q)
>  						  ofs, size,
>  						  NULL, NULL);
>  			kref_get(&coll->kref);
> -			bio = bio_chain_clone(&rq_bio, &next_bio, &bp,
> +			bio = bio_chain_clone(&rq_bio, &split_bio, &offset,
>  					      op_size, GFP_ATOMIC);
>  			if (!bio) {
>  				rbd_coll_end_req_index(rq, coll, cur_seg,
> @@ -1535,12 +1555,10 @@ next_seg:
>  			ofs += op_size;
>  
>  			cur_seg++;
> -			rq_bio = next_bio;
>  		} while (size > 0);
>  		kref_put(&coll->kref, rbd_coll_release);
>  
> -		if (bp)
> -			bio_pair_release(bp);
> +		BUG_ON(split_bio);
>  		spin_lock_irq(q->queue_lock);
>  
>  		ceph_put_snap_context(snapc);
> 

--
To unsubscribe from this list: send the line "unsubscribe ceph-devel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[Index of Archives]     [CEPH Users]     [Ceph Large]     [Information on CEPH]     [Linux BTRFS]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]
  Powered by Linux