Re: [PATCH RFC] allow bio code to unmap sg io requests and have blk_execute_rq_nowait bounce bios

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Fri, Jul 29 2005, Mike Christie wrote:
> Hey Jens and James,
> 
> The inlined patch moves the bounce buffer handling to blk_execute_rq_nowait
> so the scsi, sg io and cdrom code does not have to handle it. To accomplish
> this I moved the bio_uncopy_user to a bi_end_io function and bio_unmap_user
> to a work struct that is scheduled from a bi_end_io function. Did you say
> you disliked the idea of calling bio_unmap_user from a work struct - don't
> remember and I lost my emails when I moved? :(

It's probably alright, it cleans up the code a lot. It will cost some
extra context switches, but I'm naively hoping that for busy io we will
have good batching of the processing anyways.

> In my patch there is the possibility of pages being marked dirty after the
> request has been returned to userspace instead of the pages being dirtied
> before in the bio_unmap_user. Is this OK?

I _think_ this is ok, since we still have the page pinned at this point.
Andrew?

> And, as I work on the adding of BLKERR_* error values in replacement
> of the dm-multipath/bio sense patch, I was thinking about your comment
> about having one true make_request function. It seems like if we
> extended bios to handle more request stuff (like what is needed for SG
> IO) we could just pass the bio to __make_request - with some
> modifications to __make_request - instead of doing it request based
> and moving the blk_queue_bounce call to blk_execute_rq_nowait  like I
> did in my patch. Is this what you were thinking when adding the bio
> sense code?

I'm cautiously not extending struct bio purposely, this sort of
functionality should remain with struct request. bio is just a page
container, lets not get away from that concept.

My plan for dm etc stacked drivers is to have them register a
request_fn() that will handle processing of special requests.

> Patch was made against James' scsi-block-2.6 tree.
> Signed-off-by: Mike Christie <michaelc@xxxxxxxxxxx>

I will update the 2.6 block tree.
> 
> 
> diff --git a/drivers/block/ll_rw_blk.c b/drivers/block/ll_rw_blk.c
> --- a/drivers/block/ll_rw_blk.c
> +++ b/drivers/block/ll_rw_blk.c
> @@ -2193,31 +2193,6 @@ int blk_rq_map_user_iov(request_queue_t 
>  EXPORT_SYMBOL(blk_rq_map_user_iov);
>  
>  /**
> - * blk_rq_unmap_user - unmap a request with user data
> - * @rq:		request to be unmapped
> - * @bio:	bio for the request
> - * @ulen:	length of user buffer
> - *
> - * Description:
> - *    Unmap a request previously mapped by blk_rq_map_user().
> - */
> -int blk_rq_unmap_user(struct bio *bio, unsigned int ulen)
> -{
> -	int ret = 0;
> -
> -	if (bio) {
> -		if (bio_flagged(bio, BIO_USER_MAPPED))
> -			bio_unmap_user(bio);
> -		else
> -			ret = bio_uncopy_user(bio);
> -	}
> -
> -	return 0;
> -}
> -
> -EXPORT_SYMBOL(blk_rq_unmap_user);
> -
> -/**
>   * blk_rq_map_kern - map kernel data to a request, for REQ_BLOCK_PC usage
>   * @q:		request queue where request should be inserted
>   * @rw:		READ or WRITE data
> @@ -2257,6 +2232,9 @@ void blk_execute_rq_nowait(request_queue
>  {
>  	int where = at_head ? ELEVATOR_INSERT_FRONT : ELEVATOR_INSERT_BACK;
>  
> +        if (rq->bio)
> +                blk_queue_bounce(q, &rq->bio);
> +
>  	rq->rq_disk = bd_disk;
>  	rq->flags |= REQ_NOMERGE;
>  	rq->end_io = done;
> diff --git a/drivers/block/scsi_ioctl.c b/drivers/block/scsi_ioctl.c
> --- a/drivers/block/scsi_ioctl.c
> +++ b/drivers/block/scsi_ioctl.c
> @@ -218,7 +218,6 @@ static int sg_io(struct file *file, requ
>  	unsigned long start_time;
>  	int writing = 0, ret = 0;
>  	struct request *rq;
> -	struct bio *bio;
>  	char sense[SCSI_SENSE_BUFFERSIZE];
>  	unsigned char cmd[BLK_MAX_CDB];
>  
> @@ -287,14 +286,6 @@ static int sg_io(struct file *file, requ
>  	rq->sense_len = 0;
>  
>  	rq->flags |= REQ_BLOCK_PC;
> -	bio = rq->bio;
> -
> -	/*
> -	 * bounce this after holding a reference to the original bio, it's
> -	 * needed for proper unmapping
> -	 */
> -	if (rq->bio)
> -		blk_queue_bounce(q, &rq->bio);
>  
>  	rq->timeout = (hdr->timeout * HZ) / 1000;
>  	if (!rq->timeout)
> @@ -330,9 +321,6 @@ static int sg_io(struct file *file, requ
>  			hdr->sb_len_wr = len;
>  	}
>  
> -	if (blk_rq_unmap_user(bio, hdr->dxfer_len))
> -		ret = -EFAULT;
> -
>  	/* may not have succeeded, but output values written to control
>  	 * structure (struct sg_io_hdr).  */
>  out:
> diff --git a/drivers/cdrom/cdrom.c b/drivers/cdrom/cdrom.c
> --- a/drivers/cdrom/cdrom.c
> +++ b/drivers/cdrom/cdrom.c
> @@ -2090,7 +2090,6 @@ static int cdrom_read_cdda_bpc(struct cd
>  {
>  	request_queue_t *q = cdi->disk->queue;
>  	struct request *rq;
> -	struct bio *bio;
>  	unsigned int len;
>  	int nr, ret = 0;
>  
> @@ -2131,10 +2130,6 @@ static int cdrom_read_cdda_bpc(struct cd
>  		rq->cmd_len = 12;
>  		rq->flags |= REQ_BLOCK_PC;
>  		rq->timeout = 60 * HZ;
> -		bio = rq->bio;
> -
> -		if (rq->bio)
> -			blk_queue_bounce(q, &rq->bio);
>  
>  		if (blk_execute_rq(q, cdi->disk, rq, 0)) {
>  			struct request_sense *s = rq->sense;
> @@ -2142,9 +2137,6 @@ static int cdrom_read_cdda_bpc(struct cd
>  			cdi->last_sense = s->sense_key;
>  		}
>  
> -		if (blk_rq_unmap_user(bio, len))
> -			ret = -EFAULT;
> -
>  		if (ret)
>  			break;
>  
> diff --git a/fs/bio.c b/fs/bio.c
> --- a/fs/bio.c
> +++ b/fs/bio.c
> @@ -432,13 +432,15 @@ static struct bio_map_data *bio_alloc_ma
>  }
>  
>  /**
> - *	bio_uncopy_user	-	finish previously mapped bio
> + *	bio_uncopy_user_endio	-	finish previously mapped bio
>   *	@bio: bio being terminated
> + *	@bytes: bytes completed
> + *	@err: completion status
>   *
>   *	Free pages allocated from bio_copy_user() and write back data
>   *	to user space in case of a read.
>   */
> -int bio_uncopy_user(struct bio *bio)
> +static int bio_uncopy_user_endio(struct bio *bio, unsigned int bytes, int err)
>  {
>  	struct bio_map_data *bmd = bio->bi_private;
>  	const int read = bio_data_dir(bio) == READ;
> @@ -457,7 +459,7 @@ int bio_uncopy_user(struct bio *bio)
>  	}
>  	bio_free_map_data(bmd);
>  	bio_put(bio);
> -	return ret;
> +	return 0;
>  }
>  
>  /**
> @@ -494,6 +496,7 @@ struct bio *bio_copy_user(request_queue_
>  		goto out_bmd;
>  
>  	bio->bi_rw |= (!write_to_vm << BIO_RW);
> +	bio->bi_end_io = bio_uncopy_user_endio;
>  
>  	ret = 0;
>  	while (len) {
> @@ -550,6 +553,55 @@ out_bmd:
>  	return ERR_PTR(ret);
>  }
>  
> +static void __bio_unmap_user(struct bio *bio)
> +{
> +	struct bio_vec *bvec;
> +	int i;
> +
> +	/*
> +	 * make sure we dirty pages we wrote to
> +	 */
> +	__bio_for_each_segment(bvec, bio, i, 0) {
> +		if (bio_data_dir(bio) == READ)
> +			set_page_dirty_lock(bvec->bv_page);
> +
> +		page_cache_release(bvec->bv_page);
> +	}
> +
> +	kfree(bio->bi_private);
> +}
> +
> +/**
> + *	bio_unmap_user	-	unmap a bio
> + *	@bio:		the bio being unmapped
> + *
> + *	Unmap a bio previously mapped by bio_map_user(). Must be called with
> + *	a process context.
> + *
> + *	bio_unmap_user() may sleep.
> + */
> +static void bio_unmap_user(void *data)
> +{
> +	struct bio *bio = data;
> +
> +	__bio_unmap_user(bio);
> +	bio_put(bio);
> +}
> +
> +
> +static int bio_unmap_user_endio(struct bio *bio, unsigned int bytes, int err)
> +{
> +	struct work_struct *work;
> +
> +	if (bio->bi_size)
> +		return 1;
> +
> +	work = bio->bi_private;
> +	INIT_WORK(work, bio_unmap_user, bio); 
> +	schedule_work(work);
> +	return 0;
> +}
> +
>  static struct bio *__bio_map_user_iov(request_queue_t *q,
>  				      struct block_device *bdev,
>  				      struct sg_iovec *iov, int iov_count,
> @@ -557,7 +609,7 @@ static struct bio *__bio_map_user_iov(re
>  {
>  	int i, j;
>  	int nr_pages = 0;
> -	struct page **pages;
> +	struct page **pages = NULL;
>  	struct bio *bio;
>  	int cur_page = 0;
>  	int ret, offset;
> @@ -585,6 +637,10 @@ static struct bio *__bio_map_user_iov(re
>  		return ERR_PTR(-ENOMEM);
>  
>  	ret = -ENOMEM;
> +	bio->bi_private = kmalloc(GFP_KERNEL, sizeof(struct work_struct));
> +	if (!bio->bi_private)
> +		goto out;
> +
>  	pages = kmalloc(nr_pages * sizeof(struct page *), GFP_KERNEL);
>  	if (!pages)
>  		goto out;
> @@ -645,6 +701,7 @@ static struct bio *__bio_map_user_iov(re
>  	if (!write_to_vm)
>  		bio->bi_rw |= (1 << BIO_RW);
>  
> +	bio->bi_end_io = bio_unmap_user_endio;
>  	bio->bi_bdev = bdev;
>  	bio->bi_flags |= (1 << BIO_USER_MAPPED);
>  	return bio;
> @@ -656,6 +713,7 @@ static struct bio *__bio_map_user_iov(re
>  		page_cache_release(pages[i]);
>  	}
>   out:
> +	kfree(bio->bi_private);
>  	kfree(pages);
>  	bio_put(bio);
>  	return ERR_PTR(ret);
> @@ -706,14 +764,6 @@ struct bio *bio_map_user_iov(request_que
>  	if (IS_ERR(bio))
>  		return bio;
>  
> -	/*
> -	 * subtle -- if __bio_map_user() ended up bouncing a bio,
> -	 * it would normally disappear when its bi_end_io is run.
> -	 * however, we need it for the unmap, so grab an extra
> -	 * reference to it
> -	 */
> -	bio_get(bio);
> -
>  	for (i = 0; i < iov_count; i++)
>  		len += iov[i].iov_len;
>  
> @@ -724,43 +774,9 @@ struct bio *bio_map_user_iov(request_que
>  	 * don't support partial mappings
>  	 */
>  	bio_endio(bio, bio->bi_size, 0);
> -	bio_unmap_user(bio);
>  	return ERR_PTR(-EINVAL);
>  }
>  
> -static void __bio_unmap_user(struct bio *bio)
> -{
> -	struct bio_vec *bvec;
> -	int i;
> -
> -	/*
> -	 * make sure we dirty pages we wrote to
> -	 */
> -	__bio_for_each_segment(bvec, bio, i, 0) {
> -		if (bio_data_dir(bio) == READ)
> -			set_page_dirty_lock(bvec->bv_page);
> -
> -		page_cache_release(bvec->bv_page);
> -	}
> -
> -	bio_put(bio);
> -}
> -
> -/**
> - *	bio_unmap_user	-	unmap a bio
> - *	@bio:		the bio being unmapped
> - *
> - *	Unmap a bio previously mapped by bio_map_user(). Must be called with
> - *	a process context.
> - *
> - *	bio_unmap_user() may sleep.
> - */
> -void bio_unmap_user(struct bio *bio)
> -{
> -	__bio_unmap_user(bio);
> -	bio_put(bio);
> -}
> -
>  static int bio_map_kern_endio(struct bio *bio, unsigned int bytes_done, int err)
>  {
>  	if (bio->bi_size)
> @@ -1223,13 +1239,11 @@ EXPORT_SYMBOL(bio_hw_segments);
>  EXPORT_SYMBOL(bio_add_page);
>  EXPORT_SYMBOL(bio_get_nr_vecs);
>  EXPORT_SYMBOL(bio_map_user);
> -EXPORT_SYMBOL(bio_unmap_user);
>  EXPORT_SYMBOL(bio_map_kern);
>  EXPORT_SYMBOL(bio_pair_release);
>  EXPORT_SYMBOL(bio_split);
>  EXPORT_SYMBOL(bio_split_pool);
>  EXPORT_SYMBOL(bio_copy_user);
> -EXPORT_SYMBOL(bio_uncopy_user);
>  EXPORT_SYMBOL(bioset_create);
>  EXPORT_SYMBOL(bioset_free);
>  EXPORT_SYMBOL(bio_alloc_bioset);
> diff --git a/include/linux/bio.h b/include/linux/bio.h
> --- a/include/linux/bio.h
> +++ b/include/linux/bio.h
> @@ -285,13 +285,11 @@ struct sg_iovec;
>  extern struct bio *bio_map_user_iov(struct request_queue *,
>  				    struct block_device *,
>  				    struct sg_iovec *, int, int);
> -extern void bio_unmap_user(struct bio *);
>  extern struct bio *bio_map_kern(struct request_queue *, void *, unsigned int,
>  				unsigned int);
>  extern void bio_set_pages_dirty(struct bio *bio);
>  extern void bio_check_pages_dirty(struct bio *bio);
>  extern struct bio *bio_copy_user(struct request_queue *, unsigned long, unsigned int, int);
> -extern int bio_uncopy_user(struct bio *);
>  void zero_fill_bio(struct bio *bio);
>  
>  #ifdef CONFIG_HIGHMEM
> diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
> --- a/include/linux/blkdev.h
> +++ b/include/linux/blkdev.h
> @@ -553,7 +553,6 @@ extern void __blk_stop_queue(request_que
>  extern void blk_run_queue(request_queue_t *);
>  extern void blk_queue_activity_fn(request_queue_t *, activity_fn *, void *);
>  extern int blk_rq_map_user(request_queue_t *, struct request *, void __user *, unsigned int);
> -extern int blk_rq_unmap_user(struct bio *, unsigned int);
>  extern int blk_rq_map_kern(request_queue_t *, struct request *, void *, unsigned int, unsigned int);
>  extern int blk_rq_map_user_iov(request_queue_t *, struct request *, struct sg_iovec *, int);
>  extern int blk_execute_rq(request_queue_t *, struct gendisk *,
> 
> 
> 

-- 
Jens Axboe

-
: send the line "unsubscribe linux-scsi" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Index of Archives]     [SCSI Target Devel]     [Linux SCSI Target Infrastructure]     [Kernel Newbies]     [IDE]     [Security]     [Git]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux ATA RAID]     [Linux IIO]     [Samba]     [Device Mapper]
  Powered by Linux