Re: [PATCH v2 0/4] block/dm: use BIOSET_PERCPU_CACHE from bio_alloc_bioset

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Thu, Mar 24 2022 at  3:39P -0400,
Christoph Hellwig <hch@xxxxxx> wrote:

> On Wed, Mar 23, 2022 at 03:45:20PM -0400, Mike Snitzer wrote:
> > I tried to address your review of the previous set. Patch 1 and 2 can
> > obviously be folded but I left them split out for review purposes.
> > Feel free to see if these changes are meaningful for nvme's use.
> > Happy for either you to take on iterating on these block changes
> > further or you letting me know what changes you'd like made.
> 
> I'd be tempted to go with something like the version below, which
> does away with the bio flag and the bio_alloc_kiocb wrapper to
> further simplify the interface.  The additional changes needed for
> dm, like the bioset_init_from_src changes and the move of
> bio_clear_polled, can then be built on top of that.

Sure, should work fine, I'll rebase ontop of this and send out v3
later today.

FYI, I kept BIO_PERCPU_CACHE in v2 because it gave each bio-allocating
layer above and below a particular device the flexibility to decide
autonomously whether or not to provide a bio alloc cache. But thinking
further after seeing your patch: it seems reasonable for stacked
devices to just require that the entire stack enable and use a bio
alloc cache.  And it does prevent developers from hijacking
REQ_ALLOC_CACHE for their own needs (completely independent of a
bioset's alloc cache).

Thanks,
Mike


 
> ---
> From ec0493b86a3240e7f9f2d46a1298bd40ccf15e80 Mon Sep 17 00:00:00 2001
> From: Mike Snitzer <snitzer@xxxxxxxxxx>
> Date: Wed, 23 Mar 2022 15:45:21 -0400
> Subject: block: allow using the per-cpu bio cache from bio_alloc_bioset
> 
> Replace the BIO_PERCPU_CACHE bio-internal flag with a REQ_ALLOC_CACHE
> one that can be passed to bio_alloc / bio_alloc_bioset, and implement
> the percpu cache allocation logic in a helper called from
> bio_alloc_bioset.  This allows any bio_alloc_bioset user to use the
> percpu caches instead of having the functionality tied to struct kiocb.
> 
> Signed-off-by: Mike Snitzer <snitzer@xxxxxxxxxx>
> [hch: refactored a bit]
> Signed-off-by: Christoph Hellwig <hch@xxxxxx>
> ---
>  block/bio.c               | 86 +++++++++++++++++++--------------------
>  block/blk.h               |  3 +-
>  block/fops.c              | 11 +++--
>  include/linux/bio.h       |  2 -
>  include/linux/blk_types.h |  3 +-
>  5 files changed, 52 insertions(+), 53 deletions(-)
> 
> diff --git a/block/bio.c b/block/bio.c
> index 33979f306e9e7..d780e2cbea437 100644
> --- a/block/bio.c
> +++ b/block/bio.c
> @@ -420,6 +420,28 @@ static void punt_bios_to_rescuer(struct bio_set *bs)
>  	queue_work(bs->rescue_workqueue, &bs->rescue_work);
>  }
>  
> +static struct bio *bio_alloc_percpu_cache(struct block_device *bdev,
> +		unsigned short nr_vecs, unsigned int opf, gfp_t gfp,
> +		struct bio_set *bs)
> +{
> +	struct bio_alloc_cache *cache;
> +	struct bio *bio;
> +
> +	cache = per_cpu_ptr(bs->cache, get_cpu());
> +	if (!cache->free_list) {
> +		put_cpu();
> +		return NULL;
> +	}
> +	bio = cache->free_list;
> +	cache->free_list = bio->bi_next;
> +	cache->nr--;
> +	put_cpu();
> +
> +	bio_init(bio, bdev, nr_vecs ? bio->bi_inline_vecs : NULL, nr_vecs, opf);
> +	bio->bi_pool = bs;
> +	return bio;
> +}
> +
>  /**
>   * bio_alloc_bioset - allocate a bio for I/O
>   * @bdev:	block device to allocate the bio for (can be %NULL)
> @@ -452,6 +474,9 @@ static void punt_bios_to_rescuer(struct bio_set *bs)
>   * submit_bio_noacct() should be avoided - instead, use bio_set's front_pad
>   * for per bio allocations.
>   *
> + * If REQ_ALLOC_CACHE is set, the final put of the bio MUST be done from process
> + * context, not hard/soft IRQ.
> + *
>   * Returns: Pointer to new bio on success, NULL on failure.
>   */
>  struct bio *bio_alloc_bioset(struct block_device *bdev, unsigned short nr_vecs,
> @@ -466,6 +491,21 @@ struct bio *bio_alloc_bioset(struct block_device *bdev, unsigned short nr_vecs,
>  	if (WARN_ON_ONCE(!mempool_initialized(&bs->bvec_pool) && nr_vecs > 0))
>  		return NULL;
>  
> +	if (opf & REQ_ALLOC_CACHE) {
> +		if (bs->cache && nr_vecs <= BIO_INLINE_VECS) {
> +			bio = bio_alloc_percpu_cache(bdev, nr_vecs, opf,
> +						     gfp_mask, bs);
> +			if (bio)
> +				return bio;
> +			/*
> +			 * No cached bio available, mark bio returned below to
> +			 * participate in per-cpu alloc cache.
> +			 */
> +		} else {
> +			opf &= ~REQ_ALLOC_CACHE;
> +		}
> +	}
> +
>  	/*
>  	 * submit_bio_noacct() converts recursion to iteration; this means if
>  	 * we're running beneath it, any bios we allocate and submit will not be
> @@ -712,7 +752,7 @@ void bio_put(struct bio *bio)
>  			return;
>  	}
>  
> -	if (bio_flagged(bio, BIO_PERCPU_CACHE)) {
> +	if (bio->bi_opf & REQ_ALLOC_CACHE) {
>  		struct bio_alloc_cache *cache;
>  
>  		bio_uninit(bio);
> @@ -1734,50 +1774,6 @@ int bioset_init_from_src(struct bio_set *bs, struct bio_set *src)
>  }
>  EXPORT_SYMBOL(bioset_init_from_src);
>  
> -/**
> - * bio_alloc_kiocb - Allocate a bio from bio_set based on kiocb
> - * @kiocb:	kiocb describing the IO
> - * @bdev:	block device to allocate the bio for (can be %NULL)
> - * @nr_vecs:	number of iovecs to pre-allocate
> - * @opf:	operation and flags for bio
> - * @bs:		bio_set to allocate from
> - *
> - * Description:
> - *    Like @bio_alloc_bioset, but pass in the kiocb. The kiocb is only
> - *    used to check if we should dip into the per-cpu bio_set allocation
> - *    cache. The allocation uses GFP_KERNEL internally. On return, the
> - *    bio is marked BIO_PERCPU_CACHEABLE, and the final put of the bio
> - *    MUST be done from process context, not hard/soft IRQ.
> - *
> - */
> -struct bio *bio_alloc_kiocb(struct kiocb *kiocb, struct block_device *bdev,
> -		unsigned short nr_vecs, unsigned int opf, struct bio_set *bs)
> -{
> -	struct bio_alloc_cache *cache;
> -	struct bio *bio;
> -
> -	if (!(kiocb->ki_flags & IOCB_ALLOC_CACHE) || nr_vecs > BIO_INLINE_VECS)
> -		return bio_alloc_bioset(bdev, nr_vecs, opf, GFP_KERNEL, bs);
> -
> -	cache = per_cpu_ptr(bs->cache, get_cpu());
> -	if (cache->free_list) {
> -		bio = cache->free_list;
> -		cache->free_list = bio->bi_next;
> -		cache->nr--;
> -		put_cpu();
> -		bio_init(bio, bdev, nr_vecs ? bio->bi_inline_vecs : NULL,
> -			 nr_vecs, opf);
> -		bio->bi_pool = bs;
> -		bio_set_flag(bio, BIO_PERCPU_CACHE);
> -		return bio;
> -	}
> -	put_cpu();
> -	bio = bio_alloc_bioset(bdev, nr_vecs, opf, GFP_KERNEL, bs);
> -	bio_set_flag(bio, BIO_PERCPU_CACHE);
> -	return bio;
> -}
> -EXPORT_SYMBOL_GPL(bio_alloc_kiocb);
> -
>  static int __init init_bio(void)
>  {
>  	int i;
> diff --git a/block/blk.h b/block/blk.h
> index 6f21859c7f0ff..9cb04f24ba8a7 100644
> --- a/block/blk.h
> +++ b/block/blk.h
> @@ -454,8 +454,7 @@ extern struct device_attribute dev_attr_events_poll_msecs;
>  static inline void bio_clear_polled(struct bio *bio)
>  {
>  	/* can't support alloc cache if we turn off polling */
> -	bio_clear_flag(bio, BIO_PERCPU_CACHE);
> -	bio->bi_opf &= ~REQ_POLLED;
> +	bio->bi_opf &= ~(REQ_POLLED | REQ_ALLOC_CACHE);
>  }
>  
>  long blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg);
> diff --git a/block/fops.c b/block/fops.c
> index e49096354dcd6..d1da85bdec31e 100644
> --- a/block/fops.c
> +++ b/block/fops.c
> @@ -198,8 +198,10 @@ static ssize_t __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
>  	    (bdev_logical_block_size(bdev) - 1))
>  		return -EINVAL;
>  
> -	bio = bio_alloc_kiocb(iocb, bdev, nr_pages, opf, &blkdev_dio_pool);
> -
> +	if (iocb->ki_flags & IOCB_ALLOC_CACHE)
> +		opf |= REQ_ALLOC_CACHE;
> +	bio = bio_alloc_bioset(bdev, nr_pages, opf, GFP_KERNEL,
> +			       &blkdev_dio_pool);
>  	dio = container_of(bio, struct blkdev_dio, bio);
>  	atomic_set(&dio->ref, 1);
>  	/*
> @@ -322,7 +324,10 @@ static ssize_t __blkdev_direct_IO_async(struct kiocb *iocb,
>  	    (bdev_logical_block_size(bdev) - 1))
>  		return -EINVAL;
>  
> -	bio = bio_alloc_kiocb(iocb, bdev, nr_pages, opf, &blkdev_dio_pool);
> +	if (iocb->ki_flags & IOCB_ALLOC_CACHE)
> +		opf |= REQ_ALLOC_CACHE;
> +	bio = bio_alloc_bioset(bdev, nr_pages, opf, GFP_KERNEL,
> +			       &blkdev_dio_pool);
>  	dio = container_of(bio, struct blkdev_dio, bio);
>  	dio->flags = 0;
>  	dio->iocb = iocb;
> diff --git a/include/linux/bio.h b/include/linux/bio.h
> index 4c21f6e69e182..10406f57d339e 100644
> --- a/include/linux/bio.h
> +++ b/include/linux/bio.h
> @@ -408,8 +408,6 @@ extern int bioset_init_from_src(struct bio_set *bs, struct bio_set *src);
>  struct bio *bio_alloc_bioset(struct block_device *bdev, unsigned short nr_vecs,
>  			     unsigned int opf, gfp_t gfp_mask,
>  			     struct bio_set *bs);
> -struct bio *bio_alloc_kiocb(struct kiocb *kiocb, struct block_device *bdev,
> -		unsigned short nr_vecs, unsigned int opf, struct bio_set *bs);
>  struct bio *bio_kmalloc(gfp_t gfp_mask, unsigned short nr_iovecs);
>  extern void bio_put(struct bio *);
>  
> diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
> index 0c3563b45fe90..d4ba5251a3a0b 100644
> --- a/include/linux/blk_types.h
> +++ b/include/linux/blk_types.h
> @@ -328,7 +328,6 @@ enum {
>  	BIO_QOS_MERGED,		/* but went through rq_qos merge path */
>  	BIO_REMAPPED,
>  	BIO_ZONE_WRITE_LOCKED,	/* Owns a zoned device zone write lock */
> -	BIO_PERCPU_CACHE,	/* can participate in per-cpu alloc cache */
>  	BIO_FLAG_LAST
>  };
>  
> @@ -415,6 +414,7 @@ enum req_flag_bits {
>  	__REQ_NOUNMAP,		/* do not free blocks when zeroing */
>  
>  	__REQ_POLLED,		/* caller polls for completion using bio_poll */
> +	__REQ_ALLOC_CACHE,	/* allocate IO from cache if available */
>  
>  	/* for driver use */
>  	__REQ_DRV,
> @@ -440,6 +440,7 @@ enum req_flag_bits {
>  
>  #define REQ_NOUNMAP		(1ULL << __REQ_NOUNMAP)
>  #define REQ_POLLED		(1ULL << __REQ_POLLED)
> +#define REQ_ALLOC_CACHE		(1ULL << __REQ_ALLOC_CACHE)
>  
>  #define REQ_DRV			(1ULL << __REQ_DRV)
>  #define REQ_SWAP		(1ULL << __REQ_SWAP)
> -- 
> 2.30.2
> 

--
dm-devel mailing list
dm-devel@xxxxxxxxxx
https://listman.redhat.com/mailman/listinfo/dm-devel




[Index of Archives]     [DM Crypt]     [Fedora Desktop]     [ATA RAID]     [Fedora Marketing]     [Fedora Packaging]     [Fedora SELinux]     [Yosemite Discussion]     [KDE Users]     [Fedora Docs]

  Powered by Linux