On Thu, Mar 24 2022 at 3:39P -0400, Christoph Hellwig <hch@xxxxxx> wrote: > On Wed, Mar 23, 2022 at 03:45:20PM -0400, Mike Snitzer wrote: > > I tried to address your review of the previous set. Patch 1 and 2 can > > obviously be folded but I left them split out for review purposes. > > Feel free to see if these changes are meaningful for nvme's use. > > Happy for either you to take on iterating on these block changes > > further or you letting me know what changes you'd like made. > > I'd be tempted to go with something like the version below, which > does away with the bio flag and the bio_alloc_kiocb wrapper to > further simplify the interface. The additional changes neeed for > dm like the bioset_init_from_src changes and move of bio_clear_polled > can then built on top of that. Sure, should work fine, I'll rebase ontop of this and send out v3 later today. FYI, I kept BIO_PERCPU_CACHE in v2 was because it gave the flexibility of each bio allocating layer above and below a particular device autonomy relative to whether or not they provided a bio alloc cache. But thinking further after seeing your patch: it seems reasonable for stacked devices to just require the entire stack enable and use a bio alloc cache. And it does prevent developers from hijacking REQ_ALLOC_CACHE for their own needs (completely independent of a bioset's alloc cache). Thanks, Mike > --- > From ec0493b86a3240e7f9f2d46a1298bd40ccf15e80 Mon Sep 17 00:00:00 2001 > From: Mike Snitzer <snitzer@xxxxxxxxxx> > Date: Wed, 23 Mar 2022 15:45:21 -0400 > Subject: block: allow using the per-cpu bio cache from bio_alloc_bioset > > Replace the BIO_PERCPU_CACHE bio-internal flag with a REQ_ALLOC_CACHE > one that can be passed to bio_alloc / bio_alloc_bioset, and implement > the percpu cache allocation logic in a helper called from > bio_alloc_bioset. This allows any bio_alloc_bioset user to use the > percpu caches instead of having the functionality tied to struct kiocb. > > Signed-off-by: Mike Snitzer <snitzer@xxxxxxxxxx> > [hch: refactored a bit] > Signed-off-by: Christoph Hellwig <hch@xxxxxx> > --- > block/bio.c | 86 +++++++++++++++++++-------------------- > block/blk.h | 3 +- > block/fops.c | 11 +++-- > include/linux/bio.h | 2 - > include/linux/blk_types.h | 3 +- > 5 files changed, 52 insertions(+), 53 deletions(-) > > diff --git a/block/bio.c b/block/bio.c > index 33979f306e9e7..d780e2cbea437 100644 > --- a/block/bio.c > +++ b/block/bio.c > @@ -420,6 +420,28 @@ static void punt_bios_to_rescuer(struct bio_set *bs) > queue_work(bs->rescue_workqueue, &bs->rescue_work); > } > > +static struct bio *bio_alloc_percpu_cache(struct block_device *bdev, > + unsigned short nr_vecs, unsigned int opf, gfp_t gfp, > + struct bio_set *bs) > +{ > + struct bio_alloc_cache *cache; > + struct bio *bio; > + > + cache = per_cpu_ptr(bs->cache, get_cpu()); > + if (!cache->free_list) { > + put_cpu(); > + return NULL; > + } > + bio = cache->free_list; > + cache->free_list = bio->bi_next; > + cache->nr--; > + put_cpu(); > + > + bio_init(bio, bdev, nr_vecs ? bio->bi_inline_vecs : NULL, nr_vecs, opf); > + bio->bi_pool = bs; > + return bio; > +} > + > /** > * bio_alloc_bioset - allocate a bio for I/O > * @bdev: block device to allocate the bio for (can be %NULL) > @@ -452,6 +474,9 @@ static void punt_bios_to_rescuer(struct bio_set *bs) > * submit_bio_noacct() should be avoided - instead, use bio_set's front_pad > * for per bio allocations. > * > + * If REQ_ALLOC_CACHE is set, the final put of the bio MUST be done from process > + * context, not hard/soft IRQ. > + * > * Returns: Pointer to new bio on success, NULL on failure. > */ > struct bio *bio_alloc_bioset(struct block_device *bdev, unsigned short nr_vecs, > @@ -466,6 +491,21 @@ struct bio *bio_alloc_bioset(struct block_device *bdev, unsigned short nr_vecs, > if (WARN_ON_ONCE(!mempool_initialized(&bs->bvec_pool) && nr_vecs > 0)) > return NULL; > > + if (opf & REQ_ALLOC_CACHE) { > + if (bs->cache && nr_vecs <= BIO_INLINE_VECS) { > + bio = bio_alloc_percpu_cache(bdev, nr_vecs, opf, > + gfp_mask, bs); > + if (bio) > + return bio; > + /* > + * No cached bio available, mark bio returned below to > + * particpate in per-cpu alloc cache. > + */ > + } else { > + opf &= ~REQ_ALLOC_CACHE; > + } > + } > + > /* > * submit_bio_noacct() converts recursion to iteration; this means if > * we're running beneath it, any bios we allocate and submit will not be > @@ -712,7 +752,7 @@ void bio_put(struct bio *bio) > return; > } > > - if (bio_flagged(bio, BIO_PERCPU_CACHE)) { > + if (bio->bi_opf & REQ_ALLOC_CACHE) { > struct bio_alloc_cache *cache; > > bio_uninit(bio); > @@ -1734,50 +1774,6 @@ int bioset_init_from_src(struct bio_set *bs, struct bio_set *src) > } > EXPORT_SYMBOL(bioset_init_from_src); > > -/** > - * bio_alloc_kiocb - Allocate a bio from bio_set based on kiocb > - * @kiocb: kiocb describing the IO > - * @bdev: block device to allocate the bio for (can be %NULL) > - * @nr_vecs: number of iovecs to pre-allocate > - * @opf: operation and flags for bio > - * @bs: bio_set to allocate from > - * > - * Description: > - * Like @bio_alloc_bioset, but pass in the kiocb. The kiocb is only > - * used to check if we should dip into the per-cpu bio_set allocation > - * cache. The allocation uses GFP_KERNEL internally. On return, the > - * bio is marked BIO_PERCPU_CACHEABLE, and the final put of the bio > - * MUST be done from process context, not hard/soft IRQ. > - * > - */ > -struct bio *bio_alloc_kiocb(struct kiocb *kiocb, struct block_device *bdev, > - unsigned short nr_vecs, unsigned int opf, struct bio_set *bs) > -{ > - struct bio_alloc_cache *cache; > - struct bio *bio; > - > - if (!(kiocb->ki_flags & IOCB_ALLOC_CACHE) || nr_vecs > BIO_INLINE_VECS) > - return bio_alloc_bioset(bdev, nr_vecs, opf, GFP_KERNEL, bs); > - > - cache = per_cpu_ptr(bs->cache, get_cpu()); > - if (cache->free_list) { > - bio = cache->free_list; > - cache->free_list = bio->bi_next; > - cache->nr--; > - put_cpu(); > - bio_init(bio, bdev, nr_vecs ? bio->bi_inline_vecs : NULL, > - nr_vecs, opf); > - bio->bi_pool = bs; > - bio_set_flag(bio, BIO_PERCPU_CACHE); > - return bio; > - } > - put_cpu(); > - bio = bio_alloc_bioset(bdev, nr_vecs, opf, GFP_KERNEL, bs); > - bio_set_flag(bio, BIO_PERCPU_CACHE); > - return bio; > -} > -EXPORT_SYMBOL_GPL(bio_alloc_kiocb); > - > static int __init init_bio(void) > { > int i; > diff --git a/block/blk.h b/block/blk.h > index 6f21859c7f0ff..9cb04f24ba8a7 100644 > --- a/block/blk.h > +++ b/block/blk.h > @@ -454,8 +454,7 @@ extern struct device_attribute dev_attr_events_poll_msecs; > static inline void bio_clear_polled(struct bio *bio) > { > /* can't support alloc cache if we turn off polling */ > - bio_clear_flag(bio, BIO_PERCPU_CACHE); > - bio->bi_opf &= ~REQ_POLLED; > + bio->bi_opf &= ~(REQ_POLLED | REQ_ALLOC_CACHE); > } > > long blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg); > diff --git a/block/fops.c b/block/fops.c > index e49096354dcd6..d1da85bdec31e 100644 > --- a/block/fops.c > +++ b/block/fops.c > @@ -198,8 +198,10 @@ static ssize_t __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, > (bdev_logical_block_size(bdev) - 1)) > return -EINVAL; > > - bio = bio_alloc_kiocb(iocb, bdev, nr_pages, opf, &blkdev_dio_pool); > - > + if (iocb->ki_flags & IOCB_ALLOC_CACHE) > + opf |= REQ_ALLOC_CACHE; > + bio = bio_alloc_bioset(bdev, nr_pages, opf, GFP_KERNEL, > + &blkdev_dio_pool); > dio = container_of(bio, struct blkdev_dio, bio); > atomic_set(&dio->ref, 1); > /* > @@ -322,7 +324,10 @@ static ssize_t __blkdev_direct_IO_async(struct kiocb *iocb, > (bdev_logical_block_size(bdev) - 1)) > return -EINVAL; > > - bio = bio_alloc_kiocb(iocb, bdev, nr_pages, opf, &blkdev_dio_pool); > + if (iocb->ki_flags & IOCB_ALLOC_CACHE) > + opf |= REQ_ALLOC_CACHE; > + bio = bio_alloc_bioset(bdev, nr_pages, opf, GFP_KERNEL, > + &blkdev_dio_pool); > dio = container_of(bio, struct blkdev_dio, bio); > dio->flags = 0; > dio->iocb = iocb; > diff --git a/include/linux/bio.h b/include/linux/bio.h > index 4c21f6e69e182..10406f57d339e 100644 > --- a/include/linux/bio.h > +++ b/include/linux/bio.h > @@ -408,8 +408,6 @@ extern int bioset_init_from_src(struct bio_set *bs, struct bio_set *src); > struct bio *bio_alloc_bioset(struct block_device *bdev, unsigned short nr_vecs, > unsigned int opf, gfp_t gfp_mask, > struct bio_set *bs); > -struct bio *bio_alloc_kiocb(struct kiocb *kiocb, struct block_device *bdev, > - unsigned short nr_vecs, unsigned int opf, struct bio_set *bs); > struct bio *bio_kmalloc(gfp_t gfp_mask, unsigned short nr_iovecs); > extern void bio_put(struct bio *); > > diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h > index 0c3563b45fe90..d4ba5251a3a0b 100644 > --- a/include/linux/blk_types.h > +++ b/include/linux/blk_types.h > @@ -328,7 +328,6 @@ enum { > BIO_QOS_MERGED, /* but went through rq_qos merge path */ > BIO_REMAPPED, > BIO_ZONE_WRITE_LOCKED, /* Owns a zoned device zone write lock */ > - BIO_PERCPU_CACHE, /* can participate in per-cpu alloc cache */ > BIO_FLAG_LAST > }; > > @@ -415,6 +414,7 @@ enum req_flag_bits { > __REQ_NOUNMAP, /* do not free blocks when zeroing */ > > __REQ_POLLED, /* caller polls for completion using bio_poll */ > + __REQ_ALLOC_CACHE, /* allocate IO from cache if available */ > > /* for driver use */ > __REQ_DRV, > @@ -440,6 +440,7 @@ enum req_flag_bits { > > #define REQ_NOUNMAP (1ULL << __REQ_NOUNMAP) > #define REQ_POLLED (1ULL << __REQ_POLLED) > +#define REQ_ALLOC_CACHE (1ULL << __REQ_ALLOC_CACHE) > > #define REQ_DRV (1ULL << __REQ_DRV) > #define REQ_SWAP (1ULL << __REQ_SWAP) > -- > 2.30.2 >