On Wed, Aug 11, 2021 at 01:35:30PM -0600, Jens Axboe wrote:
> +	struct bio *bio;
> +	unsigned int i;
> +
> +	i = 0;

Initialize at declaration time?

> +static inline bool __bio_put(struct bio *bio)
> +{
> +	if (!bio_flagged(bio, BIO_REFFED))
> +		return true;
> +
> +	BIO_BUG_ON(!atomic_read(&bio->__bi_cnt));
> +
> +	/*
> +	 * last put frees it
> +	 */
> +	return atomic_dec_and_test(&bio->__bi_cnt);
> +}

Please avoid this helper, we can trivially do the check inside of
bio_put:

	if (bio_flagged(bio, BIO_REFFED)) {
		BIO_BUG_ON(!atomic_read(&bio->__bi_cnt));
		if (!atomic_dec_and_test(&bio->__bi_cnt))
			return;
	}

> -	bio_free(bio);
> +	if (bio_flagged(bio, BIO_PERCPU_CACHE)) {
> +		struct bio_alloc_cache *cache;
> +
> +		bio_uninit(bio);
> +		cache = per_cpu_ptr(bio->bi_pool->cache, get_cpu());
> +		bio_list_add_head(&cache->free_list, bio);
> +		cache->nr++;
> +		if (cache->nr > ALLOC_CACHE_MAX + ALLOC_CACHE_SLACK)

Folding the increment as a prefix here would make the increment and
test semantics a little more obvious.

> +struct bio *bio_alloc_kiocb(struct kiocb *kiocb, gfp_t gfp,
> +			    unsigned short nr_vecs, struct bio_set *bs)
> +{
> +	struct bio_alloc_cache *cache = NULL;
> +	struct bio *bio;
> +
> +	if (!(kiocb->ki_flags & IOCB_ALLOC_CACHE) || nr_vecs > BIO_INLINE_VECS)
> +		goto normal_alloc;
> +
> +	cache = per_cpu_ptr(bs->cache, get_cpu());
> +	bio = bio_list_pop(&cache->free_list);
> +	if (bio) {
> +		cache->nr--;
> +		put_cpu();
> +		bio_init(bio, nr_vecs ? bio->bi_inline_vecs : NULL, nr_vecs);
> +		bio_set_flag(bio, BIO_PERCPU_CACHE);
> +		return bio;
> +	}
> +	put_cpu();
> +normal_alloc:
> +	bio = bio_alloc_bioset(gfp, nr_vecs, bs);
> +	if (cache)
> +		bio_set_flag(bio, BIO_PERCPU_CACHE);
> +	return bio;

The goto here is pretty obfuscating and adds an extra branch to the
fast path.

Also I don't think we need the gfp argument here at all - it should
always be GFP_KERNEL.  In fact I plan to kill these arguments from much
of the block layer, as the places that need NOIO or NOFS semantics can
and should move to set that in the per-thread context.

> -static inline struct bio *bio_list_pop(struct bio_list *bl)
> +static inline void bio_list_del_head(struct bio_list *bl, struct bio *head)
>  {
> -	struct bio *bio = bl->head;
> -
> -	if (bio) {
> +	if (head) {
>  		bl->head = bl->head->bi_next;
>  		if (!bl->head)
>  			bl->tail = NULL;
>
> -		bio->bi_next = NULL;
> +		head->bi_next = NULL;
>  	}
> +}
> +
> +static inline struct bio *bio_list_pop(struct bio_list *bl)
> +{
> +	struct bio *bio = bl->head;
>
> +	bio_list_del_head(bl, bio);
>  	return bio;
>  }

No need for this change.

> @@ -699,6 +706,12 @@ struct bio_set {
>  	struct kmem_cache *bio_slab;
>  	unsigned int front_pad;
>
> +	/*
> +	 * per-cpu bio alloc cache and notifier
> +	 */
> +	struct bio_alloc_cache __percpu *cache;
> +	struct hlist_node cpuhp_dead;
> +

I'd keep the hotplug list entry at the end instead of bloating the
cache line used in the fast path.
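
To make the bio_put() suggestions above concrete, here is a rough sketch
of how the BIO_REFFED check and the per-cpu cache free path could sit
directly in bio_put(), with the counter increment folded into the test.
This is only an illustration: bio_alloc_cache_prune() is a hypothetical
name for the trimming step that the quoted hunk cuts off before, and the
put_cpu()/else bio_free() structure is assumed from the truncated
context rather than taken from the patch.

	void bio_put(struct bio *bio)
	{
		if (bio_flagged(bio, BIO_REFFED)) {
			BIO_BUG_ON(!atomic_read(&bio->__bi_cnt));
			/* last put frees it */
			if (!atomic_dec_and_test(&bio->__bi_cnt))
				return;
		}

		if (bio_flagged(bio, BIO_PERCPU_CACHE)) {
			struct bio_alloc_cache *cache;

			bio_uninit(bio);
			cache = per_cpu_ptr(bio->bi_pool->cache, get_cpu());
			bio_list_add_head(&cache->free_list, bio);
			/* prefix increment keeps the count/test in one place */
			if (++cache->nr > ALLOC_CACHE_MAX + ALLOC_CACHE_SLACK)
				bio_alloc_cache_prune(cache, ALLOC_CACHE_SLACK);
			put_cpu();
		} else {
			bio_free(bio);
		}
	}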
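
For the bio_alloc_kiocb() comments, a sketch of what the function might
look like with the goto removed and the gfp argument dropped in favour of
an unconditional GFP_KERNEL; this is just an illustration of the
restructuring, not necessarily the final form of the patch.

	struct bio *bio_alloc_kiocb(struct kiocb *kiocb, unsigned short nr_vecs,
				    struct bio_set *bs)
	{
		struct bio_alloc_cache *cache;
		struct bio *bio;

		/* fall back to the normal allocator when the cache can't be used */
		if (!(kiocb->ki_flags & IOCB_ALLOC_CACHE) ||
		    nr_vecs > BIO_INLINE_VECS)
			return bio_alloc_bioset(GFP_KERNEL, nr_vecs, bs);

		cache = per_cpu_ptr(bs->cache, get_cpu());
		bio = bio_list_pop(&cache->free_list);
		if (bio) {
			cache->nr--;
			put_cpu();
			bio_init(bio, nr_vecs ? bio->bi_inline_vecs : NULL, nr_vecs);
		} else {
			put_cpu();
			bio = bio_alloc_bioset(GFP_KERNEL, nr_vecs, bs);
		}
		/* either way this bio is returned to the per-cpu cache on put */
		bio_set_flag(bio, BIO_PERCPU_CACHE);
		return bio;
	}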
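
And for the last point, a sketch of the struct bio_set layout with the
hotplug list entry moved to the end; the members between cache and
cpuhp_dead are omitted here, this only shows the intended placement.

	struct bio_set {
		struct kmem_cache *bio_slab;
		unsigned int front_pad;

		/*
		 * per-cpu bio alloc cache
		 */
		struct bio_alloc_cache __percpu *cache;

		/* (other bio_set members unchanged) */

		/*
		 * cpu hotplug notifier for the per-cpu cache, kept at the end
		 * so it does not share a cache line with the fast-path members
		 */
		struct hlist_node cpuhp_dead;
	};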