On Fri, Dec 03, 2021 at 08:35:40AM -0700, Jens Axboe wrote:
> refcount_t is not as expensive as it used to be, but it's still more
> expensive than the io_uring method of using atomic_t and just checking
> for potential over/underflow.
> 
> This borrows that same implementation, which in turn is based on the
> mm implementation from Linus.

If refcount_t isn't good enough for a normal kernel fast path we have a
problem. Can we discuss that with the maintainers instead of coming up
with our home grown schemes again?

> 
> Signed-off-by: Jens Axboe <axboe@xxxxxxxxx>
> 
> ---
> 
> diff --git a/block/blk-flush.c b/block/blk-flush.c
> index f78bb39e589e..e4df894189ce 100644
> --- a/block/blk-flush.c
> +++ b/block/blk-flush.c
> @@ -229,7 +229,7 @@ static void flush_end_io(struct request *flush_rq, blk_status_t error)
>  	/* release the tag's ownership to the req cloned from */
>  	spin_lock_irqsave(&fq->mq_flush_lock, flags);
> 
> -	if (!refcount_dec_and_test(&flush_rq->ref)) {
> +	if (!req_ref_put_and_test(flush_rq)) {
>  		fq->rq_status = error;
>  		spin_unlock_irqrestore(&fq->mq_flush_lock, flags);
>  		return;
> @@ -349,7 +349,7 @@ static void blk_kick_flush(struct request_queue *q, struct blk_flush_queue *fq,
>  	 * and READ flush_rq->end_io
>  	 */
>  	smp_wmb();
> -	refcount_set(&flush_rq->ref, 1);
> +	req_ref_set(flush_rq, 1);
> 
>  	blk_flush_queue_rq(flush_rq, false);
>  }
> diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c
> index 995336abee33..380e2dd31bfc 100644
> --- a/block/blk-mq-tag.c
> +++ b/block/blk-mq-tag.c
> @@ -228,7 +228,7 @@ static struct request *blk_mq_find_and_get_req(struct blk_mq_tags *tags,
> 
>  	spin_lock_irqsave(&tags->lock, flags);
>  	rq = tags->rqs[bitnr];
> -	if (!rq || rq->tag != bitnr || !refcount_inc_not_zero(&rq->ref))
> +	if (!rq || rq->tag != bitnr || !req_ref_inc_not_zero(rq))
>  		rq = NULL;
>  	spin_unlock_irqrestore(&tags->lock, flags);
>  	return rq;
> diff --git a/block/blk-mq.c b/block/blk-mq.c
> index fa464a3e2f9a..22ec21aa0c22 100644
> --- a/block/blk-mq.c
> +++ b/block/blk-mq.c
> @@ -386,7 +386,7 @@ static struct request *blk_mq_rq_ctx_init(struct blk_mq_alloc_data *data,
>  	INIT_LIST_HEAD(&rq->queuelist);
>  	/* tag was already set */
>  	WRITE_ONCE(rq->deadline, 0);
> -	refcount_set(&rq->ref, 1);
> +	req_ref_set(rq, 1);
> 
>  	if (rq->rq_flags & RQF_ELV) {
>  		struct elevator_queue *e = data->q->elevator;
> @@ -634,7 +634,7 @@ void blk_mq_free_request(struct request *rq)
>  	rq_qos_done(q, rq);
> 
>  	WRITE_ONCE(rq->state, MQ_RQ_IDLE);
> -	if (refcount_dec_and_test(&rq->ref))
> +	if (req_ref_put_and_test(rq))
>  		__blk_mq_free_request(rq);
>  }
>  EXPORT_SYMBOL_GPL(blk_mq_free_request);
> @@ -930,7 +930,7 @@ void blk_mq_end_request_batch(struct io_comp_batch *iob)
>  		rq_qos_done(rq->q, rq);
> 
>  		WRITE_ONCE(rq->state, MQ_RQ_IDLE);
> -		if (!refcount_dec_and_test(&rq->ref))
> +		if (!req_ref_put_and_test(rq))
>  			continue;
> 
>  		blk_crypto_free_request(rq);
> @@ -1373,7 +1373,7 @@ void blk_mq_put_rq_ref(struct request *rq)
>  {
>  	if (is_flush_rq(rq))
>  		rq->end_io(rq, 0);
> -	else if (refcount_dec_and_test(&rq->ref))
> +	else if (req_ref_put_and_test(rq))
>  		__blk_mq_free_request(rq);
>  }
> 
> @@ -3003,7 +3003,7 @@ static void blk_mq_clear_rq_mapping(struct blk_mq_tags *drv_tags,
>  		unsigned long rq_addr = (unsigned long)rq;
> 
>  		if (rq_addr >= start && rq_addr < end) {
> -			WARN_ON_ONCE(refcount_read(&rq->ref) != 0);
> +			WARN_ON_ONCE(req_ref_read(rq) != 0);
>  			cmpxchg(&drv_tags->rqs[i], rq, NULL);
>  		}
>  	}
> @@ -3337,7 +3337,7 @@ static void blk_mq_clear_flush_rq_mapping(struct blk_mq_tags *tags,
>  	if (!tags)
>  		return;
> 
> -	WARN_ON_ONCE(refcount_read(&flush_rq->ref) != 0);
> +	WARN_ON_ONCE(req_ref_read(flush_rq) != 0);
> 
>  	for (i = 0; i < queue_depth; i++)
>  		cmpxchg(&tags->rqs[i], flush_rq, NULL);
> diff --git a/block/blk.h b/block/blk.h
> index 296411900c55..f869f4b2dec9 100644
> --- a/block/blk.h
> +++ b/block/blk.h
> @@ -473,4 +473,35 @@ static inline bool should_fail_request(struct block_device *part,
>  }
>  #endif /* CONFIG_FAIL_MAKE_REQUEST */
> 
> +/*
> + * Optimized request reference counting. Ideally we'd make timeouts be more
> + * clever, as that's the only reason we need references at all... But until
> + * this happens, this is faster than using refcount_t. Also see:
> + *
> + * abc54d634334 ("io_uring: switch to atomic_t for io_kiocb reference count")
> + */
> +#define req_ref_zero_or_close_to_overflow(req)	\
> +	((unsigned int) atomic_read(&(req->ref)) + 127u <= 127u)
> +
> +static inline bool req_ref_inc_not_zero(struct request *req)
> +{
> +	return atomic_inc_not_zero(&req->ref);
> +}
> +
> +static inline bool req_ref_put_and_test(struct request *req)
> +{
> +	WARN_ON_ONCE(req_ref_zero_or_close_to_overflow(req));
> +	return atomic_dec_and_test(&req->ref);
> +}
> +
> +static inline void req_ref_set(struct request *req, int value)
> +{
> +	atomic_set(&req->ref, value);
> +}
> +
> +static inline int req_ref_read(struct request *req)
> +{
> +	return atomic_read(&req->ref);
> +}
> +
>  #endif /* BLK_INTERNAL_H */
> diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
> index bfc3cc61f653..ecdc049b52fa 100644
> --- a/include/linux/blk-mq.h
> +++ b/include/linux/blk-mq.h
> @@ -138,7 +138,7 @@ struct request {
>  	unsigned short ioprio;
> 
>  	enum mq_rq_state state;
> -	refcount_t ref;
> +	atomic_t ref;
> 
>  	unsigned long deadline;
> 
> -- 
> Jens Axboe
---end quoted text---
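For readers unfamiliar with the trick the patch borrows: req_ref_zero_or_close_to_overflow() casts the counter to unsigned before adding 127, so the comparison is true only for a counter that is already zero or within 127 of wrapping the 32-bit value (i.e. it has underflowed or is about to overflow). A minimal stand-alone user-space sketch, illustration only and not part of the patch (the helper name and main() below are made up for demonstration):

    #include <assert.h>
    #include <stdio.h>

    /* Same arithmetic as req_ref_zero_or_close_to_overflow(), on a plain int. */
    static int ref_zero_or_close_to_overflow(int ref)
    {
            return (unsigned int)ref + 127u <= 127u;
    }

    int main(void)
    {
            assert(!ref_zero_or_close_to_overflow(1));    /* healthy count        */
            assert(!ref_zero_or_close_to_overflow(1000)); /* still fine           */
            assert(ref_zero_or_close_to_overflow(0));     /* already dropped to 0 */
            assert(ref_zero_or_close_to_overflow(-1));    /* wrapped / underflow  */
            printf("check flags only zero or nearly-wrapped counters\n");
            return 0;
    }

In the patch, req_ref_put_and_test() runs this check under WARN_ON_ONCE() before the decrement; unlike refcount_t, which saturates on misuse, this scheme only warns and relies on the caller holding a valid reference.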