On Fri, Sep 08, 2017 at 04:52:26PM -0700, Bart Van Assche wrote:
> Implement the following approach for blk-mq:
> - Either make blk_get_request() wait or make it fail when a
>   request queue is not in status RPM_ACTIVE.
> - While suspending, suspended or resuming, only process power
>   management requests (REQ_PM).
>
> Reported-by: Oleksandr Natalenko <oleksandr@xxxxxxxxxxxxxx>
> References: "I/O hangs after resuming from suspend-to-ram" (https://marc.info/?l=linux-block&m=150340235201348).

This patch has nothing to do with Oleksandr's report, so please remove
the above two lines. For example, runtime PM can be bypassed via sysfs,
and suspend/resume still works fine.

> Signed-off-by: Bart Van Assche <bart.vanassche@xxxxxxx>
> Cc: Christoph Hellwig <hch@xxxxxx>
> Cc: Hannes Reinecke <hare@xxxxxxxx>
> Cc: Johannes Thumshirn <jthumshirn@xxxxxxx>
> Cc: Rafael J. Wysocki <rafael.j.wysocki@xxxxxxxxx>
> Cc: Ming Lei <ming.lei@xxxxxxxxxx>
> ---
>  block/blk-core.c | 20 ++++++++++++++++----
>  block/blk-mq.c   | 34 ++++++++++++++++++++++++++++++++++
>  2 files changed, 50 insertions(+), 4 deletions(-)
>
> diff --git a/block/blk-core.c b/block/blk-core.c
> index cd2700c763ed..49a4cd5b255e 100644
> --- a/block/blk-core.c
> +++ b/block/blk-core.c
> @@ -3438,10 +3438,6 @@ EXPORT_SYMBOL(blk_finish_plug);
>   */
>  void blk_pm_runtime_init(struct request_queue *q, struct device *dev)
>  {
> -        /* not support for RQF_PM and ->rpm_status in blk-mq yet */
> -        if (q->mq_ops)
> -                return;
> -
>          q->dev = dev;
>          q->rpm_status = RPM_ACTIVE;
>          init_waitqueue_head(&q->rpm_active_wq);
> @@ -3478,6 +3474,19 @@ int blk_pre_runtime_suspend(struct request_queue *q)
>          if (!q->dev)
>                  return ret;
>
> +        if (q->mq_ops) {
> +                percpu_ref_switch_to_atomic_nowait(&q->q_usage_counter);
> +                if (!percpu_ref_is_zero(&q->q_usage_counter)) {
> +                        ret = -EBUSY;
> +                        pm_runtime_mark_last_busy(q->dev);
> +                } else {
> +                        spin_lock_irq(q->queue_lock);
> +                        q->rpm_status = RPM_SUSPENDING;
> +                        spin_unlock_irq(q->queue_lock);
> +                }
> +                return ret;
> +        }
> +
>          spin_lock_irq(q->queue_lock);
>          if (q->nr_pending) {
>                  ret = -EBUSY;
> @@ -3561,6 +3570,9 @@ void blk_post_runtime_resume(struct request_queue *q, int err)
>          if (!q->dev)
>                  return;
>
> +        if (q->mq_ops)
> +                percpu_ref_switch_to_percpu(&q->q_usage_counter);
> +
>          spin_lock_irq(q->queue_lock);
>          if (!err) {
>                  q->rpm_status = RPM_ACTIVE;
> diff --git a/block/blk-mq.c b/block/blk-mq.c
> index 3f18cff80050..cbd680dc194a 100644
> --- a/block/blk-mq.c
> +++ b/block/blk-mq.c
> @@ -383,6 +383,29 @@ static struct request *blk_mq_get_request(struct request_queue *q,
>          return rq;
>  }
>
> +#ifdef CONFIG_PM
> +static bool blk_mq_wait_until_active(struct request_queue *q, bool wait)
> +{
> +        if (!wait)
> +                return false;
> +        /*
> +         * Note: the q->rpm_status check below races against the changes of
> +         * that variable by the blk_{pre,post}_runtime_{suspend,resume}()
> +         * functions. The worst possible consequence of these races is that a
> +         * small number of requests gets passed to the block driver associated
> +         * with the request queue after rpm_status has been changed into
> +         * RPM_SUSPENDING and before it is changed into RPM_SUSPENDED.
> +         */
> +        wait_event(q->rpm_active_wq, q->rpm_status == RPM_ACTIVE);
> +        return true;
> +}
> +#else
> +static bool blk_mq_wait_until_active(struct request_queue *q, bool nowait)
> +{
> +        return true;
> +}
> +#endif
> +
>  struct request *blk_mq_alloc_request(struct request_queue *q, unsigned int op,
>                  unsigned int flags)
>  {
> @@ -390,6 +413,17 @@ struct request *blk_mq_alloc_request(struct request_queue *q, unsigned int op,
>          struct request *rq;
>          int ret;
>
> +        WARN_ON_ONCE((op & REQ_PM) && blk_pm_suspended(q));
> +
> +        /*
> +         * Wait if the request queue is suspended or in the process of
> +         * suspending/resuming and the request being allocated will not be
> +         * used for power management purposes.
> +         */
> +        if (!(op & REQ_PM) &&
> +            !blk_mq_wait_until_active(q, !(op & REQ_NOWAIT)))
> +                return ERR_PTR(-EAGAIN);
> +
>          ret = blk_queue_enter(q, flags & BLK_MQ_REQ_NOWAIT);
>          if (ret)
>                  return ERR_PTR(ret);
> --
> 2.14.1
>

One issue is that pm_runtime_mark_last_busy() isn't updated accurately
here, because blk-mq can't check whether the request being freed is the
last active one, and mark the device as last busy if it is.

--
Ming
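For context on that last point: the legacy (non-blk-mq) path keeps a
per-queue nr_pending count and marks the device as last busy only when
the final pending request is freed. A rough sketch of that legacy
bookkeeping, based on the blk_pm_put_request() helper in the
block/blk-core.c of this era (an illustration only, not part of the
patch under review):

        /*
         * Legacy-path sketch: each non-PM request bumps q->nr_pending when
         * it is queued; when the last pending request is freed, the device
         * is marked "last busy" so the runtime-PM autosuspend timer starts
         * counting from the moment the queue actually went idle.
         */
        static void blk_pm_put_request(struct request *rq)
        {
                if (rq->q->dev && !(rq->rq_flags & RQF_PM) &&
                    !--rq->q->nr_pending)
                        pm_runtime_mark_last_busy(rq->q->dev);
        }

blk-mq tracks in-flight requests only through the q_usage_counter
percpu_ref, so there is no per-queue count to decrement at free time;
that is why the patch above can only call pm_runtime_mark_last_busy()
from blk_pre_runtime_suspend() when the suspend attempt fails with
-EBUSY.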