Re: [PATCH v3] scsi_debug: add new defer_type for mq_poll

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Wed, Feb 10, 2021 at 4:26 AM Douglas Gilbert <dgilbert@xxxxxxxxxxxx> wrote:
>
> Add a new sdeb_defer_type enumeration: SDEB_DEFER_POLL for requests
> that have REQ_HIPRI set in cmd_flags field. It is expected that
> these requests will be polled via the mq_poll entry point which
> is driven by calls to blk_poll() in the block layer. Therefore
> timer events are not 'wired up' in the normal fashion.
>
> There are still cases with short delays (e.g. < 10 microseconds)
> where by the time the command response processing occurs, the delay
> is already exceeded in which case the code calls scsi_done()
> directly. In such cases there is no window for mq_poll() to be
> called.
>
> Add 'mq_polls' counter that increments on each scsi_done() called
> via the mq_poll entry point. Can be used to show (with 'cat
> /proc/scsi/scsi_debug/<host_id>') that blk_poll() is causing
> completions rather than some other mechanism.
>
>
> This patch is against the 5.12/scsi-staging branch which includes
>    a98d6bdf181eb71bf4686666eaf47c642a061642
>    scsi_debug : iouring iopoll support
> which it alters. So this patch is a fix of that patch.
>
> Changes since version 2 [sent 20210206 to linux-scsi list]
>   - the sdebug_blk_mq_poll() callback didn't cope with the
>     uncommon case where sqcp->sd_dp is NULL. Fix.

Hi Doug, I tried this patch on top of the below io_uring patch series -
"[v3,1/4] add io_uring with IOPOLL support in scsi layer"

After applying the patch, I am seeing an IO hang issue - the
io_wqe_worker threads go into a tight loop.

  PID  USER     PR  NI    VIRT    RES    SHR S  %CPU  %MEM     TIME+ COMMAND
18210  root     20   0 1316348   3552   2376 R  99.1   0.0   0:24.09 fio
18303  root     20   0       0      0      0 D  78.8   0.0   0:01.75 io_wqe_worker-0
18219  root     20   0       0      0      0 R  71.7   0.0   0:06.59 io_wqe_worker-0


I used the below command -
insmod drivers/scsi/scsi_debug.ko dev_size_mb=1024 sector_size=512
add_host=24 per_host_store=1 ndelay=10000 host_max_queue=32
submit_queues=76 num_parts=1 poll_queues=8

and here is my fio script -
[global]
ioengine=io_uring
hipri=1
direct=1
runtime=30s
rw=randread
norandommap
bs=4k
iodepth=64

[seqprecon]
filename=/dev/sdd
[seqprecon]
filename=/dev/sde
[seqprecon]
filename=/dev/sdf


I will ask Martin to pick all the patches from "[v3,1/4] add io_uring
with IOPOLL support in the scsi layer" except the scsi_debug one. We can
work on scsi_debug separately and send a standalone patch.

Kashyap




>
> Changes since version 1 [sent 20210201 to linux-scsi list]
>   - harden SDEB_DEFER_POLL which broke under testing
>   - add mq_polls counter for debug output
>
> Signed-off-by: Douglas Gilbert <dgilbert@xxxxxxxxxxxx>
> ---
>  drivers/scsi/scsi_debug.c | 141 +++++++++++++++++++++++++-------------
>  1 file changed, 94 insertions(+), 47 deletions(-)
>
> diff --git a/drivers/scsi/scsi_debug.c b/drivers/scsi/scsi_debug.c
> index 746eec521f79..b9ec3a47fbf1 100644
> --- a/drivers/scsi/scsi_debug.c
> +++ b/drivers/scsi/scsi_debug.c
> @@ -322,17 +322,19 @@ struct sdeb_store_info {
>         container_of(d, struct sdebug_host_info, dev)
>
>  enum sdeb_defer_type {SDEB_DEFER_NONE = 0, SDEB_DEFER_HRT = 1,
> -                     SDEB_DEFER_WQ = 2};
> +                     SDEB_DEFER_WQ = 2, SDEB_DEFER_POLL = 3};
>
>  struct sdebug_defer {
>         struct hrtimer hrt;
>         struct execute_work ew;
> +       ktime_t cmpl_ts;/* time since boot to complete this cmd */
>         int sqa_idx;    /* index of sdebug_queue array */
>         int qc_idx;     /* index of sdebug_queued_cmd array within sqa_idx */
>         int hc_idx;     /* hostwide tag index */
>         int issuing_cpu;
>         bool init_hrt;
>         bool init_wq;
> +       bool init_poll;
>         bool aborted;   /* true when blk_abort_request() already called */
>         enum sdeb_defer_type defer_t;
>  };
> @@ -357,6 +359,7 @@ static atomic_t sdebug_completions;  /* count of deferred completions */
>  static atomic_t sdebug_miss_cpus;    /* submission + completion cpus differ */
>  static atomic_t sdebug_a_tsf;       /* 'almost task set full' counter */
>  static atomic_t sdeb_inject_pending;
> +static atomic_t sdeb_mq_poll_count;  /* bumped when mq_poll returns > 0 */
>
>  struct opcode_info_t {
>         u8 num_attached;        /* 0 if this is it (i.e. a leaf); use 0xff */
> @@ -4730,7 +4733,6 @@ static void sdebug_q_cmd_complete(struct sdebug_defer *sd_dp)
>         struct scsi_cmnd *scp;
>         struct sdebug_dev_info *devip;
>
> -       sd_dp->defer_t = SDEB_DEFER_NONE;
>         if (unlikely(aborted))
>                 sd_dp->aborted = false;
>         qc_idx = sd_dp->qc_idx;
> @@ -4745,6 +4747,7 @@ static void sdebug_q_cmd_complete(struct sdebug_defer *sd_dp)
>                 return;
>         }
>         spin_lock_irqsave(&sqp->qc_lock, iflags);
> +       sd_dp->defer_t = SDEB_DEFER_NONE;
>         sqcp = &sqp->qc_arr[qc_idx];
>         scp = sqcp->a_cmnd;
>         if (unlikely(scp == NULL)) {
> @@ -5517,40 +5520,66 @@ static int schedule_resp(struct scsi_cmnd *cmnd, struct sdebug_dev_info *devip,
>                                 kt -= d;
>                         }
>                 }
> -               if (!sd_dp->init_hrt) {
> -                       sd_dp->init_hrt = true;
> -                       sqcp->sd_dp = sd_dp;
> -                       hrtimer_init(&sd_dp->hrt, CLOCK_MONOTONIC,
> -                                    HRTIMER_MODE_REL_PINNED);
> -                       sd_dp->hrt.function = sdebug_q_cmd_hrt_complete;
> -                       sd_dp->sqa_idx = sqp - sdebug_q_arr;
> -                       sd_dp->qc_idx = k;
> +               sd_dp->cmpl_ts = ktime_add(ns_to_ktime(ns_from_boot), kt);
> +               if (cmnd->request->cmd_flags & REQ_HIPRI) {
> +                       spin_lock_irqsave(&sqp->qc_lock, iflags);
> +                       if (!sd_dp->init_poll) {
> +                               sd_dp->init_poll = true;
> +                               sqcp->sd_dp = sd_dp;
> +                               sd_dp->sqa_idx = sqp - sdebug_q_arr;
> +                               sd_dp->qc_idx = k;
> +                       }
> +                       sd_dp->defer_t = SDEB_DEFER_POLL;
> +                       spin_unlock_irqrestore(&sqp->qc_lock, iflags);
> +               } else {
> +                       if (!sd_dp->init_hrt) {
> +                               sd_dp->init_hrt = true;
> +                               sqcp->sd_dp = sd_dp;
> +                               hrtimer_init(&sd_dp->hrt, CLOCK_MONOTONIC,
> +                                            HRTIMER_MODE_REL_PINNED);
> +                               sd_dp->hrt.function = sdebug_q_cmd_hrt_complete;
> +                               sd_dp->sqa_idx = sqp - sdebug_q_arr;
> +                               sd_dp->qc_idx = k;
> +                       }
> +                       sd_dp->defer_t = SDEB_DEFER_HRT;
> +                       /* schedule the invocation of scsi_done() for a later time */
> +                       hrtimer_start(&sd_dp->hrt, kt, HRTIMER_MODE_REL_PINNED);
>                 }
>                 if (sdebug_statistics)
>                         sd_dp->issuing_cpu = raw_smp_processor_id();
> -               sd_dp->defer_t = SDEB_DEFER_HRT;
> -               /* schedule the invocation of scsi_done() for a later time */
> -               hrtimer_start(&sd_dp->hrt, kt, HRTIMER_MODE_REL_PINNED);
>         } else {        /* jdelay < 0, use work queue */
> -               if (!sd_dp->init_wq) {
> -                       sd_dp->init_wq = true;
> -                       sqcp->sd_dp = sd_dp;
> -                       sd_dp->sqa_idx = sqp - sdebug_q_arr;
> -                       sd_dp->qc_idx = k;
> -                       INIT_WORK(&sd_dp->ew.work, sdebug_q_cmd_wq_complete);
> -               }
> -               if (sdebug_statistics)
> -                       sd_dp->issuing_cpu = raw_smp_processor_id();
> -               sd_dp->defer_t = SDEB_DEFER_WQ;
>                 if (unlikely((sdebug_opts & SDEBUG_OPT_CMD_ABORT) &&
>                              atomic_read(&sdeb_inject_pending)))
>                         sd_dp->aborted = true;
> -               schedule_work(&sd_dp->ew.work);
> -               if (unlikely((sdebug_opts & SDEBUG_OPT_CMD_ABORT) &&
> -                            atomic_read(&sdeb_inject_pending))) {
> +               sd_dp->cmpl_ts = ns_to_ktime(ns_from_boot);
> +               if (cmnd->request->cmd_flags & REQ_HIPRI) {
> +                       spin_lock_irqsave(&sqp->qc_lock, iflags);
> +                       if (!sd_dp->init_poll) {
> +                               sd_dp->init_poll = true;
> +                               sqcp->sd_dp = sd_dp;
> +                               sd_dp->sqa_idx = sqp - sdebug_q_arr;
> +                               sd_dp->qc_idx = k;
> +                       }
> +                       sd_dp->defer_t = SDEB_DEFER_POLL;
> +                       spin_unlock_irqrestore(&sqp->qc_lock, iflags);
> +               } else {
> +                       if (!sd_dp->init_wq) {
> +                               sd_dp->init_wq = true;
> +                               sqcp->sd_dp = sd_dp;
> +                               sd_dp->sqa_idx = sqp - sdebug_q_arr;
> +                               sd_dp->qc_idx = k;
> +                               INIT_WORK(&sd_dp->ew.work, sdebug_q_cmd_wq_complete);
> +                       }
> +                       sd_dp->defer_t = SDEB_DEFER_WQ;
> +                       schedule_work(&sd_dp->ew.work);
> +               }
> +               if (sdebug_statistics)
> +                       sd_dp->issuing_cpu = raw_smp_processor_id();
> +               if (unlikely(sd_dp->aborted)) {
>                         sdev_printk(KERN_INFO, sdp, "abort request tag %d\n", cmnd->request->tag);
>                         blk_abort_request(cmnd->request);
>                         atomic_set(&sdeb_inject_pending, 0);
> +                       sd_dp->aborted = false;
>                 }
>         }
>         if (unlikely((SDEBUG_OPT_Q_NOISE & sdebug_opts) && scsi_result == device_qfull_result))
> @@ -5779,11 +5808,12 @@ static int scsi_debug_show_info(struct seq_file *m, struct Scsi_Host *host)
>                    dix_reads, dix_writes, dif_errors);
>         seq_printf(m, "usec_in_jiffy=%lu, statistics=%d\n", TICK_NSEC / 1000,
>                    sdebug_statistics);
> -       seq_printf(m, "cmnd_count=%d, completions=%d, %s=%d, a_tsf=%d\n",
> +       seq_printf(m, "cmnd_count=%d, completions=%d, %s=%d, a_tsf=%d, mq_polls=%d\n",
>                    atomic_read(&sdebug_cmnd_count),
>                    atomic_read(&sdebug_completions),
>                    "miss_cpus", atomic_read(&sdebug_miss_cpus),
> -                  atomic_read(&sdebug_a_tsf));
> +                  atomic_read(&sdebug_a_tsf),
> +                  atomic_read(&sdeb_mq_poll_count));
>
>         seq_printf(m, "submit_queues=%d\n", submit_queues);
>         for (j = 0, sqp = sdebug_q_arr; j < submit_queues; ++j, ++sqp) {
> @@ -7246,52 +7276,68 @@ static int sdebug_map_queues(struct Scsi_Host *shost)
>
>  static int sdebug_blk_mq_poll(struct Scsi_Host *shost, unsigned int queue_num)
>  {
> -       int qc_idx;
> -       int retiring = 0;
> +       bool first;
> +       bool retiring = false;
> +       int num_entries = 0;
> +       unsigned int qc_idx = 0;
>         unsigned long iflags;
> +       ktime_t kt_from_boot = ktime_get_boottime();
>         struct sdebug_queue *sqp;
>         struct sdebug_queued_cmd *sqcp;
>         struct scsi_cmnd *scp;
>         struct sdebug_dev_info *devip;
> -       int num_entries = 0;
> +       struct sdebug_defer *sd_dp;
>
>         sqp = sdebug_q_arr + queue_num;
> +       spin_lock_irqsave(&sqp->qc_lock, iflags);
>
> -       do {
> -               spin_lock_irqsave(&sqp->qc_lock, iflags);
> -               qc_idx = find_first_bit(sqp->in_use_bm, sdebug_max_queue);
> -               if (unlikely((qc_idx < 0) || (qc_idx >= sdebug_max_queue)))
> -                       goto out;
> +       for (first = true; first || qc_idx + 1 < sdebug_max_queue; )   {
> +               if (first) {
> +                       qc_idx = find_first_bit(sqp->in_use_bm, sdebug_max_queue);
> +                       first = false;
> +               } else {
> +                       qc_idx = find_next_bit(sqp->in_use_bm, sdebug_max_queue, qc_idx + 1);
> +               }
> +               if (unlikely(qc_idx >= sdebug_max_queue))
> +                       break;
>
>                 sqcp = &sqp->qc_arr[qc_idx];
> +               sd_dp = sqcp->sd_dp;
> +               if (unlikely(!sd_dp))
> +                       continue;
>                 scp = sqcp->a_cmnd;
>                 if (unlikely(scp == NULL)) {
> -                       pr_err("scp is NULL, queue_num=%d, qc_idx=%d from %s\n",
> +                       pr_err("scp is NULL, queue_num=%d, qc_idx=%u from %s\n",
>                                queue_num, qc_idx, __func__);
> -                       goto out;
> +                       break;
>                 }
> +               if (sd_dp->defer_t == SDEB_DEFER_POLL) {
> +                       if (kt_from_boot < sd_dp->cmpl_ts)
> +                               continue;
> +
> +               } else          /* ignoring non REQ_HIPRI requests */
> +                       continue;
>                 devip = (struct sdebug_dev_info *)scp->device->hostdata;
>                 if (likely(devip))
>                         atomic_dec(&devip->num_in_q);
>                 else
>                         pr_err("devip=NULL from %s\n", __func__);
>                 if (unlikely(atomic_read(&retired_max_queue) > 0))
> -                       retiring = 1;
> +                       retiring = true;
>
>                 sqcp->a_cmnd = NULL;
>                 if (unlikely(!test_and_clear_bit(qc_idx, sqp->in_use_bm))) {
> -                       pr_err("Unexpected completion sqp %p queue_num=%d qc_idx=%d from %s\n",
> +                       pr_err("Unexpected completion sqp %p queue_num=%d qc_idx=%u from %s\n",
>                                 sqp, queue_num, qc_idx, __func__);
> -                       goto out;
> +                       break;
>                 }
> -
>                 if (unlikely(retiring)) {       /* user has reduced max_queue */
>                         int k, retval;
>
>                         retval = atomic_read(&retired_max_queue);
>                         if (qc_idx >= retval) {
>                                 pr_err("index %d too large\n", retval);
> -                               goto out;
> +                               break;
>                         }
>                         k = find_last_bit(sqp->in_use_bm, retval);
>                         if ((k < sdebug_max_queue) || (k == retval))
> @@ -7299,17 +7345,18 @@ static int sdebug_blk_mq_poll(struct Scsi_Host *shost, unsigned int queue_num)
>                         else
>                                 atomic_set(&retired_max_queue, k + 1);
>                 }
> +               sd_dp->defer_t = SDEB_DEFER_NONE;
>                 spin_unlock_irqrestore(&sqp->qc_lock, iflags);
>                 scp->scsi_done(scp); /* callback to mid level */
> +               spin_lock_irqsave(&sqp->qc_lock, iflags);
>                 num_entries++;
> -       } while (1);
> -
> -out:
> +       }
>         spin_unlock_irqrestore(&sqp->qc_lock, iflags);
> +       if (num_entries > 0)
> +               atomic_add(num_entries, &sdeb_mq_poll_count);
>         return num_entries;
>  }
>
> -
>  static int scsi_debug_queuecommand(struct Scsi_Host *shost,
>                                    struct scsi_cmnd *scp)
>  {
> --
> 2.25.1
>

Attachment: smime.p7s
Description: S/MIME Cryptographic Signature


[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Index of Archives]     [SCSI Target Devel]     [Linux SCSI Target Infrastructure]     [Kernel Newbies]     [IDE]     [Security]     [Git]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux ATA RAID]     [Linux IIO]     [Samba]     [Device Mapper]

  Powered by Linux