The struct sdebug_queue is used to manage tags and to hold the
associated queued command entry pointer (for that tag).

Since the tagset iters are now used for functions like
sdebug_blk_mq_poll(), there is no real need to manage these queues.
Indeed, blk-mq already provides what we need for managing tags and
requests.

However the sdebug_queue concept was still being used in
max_queue_store() to find the tags which need to be "retired" when
lowering sdebug_max_queue.

However, the method of reducing sdebug_max_queue separately from
shost->can_queue is fundamentally broken. The shost->can_queue value is
initially used to set the per-HW queue context tag depth in the block
layer. This ensures that the shost is not sent more commands than it can
deal with. Lowering sdebug_max_queue separately means that we can easily
overload the shost. Indeed, since sdebug_q_arr is shared among all
shosts, it is possible to consume all tags in sdebug_q_arr while the
shosts are not overloaded according to the SCSI midlayer/block layer,
i.e. shosts may be sent more commands than they can consume.

Resolve this by changing the per-HW queue context tag depth when
sdebug_max_queue is changed.

The submit queue tag info shown in scsi_debug_show_info() is dropped -
tag info may be obtained from the /sys/kernel/debug/block/ path.

Signed-off-by: John Garry <john.g.garry@xxxxxxxxxx>
---
We should not be calling sbitmap_queue_resize() below - any better
ideas?

Two illustrative sketches of the blk-mq facilities relied on here are
appended after the patch.

 drivers/scsi/scsi_debug.c | 246 +++++++-------------------------------
 1 file changed, 42 insertions(+), 204 deletions(-)

diff --git a/drivers/scsi/scsi_debug.c b/drivers/scsi/scsi_debug.c
index 7ef195072d97..2bfb6db538fc 100644
--- a/drivers/scsi/scsi_debug.c
+++ b/drivers/scsi/scsi_debug.c
@@ -341,8 +341,6 @@ struct sdebug_defer {
 	struct hrtimer hrt;
 	struct execute_work ew;
 	ktime_t cmpl_ts;/* time since boot to complete this cmd */
-	int sqa_idx;	/* index of sdebug_queue array */
-	int hc_idx;	/* hostwide tag index */
 	int issuing_cpu;
 	bool aborted;	/* true when blk_abort_request() already called */
 	enum sdeb_defer_type defer_t;
@@ -360,12 +358,6 @@ struct sdebug_scsi_cmd {
 	spinlock_t lock;
 };
 
-struct sdebug_queue {
-	struct sdebug_queued_cmd *qc_arr[SDEBUG_CANQUEUE];
-	unsigned long in_use_bm[SDEBUG_CANQUEUE_WORDS];
-	spinlock_t qc_lock;
-};
-
 static atomic_t sdebug_cmnd_count;  /* number of incoming commands */
 static atomic_t sdebug_completions;  /* count of deferred completions */
 static atomic_t sdebug_miss_cpus;  /* submission + completion cpus differ */
@@ -848,8 +840,8 @@ static int sdeb_zbc_max_open = DEF_ZBC_MAX_OPEN_ZONES;
 static int sdeb_zbc_nr_conv = DEF_ZBC_NR_CONV_ZONES;
 
 static int submit_queues = DEF_SUBMIT_QUEUES;	/* > 1 for multi-queue (mq) */
+
 static int poll_queues = 2; /* iouring iopoll interface.*/
-static struct sdebug_queue *sdebug_q_arr;  /* ptr to array of submit queues */
 
 static DEFINE_RWLOCK(atomic_rw);
 static DEFINE_RWLOCK(atomic_rw2);
@@ -4904,20 +4896,6 @@ static int resp_rwp_zone(struct scsi_cmnd *scp, struct sdebug_dev_info *devip)
 	return res;
 }
 
-static struct sdebug_queue *get_queue(struct scsi_cmnd *cmnd)
-{
-	u16 hwq;
-	u32 tag = blk_mq_unique_tag(scsi_cmd_to_rq(cmnd));
-
-	hwq = blk_mq_unique_tag_to_hwq(tag);
-
-	pr_debug("tag=%#x, hwq=%d\n", tag, hwq);
-	if (WARN_ON_ONCE(hwq >= submit_queues))
-		hwq = 0;
-
-	return sdebug_q_arr + hwq;
-}
-
 static u32 get_tag(struct scsi_cmnd *cmnd)
 {
 	return blk_mq_unique_tag(scsi_cmd_to_rq(cmnd));
@@ -4927,68 +4905,30 @@ static u32 get_tag(struct scsi_cmnd *cmnd)
 static void sdebug_q_cmd_complete(struct sdebug_defer *sd_dp)
 {
 	struct sdebug_queued_cmd *sqcp = container_of(sd_dp, struct sdebug_queued_cmd, sd_dp);
-	int qc_idx;
-	int retiring = 0;
-	unsigned long flags, iflags;
+	unsigned long flags;
 	struct scsi_cmnd *scp = sqcp->scmd;
 	struct sdebug_scsi_cmd *sdsc;
 	bool aborted;
-	struct sdebug_queue *sqp;
 
-	qc_idx = sd_dp->sqa_idx;
 	if (sdebug_statistics) {
 		atomic_inc(&sdebug_completions);
 		if (raw_smp_processor_id() != sd_dp->issuing_cpu)
 			atomic_inc(&sdebug_miss_cpus);
 	}
+
 	if (!scp) {
 		pr_err("scmd=NULL\n");
 		goto out;
 	}
-	if (unlikely((qc_idx < 0) || (qc_idx >= SDEBUG_CANQUEUE))) {
-		pr_err("wild qc_idx=%d\n", qc_idx);
-		goto out;
-	}
 
 	sdsc = scsi_cmd_priv(scp);
-	sqp = get_queue(scp);
-	spin_lock_irqsave(&sqp->qc_lock, iflags);
 	spin_lock_irqsave(&sdsc->lock, flags);
 	aborted = sd_dp->aborted;
 	if (unlikely(aborted))
 		sd_dp->aborted = false;
 	ASSIGN_QEUEUED_CMD(scp, NULL);
 
-	if (unlikely(atomic_read(&retired_max_queue) > 0))
-		retiring = 1;
-
-	sqp->qc_arr[qc_idx] = NULL;
-	if (unlikely(!test_and_clear_bit(qc_idx, sqp->in_use_bm))) {
-		spin_unlock_irqrestore(&sdsc->lock, flags);
-		spin_unlock_irqrestore(&sqp->qc_lock, iflags);
-		pr_err("Unexpected completion qc_idx=%d\n", qc_idx);
-		goto out;
-	}
-
-	if (unlikely(retiring)) {	/* user has reduced max_queue */
-		int k, retval;
-
-		retval = atomic_read(&retired_max_queue);
-		if (qc_idx >= retval) {
-			spin_unlock_irqrestore(&sdsc->lock, flags);
-			spin_unlock_irqrestore(&sqp->qc_lock, iflags);
-			pr_err("index %d too large\n", retval);
-			goto out;
-		}
-		k = find_last_bit(sqp->in_use_bm, retval);
-		if ((k < sdebug_max_queue) || (k == retval))
-			atomic_set(&retired_max_queue, 0);
-		else
-			atomic_set(&retired_max_queue, k + 1);
-	}
-
 	spin_unlock_irqrestore(&sdsc->lock, flags);
-	spin_unlock_irqrestore(&sqp->qc_lock, iflags);
 
 	if (aborted) {
 		pr_info("bypassing scsi_done() due to aborted cmd, kicking-off EH\n");
@@ -5275,21 +5215,18 @@ static bool stop_qc_helper(struct sdebug_defer *sd_dp,
 }
 
-static bool scsi_debug_stop_cmnd(struct scsi_cmnd *cmnd, int *sqa_idx)
+static bool scsi_debug_stop_cmnd(struct scsi_cmnd *cmnd)
 {
 	enum sdeb_defer_type l_defer_t;
-	struct sdebug_queued_cmd *sqcp;
 	struct sdebug_defer *sd_dp;
 	struct sdebug_scsi_cmd *sdsc = scsi_cmd_priv(cmnd);
+	struct sdebug_queued_cmd *sqcp = TO_QEUEUED_CMD(cmnd);
 
 	lockdep_assert_held(&sdsc->lock);
 
-	sqcp = TO_QEUEUED_CMD(cmnd);
 	if (!sqcp)
 		return false;
 
 	sd_dp = &sqcp->sd_dp;
-	if (sqa_idx)
-		*sqa_idx = sd_dp->sqa_idx;
 	l_defer_t = READ_ONCE(sd_dp->defer_t);
 	ASSIGN_QEUEUED_CMD(cmnd, NULL);
 
@@ -5305,22 +5242,13 @@ static bool scsi_debug_stop_cmnd(struct scsi_cmnd *cmnd, int *sqa_idx)
 static bool scsi_debug_abort_cmnd(struct scsi_cmnd *cmnd)
 {
 	struct sdebug_scsi_cmd *sdsc = scsi_cmd_priv(cmnd);
-	struct sdebug_queue *sqp = get_queue(cmnd);
-	unsigned long flags, iflags;
-	int k = -1;
+	unsigned long flags;
 	bool res;
 
 	spin_lock_irqsave(&sdsc->lock, flags);
-	res = scsi_debug_stop_cmnd(cmnd, &k);
+	res = scsi_debug_stop_cmnd(cmnd);
 	spin_unlock_irqrestore(&sdsc->lock, flags);
 
-	if (k >= 0) {
-		spin_lock_irqsave(&sqp->qc_lock, iflags);
-		clear_bit(k, sqp->in_use_bm);
-		sqp->qc_arr[k] = NULL;
-		spin_unlock_irqrestore(&sqp->qc_lock, iflags);
-	}
-
 	return res;
 }
 
@@ -5578,7 +5506,6 @@ struct sdebug_queued_cmd *sdebug_alloc_queued_cmd(struct scsi_cmnd *scmd)
 	INIT_WORK(&sd_dp->ew.work, sdebug_q_cmd_wq_complete);
 
 	sqcp->scmd = scmd;
-	sd_dp->sqa_idx = -1;
 
 	return sqcp;
 }
@@ -5597,13 +5524,11 @@ static int schedule_resp(struct scsi_cmnd *cmnd, struct sdebug_dev_info *devip,
 	struct request *rq = scsi_cmd_to_rq(cmnd);
 	bool polled = rq->cmd_flags & REQ_POLLED;
 	struct sdebug_scsi_cmd *sdsc = scsi_cmd_priv(cmnd);
-	unsigned long iflags, flags;
+	unsigned long flags;
 	u64 ns_from_boot = 0;
-	struct sdebug_queue *sqp;
 	struct sdebug_queued_cmd *sqcp;
 	struct scsi_device *sdp;
 	struct sdebug_defer *sd_dp;
-	int k;
 
 	if (unlikely(devip == NULL)) {
 		if (scsi_result == 0)
@@ -5615,8 +5540,6 @@ static int schedule_resp(struct scsi_cmnd *cmnd, struct sdebug_dev_info *devip,
 	if (delta_jiff == 0)
 		goto respond_in_thread;
 
-	sqp = get_queue(cmnd);
-	spin_lock_irqsave(&sqp->qc_lock, iflags);
 	if (unlikely(sdebug_every_nth && (SDEBUG_OPT_RARE_TSF & sdebug_opts) &&
 		     (scsi_result == 0))) {
@@ -5635,33 +5558,12 @@ static int schedule_resp(struct scsi_cmnd *cmnd, struct sdebug_dev_info *devip,
 		}
 	}
 
-	k = find_first_zero_bit(sqp->in_use_bm, sdebug_max_queue);
-	if (unlikely(k >= sdebug_max_queue)) {
-		spin_unlock_irqrestore(&sqp->qc_lock, iflags);
-		if (scsi_result)
-			goto respond_in_thread;
-		scsi_result = device_qfull_result;
-		if (SDEBUG_OPT_Q_NOISE & sdebug_opts)
-			sdev_printk(KERN_INFO, sdp, "%s: max_queue=%d exceeded: TASK SET FULL\n",
-				    __func__, sdebug_max_queue);
-		goto respond_in_thread;
-	}
-	set_bit(k, sqp->in_use_bm);
-
 	sqcp = sdebug_alloc_queued_cmd(cmnd);
 	if (!sqcp) {
-		clear_bit(k, sqp->in_use_bm);
-		spin_unlock_irqrestore(&sqp->qc_lock, iflags);
+		pr_err("%s no alloc\n", __func__);
 		return SCSI_MLQUEUE_HOST_BUSY;
 	}
 	sd_dp = &sqcp->sd_dp;
-	sd_dp->sqa_idx = k;
-	sqp->qc_arr[k] = sqcp;
-	spin_unlock_irqrestore(&sqp->qc_lock, iflags);
-
-	/* Set the hostwide tag */
-	if (sdebug_host_max_queue)
-		sd_dp->hc_idx = get_tag(cmnd);
 
 	if (polled)
 		ns_from_boot = ktime_get_boottime_ns();
@@ -5708,10 +5610,6 @@ static int schedule_resp(struct scsi_cmnd *cmnd, struct sdebug_dev_info *devip,
 			u64 d = ktime_get_boottime_ns() - ns_from_boot;
 
 			if (kt <= d) {	/* elapsed duration >= kt */
-				spin_lock_irqsave(&sqp->qc_lock, iflags);
-				sqp->qc_arr[k] = NULL;
-				clear_bit(k, sqp->in_use_bm);
-				spin_unlock_irqrestore(&sqp->qc_lock, iflags);
 				/* call scsi_done() from this thread */
 				sdebug_free_queued_cmd(sqcp);
 				scsi_done(cmnd);
@@ -5974,8 +5872,7 @@ static int scsi_debug_write_info(struct Scsi_Host *host, char *buffer,
  * output are not atomics so might be inaccurate in a busy system.
  */
 static int scsi_debug_show_info(struct seq_file *m, struct Scsi_Host *host)
 {
-	int f, j, l;
-	struct sdebug_queue *sqp;
+	int j;
 	struct sdebug_host_info *sdhp;
 	seq_printf(m, "scsi_debug adapter driver, version %s [%s]\n",
@@ -6003,16 +5900,6 @@ static int scsi_debug_show_info(struct seq_file *m, struct Scsi_Host *host)
 		   atomic_read(&sdebug_a_tsf),
 		   atomic_read(&sdeb_mq_poll_count));
 
-	seq_printf(m, "submit_queues=%d\n", submit_queues);
-	for (j = 0, sqp = sdebug_q_arr; j < submit_queues; ++j, ++sqp) {
-		seq_printf(m, " queue %d:\n", j);
-		f = find_first_bit(sqp->in_use_bm, sdebug_max_queue);
-		if (f != sdebug_max_queue) {
-			l = find_last_bit(sqp->in_use_bm, sdebug_max_queue);
-			seq_printf(m, " in_use_bm BUSY: %s: %d,%d\n",
-				   "first,last bits", f, l);
-		}
-	}
 	seq_printf(m, "this host_no=%d\n", host->host_no);
 
 	if (!xa_empty(per_store_ap)) {
@@ -6428,30 +6315,36 @@ static ssize_t max_queue_show(struct device_driver *ddp, char *buf)
 static ssize_t max_queue_store(struct device_driver *ddp, const char *buf,
 			       size_t count)
 {
-	int j, n, k, a;
-	struct sdebug_queue *sqp;
+	int n;
 
 	if ((count > 0) && (1 == sscanf(buf, "%d", &n)) && (n > 0) &&
-	    (n <= SDEBUG_CANQUEUE) &&
-	    (sdebug_host_max_queue == 0)) {
+	    (n <= SDEBUG_CANQUEUE)) {
+		struct sdebug_host_info *sdhp;
+
 		mutex_lock(&sdebug_host_list_mutex);
-		block_unblock_all_queues(true);
-		k = 0;
-		for (j = 0, sqp = sdebug_q_arr; j < submit_queues;
-		     ++j, ++sqp) {
-			a = find_last_bit(sqp->in_use_bm, SDEBUG_CANQUEUE);
-			if (a > k)
-				k = a;
+		list_for_each_entry(sdhp, &sdebug_host_list, host_list) {
+			struct Scsi_Host *shost = sdhp->shost;
+			struct blk_mq_tag_set *tag_set = &shost->tag_set;
+			struct scsi_device *sdev;
+			int i;
+
+			scsi_block_requests(shost);
+			shost_for_each_device(sdev, shost)
+				scsi_device_quiesce(sdev);
+
+			for (i = 0; i < tag_set->nr_hw_queues; i++) {
+				struct blk_mq_tags *tags = tag_set->tags[i];
+
+				/* yuck, we should not do this */
+				sbitmap_queue_resize(&tags->bitmap_tags, n);
+			}
+
+			shost_for_each_device(sdev, shost)
+				scsi_device_resume(sdev);
+			scsi_unblock_requests(shost);
 		}
-		sdebug_max_queue = n;
-		if (k == SDEBUG_CANQUEUE)
-			atomic_set(&retired_max_queue, 0);
-		else if (k >= n)
-			atomic_set(&retired_max_queue, k + 1);
-		else
-			atomic_set(&retired_max_queue, 0);
-		block_unblock_all_queues(false);
 		mutex_unlock(&sdebug_host_list_mutex);
+		sdebug_max_queue = n;
 		return count;
 	}
 	return -EINVAL;
@@ -6975,13 +6868,6 @@ static int __init scsi_debug_init(void)
 			    sdebug_max_queue);
 	}
 
-	sdebug_q_arr = kcalloc(submit_queues, sizeof(struct sdebug_queue),
-			       GFP_KERNEL);
-	if (sdebug_q_arr == NULL)
-		return -ENOMEM;
-	for (k = 0; k < submit_queues; ++k)
-		spin_lock_init(&sdebug_q_arr[k].qc_lock);
-
 	/*
 	 * check for host managed zoned block device specified with
 	 * ptype=0x14 or zbc=XXX.
@@ -6990,10 +6876,8 @@ static int __init scsi_debug_init(void)
 		sdeb_zbc_model = BLK_ZONED_HM;
 	} else if (sdeb_zbc_model_s && *sdeb_zbc_model_s) {
 		k = sdeb_zbc_model_str(sdeb_zbc_model_s);
-		if (k < 0) {
-			ret = k;
-			goto free_q_arr;
-		}
+		if (k < 0)
+			return k;
 		sdeb_zbc_model = k;
 		switch (sdeb_zbc_model) {
 		case BLK_ZONED_NONE:
@@ -7005,8 +6889,7 @@ static int __init scsi_debug_init(void)
 			break;
 		default:
 			pr_err("Invalid ZBC model\n");
-			ret = -EINVAL;
-			goto free_q_arr;
+			return -EINVAL;
 		}
 	}
 	if (sdeb_zbc_model != BLK_ZONED_NONE) {
@@ -7053,17 +6936,14 @@ static int __init scsi_debug_init(void)
 		    sdebug_unmap_granularity <= sdebug_unmap_alignment) {
 			pr_err("ERR: unmap_granularity <= unmap_alignment\n");
-			ret = -EINVAL;
-			goto free_q_arr;
+			return -EINVAL;
 		}
 	}
 
 	xa_init_flags(per_store_ap, XA_FLAGS_ALLOC | XA_FLAGS_LOCK_IRQ);
 	if (want_store) {
 		idx = sdebug_add_store();
-		if (idx < 0) {
-			ret = idx;
-			goto free_q_arr;
-		}
+		if (idx < 0)
+			return idx;
 	}
 
 	pseudo_primary = root_device_register("pseudo_0");
@@ -7120,8 +7000,6 @@ static int __init scsi_debug_init(void)
 	root_device_unregister(pseudo_primary);
 free_vm:
 	sdebug_erase_store(idx, NULL);
-free_q_arr:
-	kfree(sdebug_q_arr);
 	return ret;
 }
 
@@ -7138,7 +7016,6 @@ static void __exit scsi_debug_exit(void)
 	sdebug_erase_all_stores(false);
 	xa_destroy(per_store_ap);
-	kfree(sdebug_q_arr);
 }
 
 device_initcall(scsi_debug_init);
@@ -7514,11 +7391,8 @@ static bool sdebug_blk_mq_poll_iter(struct request *rq, void *opaque)
 	u32 unique_tag = blk_mq_unique_tag(rq);
 	u16 hwq = blk_mq_unique_tag_to_hwq(unique_tag);
 	struct sdebug_queued_cmd *sqcp;
-	struct sdebug_queue *sqp;
 	unsigned long flags;
 	int queue_num = data->queue_num;
-	bool retiring = false;
-	int qc_idx;
 	ktime_t time;
 
 	/* We're only interested in one queue for this iteration */
@@ -7537,7 +7411,7 @@ static bool sdebug_blk_mq_poll_iter(struct request *rq, void *opaque)
 		spin_unlock_irqrestore(&sdsc->lock, flags);
 		return true;
 	}
-	sqp = sdebug_q_arr + queue_num;
+
 	sd_dp = &sqcp->sd_dp;
 
@@ -7552,36 +7426,6 @@ static bool sdebug_blk_mq_poll_iter(struct request *rq, void *opaque)
 		return true;
 	}
 
-	if (unlikely(atomic_read(&retired_max_queue) > 0))
-		retiring = true;
-
-	qc_idx = sd_dp->sqa_idx;
-	sqp->qc_arr[qc_idx] = NULL;
-	if (unlikely(!test_and_clear_bit(qc_idx, sqp->in_use_bm))) {
-		spin_unlock_irqrestore(&sdsc->lock, flags);
-		pr_err("Unexpected completion sqp %p queue_num=%d qc_idx=%u\n",
-		       sqp, queue_num, qc_idx);
-		sdebug_free_queued_cmd(sqcp);
-		return true;
-	}
-
-	if (unlikely(retiring)) {	/* user has reduced max_queue */
-		int k, retval = atomic_read(&retired_max_queue);
-
-		if (qc_idx >= retval) {
-			pr_err("index %d too large\n", retval);
-			spin_unlock_irqrestore(&sdsc->lock, flags);
-			sdebug_free_queued_cmd(sqcp);
-			return true;
-		}
-
-		k = find_last_bit(sqp->in_use_bm, retval);
-		if ((k < sdebug_max_queue) || (k == retval))
-			atomic_set(&retired_max_queue, 0);
-		else
-			atomic_set(&retired_max_queue, k + 1);
-	}
-
 	ASSIGN_QEUEUED_CMD(cmd, NULL);
 	spin_unlock_irqrestore(&sdsc->lock, flags);
@@ -7601,20 +7445,14 @@ static bool sdebug_blk_mq_poll_iter(struct request *rq, void *opaque)
 
 static int sdebug_blk_mq_poll(struct Scsi_Host *shost, unsigned int queue_num)
 {
 	int num_entries = 0;
-	unsigned long iflags;
-	struct sdebug_queue *sqp;
 	struct sdebug_blk_mq_poll_data data = {
 		.queue_num = queue_num,
 		.num_entries = &num_entries,
 	};
 
-	sqp = sdebug_q_arr + queue_num;
-
-	spin_lock_irqsave(&sqp->qc_lock, iflags);
 	blk_mq_tagset_busy_iter(&shost->tag_set, sdebug_blk_mq_poll_iter, &data);
-	spin_unlock_irqrestore(&sqp->qc_lock, iflags);
 
 	if (num_entries > 0)
 		atomic_add(num_entries, &sdeb_mq_poll_count);
 	return num_entries;
-- 
2.35.3
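
Two standalone sketches follow; neither is part of the patch.

First, the tagset-iterator pattern that makes the driver-private
qc_arr/in_use_bm bookkeeping unnecessary. This is a minimal,
self-contained example in the style of sdebug_blk_mq_poll_iter() above;
the struct and function names are made up for illustration, but
blk_mq_tagset_busy_iter(), blk_mq_unique_tag() and
blk_mq_unique_tag_to_hwq() are the real helpers the patch relies on.

#include <linux/blk-mq.h>
#include <scsi/scsi_host.h>

/* Illustration only: count in-flight requests on one hw queue. */
struct busy_count_data {
	unsigned int queue_num;	/* hw queue of interest */
	unsigned int in_flight;	/* result */
};

/* busy_tag_iter_fn: invoked once per started request in the tagset */
static bool count_one_busy_rq(struct request *rq, void *opaque)
{
	struct busy_count_data *data = opaque;
	u32 unique_tag = blk_mq_unique_tag(rq);

	/* blk-mq encodes the owning hw queue in the unique tag */
	if (blk_mq_unique_tag_to_hwq(unique_tag) == data->queue_num)
		data->in_flight++;

	return true;	/* keep iterating */
}

/* Walk every busy tag of a SCSI host; no driver-private bitmap needed. */
static unsigned int count_busy_on_hwq(struct Scsi_Host *shost,
				      unsigned int queue_num)
{
	struct busy_count_data data = {
		.queue_num = queue_num,
	};

	blk_mq_tagset_busy_iter(&shost->tag_set, count_one_busy_rq, &data);
	return data.in_flight;
}

sdebug_blk_mq_poll_iter() has the same shape: filter on the hw queue
derived from the unique tag, then work on the per-command private data
via scsi_cmd_priv(), which is why struct sdebug_queue no longer needs to
mirror what blk-mq already tracks.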
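Second, a condensed, comment-annotated view of the sequence the new
max_queue_store() performs, since the quiesce/resize/resume ordering is
easier to see here than in the diff. The helper name is hypothetical;
the steps and the "we should not do this" caveat about reaching into
struct blk_mq_tags (a block-layer-internal structure) are taken directly
from the patch, which is exactly what the open question above the
diffstat is about.

/*
 * Hypothetical helper mirroring the new max_queue_store() body for one
 * host: shrink (or grow) the effective per-hw-queue tag depth to
 * new_depth while no commands are in flight.
 */
static void resize_host_tag_depth(struct Scsi_Host *shost, unsigned int new_depth)
{
	struct blk_mq_tag_set *tag_set = &shost->tag_set;
	struct scsi_device *sdev;
	int i;

	/* Stop new I/O and drain what is already in flight. */
	scsi_block_requests(shost);
	shost_for_each_device(sdev, shost)
		scsi_device_quiesce(sdev);

	/*
	 * shost->can_queue seeded tag_set->queue_depth when the host was
	 * added, so every hw queue's tag space must track the new limit,
	 * otherwise the host can be sent more commands than it will take.
	 */
	for (i = 0; i < tag_set->nr_hw_queues; i++) {
		struct blk_mq_tags *tags = tag_set->tags[i];

		/* yuck, we should not do this (see note above the diffstat) */
		sbitmap_queue_resize(&tags->bitmap_tags, new_depth);
	}

	shost_for_each_device(sdev, shost)
		scsi_device_resume(sdev);
	scsi_unblock_requests(shost);
}

The quiesce/resume pair is what keeps this safe: sbitmap_queue_resize()
only changes the visible depth and does not wait for tags above the new
limit to complete, which is presumably why the store path drains the
host first.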