On 04/05/2018 06:05 PM, Ming Lei wrote:
[...]
> diff --git a/block/blk-mq.c b/block/blk-mq.c
> index 90838e998f66..996f8a963026 100644
> --- a/block/blk-mq.c
> +++ b/block/blk-mq.c
> @@ -1324,9 +1324,18 @@ static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx)
>  	 */
>  	if (!cpumask_test_cpu(raw_smp_processor_id(), hctx->cpumask) &&
>  	    cpu_online(hctx->next_cpu)) {
> -		printk(KERN_WARNING "run queue from wrong CPU %d, hctx %s\n",
> -			raw_smp_processor_id(),
> +		int cpu;
> +		printk(KERN_WARNING "run queue from wrong CPU %d/%d, hctx-%d %s\n",
> +			raw_smp_processor_id(), hctx->next_cpu,
> +			hctx->queue_num,
>  			cpumask_empty(hctx->cpumask) ? "inactive": "active");
> +		printk("dump CPUs mapped to this hctx:\n");
> +		for_each_cpu(cpu, hctx->cpumask)
> +			printk("%d ", cpu);
> +		printk("\n");
> +		printk("nr_cpu_ids is %d, and dump online cpus:\n", nr_cpu_ids);
> +		for_each_cpu(cpu, cpu_online_mask)
> +			printk("%d ", cpu);
>  		dump_stack();
>  	}
> 

FWIW, with that debug patch applied I see things like:

[    4.049828] dump CPUs mapped to this hctx:
[    4.049829] 18
[    4.049829] 82
[    4.049830] 146
[    4.049830] 210
[    4.049831] 274
[    4.049832] nr_cpu_ids is 282, and dump online cpus:
[    4.049833] 0
[    4.049833] 1
[    4.049834] 2
[    4.049834] 3
[    4.049835] 4
[    4.049835] 5
[    4.049836] 6
[    4.049836] 7
[    4.049837] 8
[    4.049837] 9
[    4.049838] 10
[    4.049839] 11
[    4.049839] 12
[    4.049840] 13
[    4.049840] 14
[    4.049841] 15

So the hctx has only "possible CPUs", but all of them are offline. Doesn't
that always make this run unbound? See blk_mq_hctx_next_cpu below.

/*
 * It'd be great if the workqueue API had a way to pass
 * in a mask and had some smarts for more clever placement.
 * For now we just round-robin here, switching for every
 * BLK_MQ_CPU_WORK_BATCH queued items.
 */
static int blk_mq_hctx_next_cpu(struct blk_mq_hw_ctx *hctx)
{
	bool tried = false;

	if (hctx->queue->nr_hw_queues == 1)
		return WORK_CPU_UNBOUND;

	if (--hctx->next_cpu_batch <= 0) {
		int next_cpu;
select_cpu:
		next_cpu = cpumask_next_and(hctx->next_cpu, hctx->cpumask,
				cpu_online_mask);
		if (next_cpu >= nr_cpu_ids)
			next_cpu = cpumask_first_and(hctx->cpumask, cpu_online_mask);

		/*
		 * No online CPU is found, so have to make sure hctx->next_cpu
		 * is set correctly for not breaking workqueue.
		 */
		if (next_cpu >= nr_cpu_ids)
			hctx->next_cpu = cpumask_first(hctx->cpumask);
		else
			hctx->next_cpu = next_cpu;
		hctx->next_cpu_batch = BLK_MQ_CPU_WORK_BATCH;
	}

	/*
	 * Do unbound schedule if we can't find a online CPU for this hctx,
	 * and it should only happen in the path of handling CPU DEAD.
	 */
	if (!cpu_online(hctx->next_cpu)) {
		if (!tried) {
			tried = true;
			goto select_cpu;
		}

		/*
		 * Make sure to re-select CPU next time once after CPUs
		 * in hctx->cpumask become online again.
		 */
		hctx->next_cpu_batch = 1;
		return WORK_CPU_UNBOUND;
	}
	return hctx->next_cpu;
}
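
To make that concrete, below is a quick userspace model of the selection
loop above (just a sketch: model_next_cpu, NR_MODEL_CPUS, MODEL_UNBOUND,
MODEL_BATCH and the bool arrays are invented stand-ins for nr_cpu_ids,
WORK_CPU_UNBOUND, BLK_MQ_CPU_WORK_BATCH and the cpumask helpers, and the
nr_hw_queues == 1 early return is left out). Fed with the masks from the
dmesg output, hctx mapped to {18, 82, 146, 210, 274} and only CPUs 0-15
online, it returns the unbound marker on every call:

/*
 * Userspace model of the round-robin part of blk_mq_hctx_next_cpu().
 * NOT kernel code; all names here are made up for illustration.
 */
#include <stdbool.h>
#include <stdio.h>

#define NR_MODEL_CPUS	282	/* nr_cpu_ids from the log above */
#define MODEL_UNBOUND	(-1)	/* stands in for WORK_CPU_UNBOUND */
#define MODEL_BATCH	8	/* stands in for BLK_MQ_CPU_WORK_BATCH */

static bool hctx_mask[NR_MODEL_CPUS];	/* models hctx->cpumask */
static bool online_mask[NR_MODEL_CPUS];	/* models cpu_online_mask */

/* first CPU after 'from' set in both masks, NR_MODEL_CPUS if none */
static int next_and(int from, const bool *a, const bool *b)
{
	for (int cpu = from + 1; cpu < NR_MODEL_CPUS; cpu++)
		if (a[cpu] && b[cpu])
			return cpu;
	return NR_MODEL_CPUS;
}

/* first CPU set in 'a' (and in 'b' unless b is NULL), NR_MODEL_CPUS if none */
static int first_and(const bool *a, const bool *b)
{
	for (int cpu = 0; cpu < NR_MODEL_CPUS; cpu++)
		if (a[cpu] && (!b || b[cpu]))
			return cpu;
	return NR_MODEL_CPUS;
}

/* same control flow as the selection loop in blk_mq_hctx_next_cpu() */
static int model_next_cpu(int *next_cpu, int *batch)
{
	bool tried = false;

	if (--(*batch) <= 0) {
		int cpu;
select_cpu:
		cpu = next_and(*next_cpu, hctx_mask, online_mask);
		if (cpu >= NR_MODEL_CPUS)
			cpu = first_and(hctx_mask, online_mask);

		/* no online CPU in the mask: keep next_cpu valid anyway */
		if (cpu >= NR_MODEL_CPUS)
			*next_cpu = first_and(hctx_mask, NULL);
		else
			*next_cpu = cpu;
		*batch = MODEL_BATCH;
	}

	if (!online_mask[*next_cpu]) {
		if (!tried) {
			tried = true;
			goto select_cpu;
		}
		/* force re-selection next time, then punt to unbound */
		*batch = 1;
		return MODEL_UNBOUND;
	}
	return *next_cpu;
}

int main(void)
{
	/* topology from the dmesg output: hctx -> {18, 82, 146, 210, 274} */
	static const int mapped[] = { 18, 82, 146, 210, 274 };
	for (size_t i = 0; i < sizeof(mapped) / sizeof(mapped[0]); i++)
		hctx_mask[mapped[i]] = true;
	for (int cpu = 0; cpu <= 15; cpu++)	/* only CPUs 0-15 are online */
		online_mask[cpu] = true;

	int next_cpu = first_and(hctx_mask, NULL);	/* starts at CPU 18 */
	int batch = 1;					/* force selection */

	for (int i = 0; i < 3; i++) {
		int cpu = model_next_cpu(&next_cpu, &batch);
		if (cpu == MODEL_UNBOUND)
			printf("run %d: WORK_CPU_UNBOUND\n", i);
		else
			printf("run %d: bound to CPU %d\n", i, cpu);
	}
	return 0;
}

Because next_cpu_batch is reset to 1 before returning WORK_CPU_UNBOUND, the
selection is redone on the very next run, so the queue should go back to a
bound CPU as soon as one of the CPUs in hctx->cpumask comes online again.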