2016-09-21 23:06 GMT+09:00 Khan, Imran <kimran@xxxxxxxxxxxxxx>:
>>> commit 084ee793ec1ff4e2171b481642bfbdaa2e10664c
>>> Author: Imran Khan <kimran@xxxxxxxxxxxxxx>
>>> Date:   Thu Sep 15 16:44:02 2016 +0530
>>>
>>>     blk-mq: use static mapping
>>>
>>>     The blk-mq layer performs a remapping between s/w and h/w contexts and
>>>     also between h/w contexts and CPUs whenever a CPU hotplug event happens.
>>>     This remapping has to wait for queue freezing, which may take tens of
>>>     milliseconds, resulting in high latency in the CPU hotplug path.
>>>     This patch makes the above-mentioned mappings static, so that we can
>>>     avoid the remapping when a CPU hotplug event happens, which results in
>>>     improved CPU hotplug latency.
>>>
>>>     Change-Id: Idf38cb6c4e78c91fda3c86608c6d0441f01ab435
>>>     Signed-off-by: Imran Khan <kimran@xxxxxxxxxxxxxx>
>>>
>>> diff --git a/block/blk-mq-cpumap.c b/block/blk-mq-cpumap.c
>>> index 8764c24..85fdb88 100644
>>> --- a/block/blk-mq-cpumap.c
>>> +++ b/block/blk-mq-cpumap.c
>>> @@ -52,18 +52,13 @@ int blk_mq_update_queue_map(unsigned int *map, unsigned int nr_queues,
>>>
>>>     queue = 0;
>>>     for_each_possible_cpu(i) {
>>> -       if (!cpumask_test_cpu(i, online_mask)) {
>>> -           map[i] = 0;
>>> -           continue;
>>> -       }
>>> -
>>>         /*
>>>          * Easy case - we have equal or more hardware queues. Or
>>>          * there are no thread siblings to take into account. Do
>>>          * 1:1 if enough, or sequential mapping if less.
>>>          */
>>> -       if (nr_queues >= nr_cpus || nr_cpus == nr_uniq_cpus) {
>>> -           map[i] = cpu_to_queue_index(nr_cpus, nr_queues, queue);
>>> +       if (nr_queues >= nr_cpu_ids) {
>>> +           map[i] = cpu_to_queue_index(nr_cpu_ids, nr_queues, queue);
>>>             queue++;
>>>             continue;
>>>         }
>>> @@ -75,7 +70,7 @@ int blk_mq_update_queue_map(unsigned int *map, unsigned int nr_queues,
>>>          */
>>>         first_sibling = get_first_sibling(i);
>>>         if (first_sibling == i) {
>>> -           map[i] = cpu_to_queue_index(nr_uniq_cpus, nr_queues,
>>> +           map[i] = cpu_to_queue_index(nr_cpu_ids, nr_queues,
>>>                                         queue);
>>>             queue++;
>>>         } else
>>> diff --git a/block/blk-mq.c b/block/blk-mq.c
>>> index 6d6f8fe..0753fdf 100644
>>> --- a/block/blk-mq.c
>>> +++ b/block/blk-mq.c
>>> @@ -1783,10 +1783,6 @@ static void blk_mq_init_cpu_queues(struct request_queue *q,
>>>         INIT_LIST_HEAD(&__ctx->rq_list);
>>>         __ctx->queue = q;
>>>
>>> -       /* If the cpu isn't online, the cpu is mapped to first hctx */
>>> -       if (!cpu_online(i))
>>> -           continue;
>>> -
>>>         hctx = q->mq_ops->map_queue(q, i);
>>>
>>>         /*
>>> @@ -1820,12 +1816,9 @@ static void blk_mq_map_swqueue(struct request_queue *q,
>>>      * Map software to hardware queues
>>>      */
>>>     queue_for_each_ctx(q, ctx, i) {
>>> -       /* If the cpu isn't online, the cpu is mapped to first hctx */
>>> -       if (!cpumask_test_cpu(i, online_mask))
>>> -           continue;
>>> -
>>>         hctx = q->mq_ops->map_queue(q, i);
>>> -       cpumask_set_cpu(i, hctx->cpumask);
>>> +       if (cpumask_test_cpu(i, online_mask))
>>> +           cpumask_set_cpu(i, hctx->cpumask);
>>>         ctx->index_hw = hctx->nr_ctx;
>>>         hctx->ctxs[hctx->nr_ctx++] = ctx;
>>>     }
>>> @@ -1863,17 +1856,22 @@
>>>
>>>         /*
>>>          * Initialize batch roundrobin counts.
>>> +        * Set next_cpu only for those hctxs that have an online CPU
>>> +        * in their cpumask field. For hctxs that span both online and
>>> +        * offline CPUs, this will always pick one of the online CPUs.
>>> +        * For hctxs whose CPUs are all offline, the cpumask will be
>>> +        * updated in reinit_notify.
>>>          */
>>> -       hctx->next_cpu = cpumask_first(hctx->cpumask);
>>> -       hctx->next_cpu_batch = BLK_MQ_CPU_WORK_BATCH;
>>> +       if (cpumask_first(hctx->cpumask) < nr_cpu_ids) {
>>> +           hctx->next_cpu = cpumask_first(hctx->cpumask);
>>> +           hctx->next_cpu_batch = BLK_MQ_CPU_WORK_BATCH;
>>> +       }
>>>     }
>>>
>>>     queue_for_each_ctx(q, ctx, i) {
>>> -       if (!cpumask_test_cpu(i, online_mask))
>>> -           continue;
>>> -
>>>         hctx = q->mq_ops->map_queue(q, i);
>>> -       cpumask_set_cpu(i, hctx->tags->cpumask);
>>> +       if (cpumask_test_cpu(i, online_mask))
>>> +           cpumask_set_cpu(i, hctx->tags->cpumask);
>>>     }
>>> }
>>>
>>> @@ -2101,31 +2099,12 @@ void blk_mq_free_queue(struct request_queue *q)
>>>     blk_mq_free_hw_queues(q, set);
>>> }
>>>
>>> -/* Basically redo blk_mq_init_queue with queue frozen */
>>> -static void blk_mq_queue_reinit(struct request_queue *q,
>>> -               const struct cpumask *online_mask)
>>> -{
>>> -   WARN_ON_ONCE(!atomic_read(&q->mq_freeze_depth));
>>> -
>>> -   blk_mq_sysfs_unregister(q);
>>> -
>>> -   blk_mq_update_queue_map(q->mq_map, q->nr_hw_queues, online_mask);

Now blk_mq_update_queue_map() is only used in block/blk-mq-cpumap.c, so you
can make blk_mq_update_queue_map() a static function.
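Something like the below should be enough, I think. This is an untested
sketch, and I'm assuming the prototype is the one declared in block/blk-mq.h:

--- a/block/blk-mq-cpumap.c
+++ b/block/blk-mq-cpumap.c
@@
-int blk_mq_update_queue_map(unsigned int *map, unsigned int nr_queues,
-			    const struct cpumask *online_mask)
+/* No callers outside this file are left after this patch */
+static int blk_mq_update_queue_map(unsigned int *map, unsigned int nr_queues,
+				   const struct cpumask *online_mask)

together with dropping the extern declaration from block/blk-mq.h.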
>>> -
>>> -   /*
>>> -    * redo blk_mq_init_cpu_queues and blk_mq_init_hw_queues. FIXME: maybe
>>> -    * we should change hctx numa_node according to new topology (this
>>> -    * involves free and re-allocate memory, worthy doing?)
>>> -    */
>>> -
>>> -   blk_mq_map_swqueue(q, online_mask);
>>> -
>>> -   blk_mq_sysfs_register(q);
>>> -}
>>> -
>>>  static int blk_mq_queue_reinit_notify(struct notifier_block *nb,
>>>                                        unsigned long action, void *hcpu)
>>>  {
>>>     struct request_queue *q;
>>> +   struct blk_mq_hw_ctx *hctx;
>>> +   int i;
>>>     int cpu = (unsigned long)hcpu;
>>>     /*
>>>      * New online cpumask which is going to be set in this hotplug event.
>>> @@ -2155,43 +2134,29 @@ static int blk_mq_queue_reinit_notify(struct notifier_block *nb,
>>>     case CPU_DEAD:
>>>     case CPU_UP_CANCELED:
>>>         cpumask_copy(&online_new, cpu_online_mask);
>>> +       list_for_each_entry(q, &all_q_list, all_q_node) {
>>> +           queue_for_each_hw_ctx(q, hctx, i) {
>>> +               cpumask_clear_cpu(cpu, hctx->cpumask);
>>> +               cpumask_clear_cpu(cpu, hctx->tags->cpumask);
>>> +           }
>>> +       }

Do we need to hold all_q_mutex while iterating through all_q_list?
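blk_mq_init_allocated_queue() and blk_mq_free_queue() modify all_q_list
under all_q_mutex, so I believe we do; otherwise this traversal can race
with queue creation and removal. I.e. something along these lines
(untested sketch):

	case CPU_DEAD:
	case CPU_UP_CANCELED:
		cpumask_copy(&online_new, cpu_online_mask);
		/*
		 * all_q_list is modified under all_q_mutex elsewhere,
		 * so take it around the traversal as well.
		 */
		mutex_lock(&all_q_mutex);
		list_for_each_entry(q, &all_q_list, all_q_node) {
			queue_for_each_hw_ctx(q, hctx, i) {
				cpumask_clear_cpu(cpu, hctx->cpumask);
				cpumask_clear_cpu(cpu, hctx->tags->cpumask);
			}
		}
		mutex_unlock(&all_q_mutex);
		break;

The CPU_UP_PREPARE loop below walks the same list and would need the same
treatment.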
>>>         break;
>>>     case CPU_UP_PREPARE:
>>>         cpumask_copy(&online_new, cpu_online_mask);
>>>         cpumask_set_cpu(cpu, &online_new);
>>> +       /* Update hctx->cpumask for newly onlined CPUs */
>>> +       list_for_each_entry(q, &all_q_list, all_q_node) {
>>> +           queue_for_each_hw_ctx(q, hctx, i) {
>>> +               cpumask_set_cpu(cpu, hctx->cpumask);
>>> +               hctx->next_cpu_batch = BLK_MQ_CPU_WORK_BATCH;
>>> +               cpumask_set_cpu(cpu, hctx->tags->cpumask);
>>> +           }
>>> +       }
>>>         break;
>>>     default:
>>>         return NOTIFY_OK;
>>>     }
>>>
>>> -   mutex_lock(&all_q_mutex);
>>> -
>>> -   /*
>>> -    * We need to freeze and reinit all existing queues. Freezing
>>> -    * involves synchronous wait for an RCU grace period and doing it
>>> -    * one by one may take a long time. Start freezing all queues in
>>> -    * one swoop and then wait for the completions so that freezing can
>>> -    * take place in parallel.
>>> -    */
>>> -   list_for_each_entry(q, &all_q_list, all_q_node)
>>> -       blk_mq_freeze_queue_start(q);
>>> -   list_for_each_entry(q, &all_q_list, all_q_node) {
>>> -       blk_mq_freeze_queue_wait(q);
>>> -
>>> -       /*
>>> -        * timeout handler can't touch hw queue during the
>>> -        * reinitialization
>>> -        */
>>> -       del_timer_sync(&q->timeout);
>>> -   }
>>> -
>>> -   list_for_each_entry(q, &all_q_list, all_q_node)
>>> -       blk_mq_queue_reinit(q, &online_new);
>>> -
>>> -   list_for_each_entry(q, &all_q_list, all_q_node)
>>> -       blk_mq_unfreeze_queue(q);
>>> -
>>> -   mutex_unlock(&all_q_mutex);

This change makes the 'online_new' cpumask useless in
blk_mq_queue_reinit_notify(), so you can remove it completely.

>>>     return NOTIFY_OK;
>>> }
>>>
>>
>> With the patch I have collected some numbers, running some Android monkey
>> tests along with an app that does random hotplugging of CPUs. The numbers
>> obtained are as below:
>>
>> CPU UP time (in microseconds):
>> default blk-mq:           58629  54207  61792  58458  59286
>> blk-mq with above patch:   2755   2588   3412   3239   3010
>>
>> CPU DOWN time (in microseconds):
>> default blk-mq:           95990 103379 103686  93214 128274
>> blk-mq with above patch:  67178  49876  70027  61261  59656
>>
> Could someone please review the patch and provide some feedback?
> Even if the feedback is about the approach we are trying here, or some
> suggestions on how to move ahead in that direction.

>>>>> Did you try Tejun's percpu_ref patchset that I mentioned in the last email?
>>>>> (http://www.mail-archive.com/linux-kernel@xxxxxxxxxxxxxxx/msg984823.html)
>>>>>
>>>> I have tried the suggested patch but did not see any improvements in CPU
>>>> hotplug latencies.
>
> --
> Imran Khan
> QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a
> member of the Code Aurora Forum, hosted by The Linux Foundation
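One more thought, putting the two comments above together: with all_q_mutex
held around both traversals and 'online_new' gone, the whole notifier could
shrink to something roughly like this. This is an untested sketch on top of
your patch, not a tested replacement:

static int blk_mq_queue_reinit_notify(struct notifier_block *nb,
				      unsigned long action, void *hcpu)
{
	struct request_queue *q;
	struct blk_mq_hw_ctx *hctx;
	int i;
	int cpu = (unsigned long)hcpu;

	switch (action & ~CPU_TASKS_FROZEN) {
	case CPU_DEAD:
	case CPU_UP_CANCELED:
		/* Drop the dead CPU from every hctx mapping. */
		mutex_lock(&all_q_mutex);
		list_for_each_entry(q, &all_q_list, all_q_node) {
			queue_for_each_hw_ctx(q, hctx, i) {
				cpumask_clear_cpu(cpu, hctx->cpumask);
				cpumask_clear_cpu(cpu, hctx->tags->cpumask);
			}
		}
		mutex_unlock(&all_q_mutex);
		break;
	case CPU_UP_PREPARE:
		/* Update hctx->cpumask for the newly onlined CPU. */
		mutex_lock(&all_q_mutex);
		list_for_each_entry(q, &all_q_list, all_q_node) {
			queue_for_each_hw_ctx(q, hctx, i) {
				cpumask_set_cpu(cpu, hctx->cpumask);
				hctx->next_cpu_batch = BLK_MQ_CPU_WORK_BATCH;
				cpumask_set_cpu(cpu, hctx->tags->cpumask);
			}
		}
		mutex_unlock(&all_q_mutex);
		break;
	default:
		break;
	}

	return NOTIFY_OK;
}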