blk_mq_tags/requests of specific hardware queue are mostly used in specific cpus, which might not be in the same numa node as disk. For example, a nvme card is in node 0. half hardware queue will be used by node 0, the other node 1. Signed-off-by: Shaohua Li <shli@xxxxxx> --- block/blk-mq.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/block/blk-mq.c b/block/blk-mq.c index c3400b5..90a37ba 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -1594,9 +1594,13 @@ static struct blk_mq_tags *blk_mq_init_rq_map(struct blk_mq_tag_set *set, struct blk_mq_tags *tags; unsigned int i, j, entries_per_page, max_order = 4; size_t rq_size, left; + int node; + + node = blk_mq_hw_queue_to_node(set->mq_map, hctx_idx); + if (node == NUMA_NO_NODE) + node = set->numa_node; - tags = blk_mq_init_tags(set->queue_depth, set->reserved_tags, - set->numa_node, + tags = blk_mq_init_tags(set->queue_depth, set->reserved_tags, node, BLK_MQ_FLAG_TO_ALLOC_POLICY(set->flags)); if (!tags) return NULL; @@ -1605,7 +1609,7 @@ static struct blk_mq_tags *blk_mq_init_rq_map(struct blk_mq_tag_set *set, tags->rqs = kzalloc_node(set->queue_depth * sizeof(struct request *), GFP_NOIO | __GFP_NOWARN | __GFP_NORETRY, - set->numa_node); + node); if (!tags->rqs) { blk_mq_free_tags(tags); return NULL; @@ -1629,7 +1633,7 @@ static struct blk_mq_tags *blk_mq_init_rq_map(struct blk_mq_tag_set *set, this_order--; do { - page = alloc_pages_node(set->numa_node, + page = alloc_pages_node(node, GFP_NOIO | __GFP_NOWARN | __GFP_NORETRY | __GFP_ZERO, this_order); if (page) @@ -1660,7 +1664,7 @@ static struct blk_mq_tags *blk_mq_init_rq_map(struct blk_mq_tag_set *set, if (set->ops->init_request) { if (set->ops->init_request(set->driver_data, tags->rqs[i], hctx_idx, i, - set->numa_node)) { + node)) { tags->rqs[i] = NULL; goto fail; } -- 2.9.3 -- To unsubscribe from this list: send the line "unsubscribe linux-block" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html