[RFC PATCH 2/2] null_blk: add support for weighted round robin submission queues

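Add a weighted round robin (WRR) completion mode to null_blk. A new
irqmode, NULL_IRQ_WRR (3), queues completed commands on per-queue lists
instead of ending them inline. Submission queues can be distributed
over the blk-mq queue types (default, read, poll, wrr_low, wrr_medium
and wrr_high) through new module parameters, and a per-device kernel
thread drains the queues in a fixed high:medium:low ratio of 8:4:1.
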
Signed-off-by: Weiping Zhang <zhangweiping@xxxxxxxxxxxxxx>
---
 drivers/block/null_blk.h      |   7 +
 drivers/block/null_blk_main.c | 294 ++++++++++++++++++++++++++++++++++++++++--
 2 files changed, 288 insertions(+), 13 deletions(-)
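
Example invocation (illustrative values; irqmode=3 selects the new
NULL_IRQ_WRR completion mode):

    modprobe null_blk irqmode=3 submit_queues=4 \
        submit_queues_wrr_low=1 submit_queues_wrr_medium=1 \
        submit_queues_wrr_high=1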

diff --git a/drivers/block/null_blk.h b/drivers/block/null_blk.h
index 34b22d6523ba..aa53c4b6de49 100644
--- a/drivers/block/null_blk.h
+++ b/drivers/block/null_blk.h
@@ -12,6 +12,7 @@
 
 struct nullb_cmd {
 	struct list_head list;
+	struct list_head wrr_node;
 	struct llist_node ll_list;
 	struct __call_single_data csd;
 	struct request *rq;
@@ -23,6 +24,8 @@ struct nullb_cmd {
 };
 
 struct nullb_queue {
+	spinlock_t wrr_lock;
+	struct list_head wrr_head;
 	unsigned long *tag_map;
 	wait_queue_head_t wait;
 	unsigned int queue_depth;
@@ -83,6 +86,10 @@ struct nullb {
 	struct nullb_queue *queues;
 	unsigned int nr_queues;
 	char disk_name[DISK_NAME_LEN];
+
+	struct task_struct *wrr_thread;
+	atomic_long_t wrrd_inflight;
+	wait_queue_head_t wrrd_wait;
 };
 
 #ifdef CONFIG_BLK_DEV_ZONED
diff --git a/drivers/block/null_blk_main.c b/drivers/block/null_blk_main.c
index 447d635c79a2..100fc0e13036 100644
--- a/drivers/block/null_blk_main.c
+++ b/drivers/block/null_blk_main.c
@@ -4,6 +4,8 @@
  * Shaohua Li <shli@xxxxxx>
  */
 #include <linux/module.h>
+#include <linux/kthread.h>
+#include <linux/blk-cgroup.h>
 
 #include <linux/moduleparam.h>
 #include <linux/sched.h>
@@ -75,6 +77,7 @@ enum {
 	NULL_IRQ_NONE		= 0,
 	NULL_IRQ_SOFTIRQ	= 1,
 	NULL_IRQ_TIMER		= 2,
+	NULL_IRQ_WRR		= 3,
 };
 
 enum {
@@ -87,10 +90,23 @@ static int g_no_sched;
 module_param_named(no_sched, g_no_sched, int, 0444);
 MODULE_PARM_DESC(no_sched, "No io scheduler");
 
+static int g_tagset_nr_maps = 1;
 static int g_submit_queues = 1;
 module_param_named(submit_queues, g_submit_queues, int, 0444);
 MODULE_PARM_DESC(submit_queues, "Number of submission queues");
 
+#define NULLB_SUBMIT_QUEUE(attr, count) \
+static int g_submit_queues_##attr = count; \
+module_param_named(submit_queues_##attr, g_submit_queues_##attr, int, 0444); \
+MODULE_PARM_DESC(submit_queues_##attr, "Number of " #attr " submission queues");
+
+NULLB_SUBMIT_QUEUE(default, 1)
+NULLB_SUBMIT_QUEUE(read, 0)
+NULLB_SUBMIT_QUEUE(poll, 0)
+NULLB_SUBMIT_QUEUE(wrr_low, 0)
+NULLB_SUBMIT_QUEUE(wrr_medium, 0)
+NULLB_SUBMIT_QUEUE(wrr_high, 0)
+
 static int g_home_node = NUMA_NO_NODE;
 module_param_named(home_node, g_home_node, int, 0444);
 MODULE_PARM_DESC(home_node, "Home node for the device");
@@ -158,7 +174,7 @@ static int g_irqmode = NULL_IRQ_SOFTIRQ;
 static int null_set_irqmode(const char *str, const struct kernel_param *kp)
 {
 	return null_param_store_val(str, &g_irqmode, NULL_IRQ_NONE,
-					NULL_IRQ_TIMER);
+					NULL_IRQ_WRR);
 }
 
 static const struct kernel_param_ops null_irqmode_param_ops = {
@@ -643,6 +659,22 @@ static void null_cmd_end_timer(struct nullb_cmd *cmd)
 	hrtimer_start(&cmd->timer, kt, HRTIMER_MODE_REL);
 }
 
+static void null_cmd_wrr_insert(struct nullb_cmd *cmd)
+{
+	struct nullb_queue *nq = cmd->nq;
+	struct nullb *nullb = nq->dev->nullb;
+	unsigned long flags;
+
+	INIT_LIST_HEAD(&cmd->wrr_node);
+	spin_lock_irqsave(&nq->wrr_lock, flags);
+	list_add_tail(&cmd->wrr_node, &nq->wrr_head);
+	spin_unlock_irqrestore(&nq->wrr_lock, flags);
+
+	/* wake up wrr_thread if needed */
+	if (atomic_long_inc_return(&nullb->wrrd_inflight) == 1)
+		wake_up_interruptible(&nullb->wrrd_wait);
+}
+
 static void null_complete_rq(struct request *rq)
 {
 	end_cmd(blk_mq_rq_to_pdu(rq));
@@ -1236,6 +1268,9 @@ static blk_status_t null_handle_cmd(struct nullb_cmd *cmd)
 	case NULL_IRQ_TIMER:
 		null_cmd_end_timer(cmd);
 		break;
+	case NULL_IRQ_WRR:
+		null_cmd_wrr_insert(cmd);
+		break;
 	}
 	return BLK_STS_OK;
 }
@@ -1351,10 +1386,64 @@ static blk_status_t null_queue_rq(struct blk_mq_hw_ctx *hctx,
 	return null_handle_cmd(cmd);
 }
 
+static inline int null_hctx_nr_queue(int type)
+{
+	int ret;
+
+	switch (type) {
+	case HCTX_TYPE_DEFAULT:
+		ret = g_submit_queues_default;
+		break;
+	case HCTX_TYPE_READ:
+		ret = g_submit_queues_read;
+		break;
+	case HCTX_TYPE_POLL:
+		ret = g_submit_queues_poll;
+		break;
+	case HCTX_TYPE_WRR_LOW:
+		ret = g_submit_queues_wrr_low;
+		break;
+	case HCTX_TYPE_WRR_MEDIUM:
+		ret = g_submit_queues_wrr_medium;
+		break;
+	case HCTX_TYPE_WRR_HIGH:
+		ret = g_submit_queues_wrr_high;
+		break;
+	default:
+		ret = 0;
+		break;
+	}
+
+	return ret;
+}
+
+static int null_map_queues(struct blk_mq_tag_set *set)
+{
+	int i, offset;
+
+	for (i = 0, offset = 0; i < set->nr_maps; i++) {
+		struct blk_mq_queue_map *map = &set->map[i];
+
+		map->nr_queues = null_hctx_nr_queue(i);
+
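+		/*
+		 * null_verify_queues() guarantees at least one default
+		 * queue, so an empty default map indicates a bug.
+		 */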
+		if (!map->nr_queues) {
+			BUG_ON(i == HCTX_TYPE_DEFAULT);
+			continue;
+		}
+
+		map->queue_offset = offset;
+		blk_mq_map_queues(map);
+		offset += map->nr_queues;
+	}
+
+	return 0;
+}
+
 static const struct blk_mq_ops null_mq_ops = {
 	.queue_rq       = null_queue_rq,
 	.complete	= null_complete_rq,
 	.timeout	= null_timeout_rq,
+	.map_queues	= null_map_queues,
 };
 
 static void cleanup_queue(struct nullb_queue *nq)
@@ -1397,6 +1486,9 @@ static void null_del_dev(struct nullb *nullb)
+	/*
+	 * Stop the WRR completion thread before cleanup_queues() frees
+	 * the queues it iterates over.
+	 */
+	if (dev->irqmode == NULL_IRQ_WRR)
+		kthread_stop(nullb->wrr_thread);
+
 	cleanup_queues(nullb);
 	if (null_cache_active(nullb))
 		null_free_device_storage(nullb->dev, true);
 	kfree(nullb);
 	dev->nullb = NULL;
 }
@@ -1435,6 +1527,8 @@ static void null_init_queue(struct nullb *nullb, struct nullb_queue *nq)
 	init_waitqueue_head(&nq->wait);
 	nq->queue_depth = nullb->queue_depth;
 	nq->dev = nullb->dev;
+	INIT_LIST_HEAD(&nq->wrr_head);
+	spin_lock_init(&nq->wrr_lock);
 }
 
 static void null_init_queues(struct nullb *nullb)
@@ -1549,6 +1643,7 @@ static int null_init_tag_set(struct nullb *nullb, struct blk_mq_tag_set *set)
 						g_submit_queues;
 	set->queue_depth = nullb ? nullb->dev->hw_queue_depth :
 						g_hw_queue_depth;
+	set->nr_maps = g_tagset_nr_maps;
 	set->numa_node = nullb ? nullb->dev->home_node : g_home_node;
 	set->cmd_size	= sizeof(struct nullb_cmd);
 	set->flags = BLK_MQ_F_SHOULD_MERGE;
@@ -1576,7 +1671,7 @@ static void null_validate_conf(struct nullb_device *dev)
 		dev->submit_queues = 1;
 
 	dev->queue_mode = min_t(unsigned int, dev->queue_mode, NULL_Q_MQ);
-	dev->irqmode = min_t(unsigned int, dev->irqmode, NULL_IRQ_TIMER);
+	dev->irqmode = min_t(unsigned int, dev->irqmode, NULL_IRQ_WRR);
 
 	/* Do memory allocation, so set blocking */
 	if (dev->memory_backed)
@@ -1616,6 +1711,72 @@ static bool null_setup_fault(void)
 	return true;
 }
 
+static inline void null_wrr_handle_map(struct nullb *nullb,
+				struct blk_mq_queue_map *map, int batch)
+{
+	int i, nr;
+	struct nullb_queue *nq;
+	struct nullb_cmd *cmd, *tmp;
+	unsigned long flags;
+
+	for (i = 0; i < map->nr_queues; i++) {
+		nq = &nullb->queues[i + map->queue_offset];
+		nr = batch;
+		spin_lock_irqsave(&nq->wrr_lock, flags);
+		list_for_each_entry_safe(cmd, tmp, &nq->wrr_head, wrr_node) {
+			/* complete at most @batch requests per queue */
+			if (nr-- == 0)
+				break;
+			list_del(&cmd->wrr_node);
+			blk_mq_end_request(cmd->rq, cmd->error);
+			atomic_long_dec(&nullb->wrrd_inflight);
+		}
+		spin_unlock_irqrestore(&nq->wrr_lock, flags);
+	}
+}
+
+static int null_wrr_thread(void *data)
+{
+	struct nullb *nullb = (struct nullb *)data;
+	struct blk_mq_tag_set *set = nullb->tag_set;
+	struct blk_mq_queue_map	*map;
+	DEFINE_WAIT(wait);
+
+	while (1) {
+		if (kthread_should_stop())
+			goto out;
+
+		cond_resched();
+
+		/*
+		 * Handle each hardware queue type in a weighted round robin
+		 * fashion; the batch sizes below give a high:medium:low
+		 * completion ratio of 8:4:1.
+		 */
+
+		map = &set->map[HCTX_TYPE_WRR_HIGH];
+		null_wrr_handle_map(nullb, map, 8);
+
+		map = &set->map[HCTX_TYPE_WRR_MEDIUM];
+		null_wrr_handle_map(nullb, map, 4);
+
+		map = &set->map[HCTX_TYPE_WRR_LOW];
+		null_wrr_handle_map(nullb, map, 1);
+
+		map = &set->map[HCTX_TYPE_POLL];
+		null_wrr_handle_map(nullb, map, 1);
+
+		map = &set->map[HCTX_TYPE_READ];
+		null_wrr_handle_map(nullb, map, 1);
+
+		map = &set->map[HCTX_TYPE_DEFAULT];
+		null_wrr_handle_map(nullb, map, 1);
+
+		prepare_to_wait(&nullb->wrrd_wait, &wait, TASK_INTERRUPTIBLE);
+		if (atomic_long_read(&nullb->wrrd_inflight) == 0)
+			schedule();
+		finish_wait(&nullb->wrrd_wait, &wait);
+	}
+
+out:
+	return 0;
+}
+
 static int null_add_dev(struct nullb_device *dev)
 {
 	struct nullb *nullb;
@@ -1706,15 +1867,27 @@ static int null_add_dev(struct nullb_device *dev)
 
 	sprintf(nullb->disk_name, "nullb%d", nullb->index);
 
+	if (dev->irqmode == NULL_IRQ_WRR) {
+		init_waitqueue_head(&nullb->wrrd_wait);
+		atomic_long_set(&nullb->wrrd_inflight, 0);
+		nullb->wrr_thread = kthread_run(null_wrr_thread, nullb,
+			"wrrd_%s", nullb->disk_name);
+		/* kthread_run() returns ERR_PTR() on failure, not NULL */
+		if (IS_ERR(nullb->wrr_thread)) {
+			rv = PTR_ERR(nullb->wrr_thread);
+			goto out_cleanup_zone;
+		}
+	}
+
 	rv = null_gendisk_register(nullb);
 	if (rv)
-		goto out_cleanup_zone;
+		goto out_cleanup_wrrd;
 
 	mutex_lock(&lock);
 	list_add_tail(&nullb->list, &nullb_list);
 	mutex_unlock(&lock);
 
 	return 0;
+out_cleanup_wrrd:
+	if (dev->irqmode == NULL_IRQ_WRR)
+		kthread_stop(nullb->wrr_thread);
 out_cleanup_zone:
 	if (dev->zoned)
 		null_zone_exit(dev);
@@ -1731,6 +1904,106 @@ static int null_add_dev(struct nullb_device *dev)
 	return rv;
 }
 
+static int null_verify_queues(void)
+{
+	int queues, nr;
+
+	if (g_queue_mode == NULL_Q_MQ && g_use_per_node_hctx) {
+		if (g_submit_queues != nr_online_nodes) {
+			pr_warn("null_blk: submit_queues param is set to %u.\n",
+							nr_online_nodes);
+			g_submit_queues = nr_online_nodes;
+		}
+	} else if (g_submit_queues > nr_cpu_ids)
+		g_submit_queues = nr_cpu_ids;
+	else if (g_submit_queues <= 0)
+		g_submit_queues = 1;
+
+	/* always leave at least one queue for the default type */
+	g_submit_queues_default = 1;
+	queues = g_submit_queues - 1;
+	if (queues == 0)
+		goto def;
+
+	/* read queues */
+	nr = g_submit_queues_read;
+	if (nr < 0 || nr > queues) {
+		pr_warn("null_blk: invalid read queue count\n");
+		return -EINVAL;
+	}
+	g_tagset_nr_maps++;
+	queues -= nr;
+	if (queues == 0)
+		goto read;
+
+	/* poll queues */
+	nr = g_submit_queues_poll;
+	if (nr < 0 || nr > queues) {
+		pr_warn("null_blk: invalid poll queue count\n");
+		return -EINVAL;
+	}
+	g_tagset_nr_maps++;
+	queues -= nr;
+	if (queues == 0)
+		goto poll;
+
+	/* wrr_low queues */
+	nr = g_submit_queues_wrr_low;
+	if (nr < 0 || nr > queues) {
+		pr_warn("null_blk: invalid wrr_low queue count\n");
+		return -EINVAL;
+	}
+	g_tagset_nr_maps++;
+	queues -= nr;
+	if (queues == 0)
+		goto wrr_low;
+
+	/* wrr_medium queues */
+	nr = g_submit_queues_wrr_medium;
+	if (nr < 0 || nr > queues) {
+		pr_warn("null_blk: invalid wrr_medium queue count\n");
+		return -EINVAL;
+	}
+	g_tagset_nr_maps++;
+	queues -= nr;
+	if (queues == 0)
+		goto wrr_medium;
+
+	/* wrr_high queues */
+	nr = g_submit_queues_wrr_high;
+	if (nr < 0 || nr > queues) {
+		pr_warn("null_blk: invalid wrr_high queue count\n");
+		return -EINVAL;
+	}
+	g_tagset_nr_maps++;
+	queues -= nr;
+
+	/* add all remaining queues to the default group */
+	g_submit_queues_default += queues;
+
+	goto out;
+
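+	/*
+	 * Fall-through chain: entering at a label zeroes the queue counts
+	 * for every type that the checks above never reached.
+	 */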
+def:
+	g_submit_queues_read = 0;
+read:
+	g_submit_queues_poll = 0;
+poll:
+	g_submit_queues_wrr_low = 0;
+wrr_low:
+	g_submit_queues_wrr_medium = 0;
+wrr_medium:
+	g_submit_queues_wrr_high = 0;
+out:
+	pr_info("null_blk: total submit queues:%d, nr_maps:%d, default:%d, "
+		"read:%d, poll:%d, wrr_low:%d, wrr_medium:%d, wrr_high:%d\n",
+		g_submit_queues, g_tagset_nr_maps, g_submit_queues_default,
+		g_submit_queues_read, g_submit_queues_poll,
+		g_submit_queues_wrr_low, g_submit_queues_wrr_medium,
+		g_submit_queues_wrr_high);
+
+	return 0;
+}
+
 static int __init null_init(void)
 {
 	int ret = 0;
@@ -1758,16 +2031,11 @@ static int __init null_init(void)
 		pr_err("null_blk: legacy IO path no longer available\n");
 		return -EINVAL;
 	}
-	if (g_queue_mode == NULL_Q_MQ && g_use_per_node_hctx) {
-		if (g_submit_queues != nr_online_nodes) {
-			pr_warn("null_blk: submit_queues param is set to %u.\n",
-							nr_online_nodes);
-			g_submit_queues = nr_online_nodes;
-		}
-	} else if (g_submit_queues > nr_cpu_ids)
-		g_submit_queues = nr_cpu_ids;
-	else if (g_submit_queues <= 0)
-		g_submit_queues = 1;
+
+	if (null_verify_queues()) {
+		pr_err("null_blk: invalid submit queue parameter\n");
+		return -EINVAL;
+	}
 
 	if (g_queue_mode == NULL_Q_MQ && shared_tags) {
 		ret = null_init_tag_set(NULL, &tag_set);
-- 
2.14.1



