[PATCH] nvme-multipath: round-robin I/O path scheduling

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



A multipath system might provide more than one optimized path
to a given namespace. This patch implements a simple round-robin
I/O path scheduler to allow the system to utilize all paths.

Signed-off-by: Hannes Reinecke <hare@xxxxxxxx>
---
 drivers/nvme/host/core.c      |  6 ++++
 drivers/nvme/host/multipath.c | 76 ++++++++++++++++++++++++++++++++++++++++++-
 drivers/nvme/host/nvme.h      |  8 +++++
 3 files changed, 89 insertions(+), 1 deletion(-)

diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index deb047514408..03d084649fdc 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -2189,6 +2189,9 @@ static struct attribute *nvme_subsys_attrs[] = {
 	&subsys_attr_serial.attr,
 	&subsys_attr_firmware_rev.attr,
 	&subsys_attr_subsysnqn.attr,
+#ifdef CONFIG_NVME_MULTIPATH
+	&subsys_attr_iopolicy.attr,
+#endif
 	NULL,
 };
 
@@ -2241,6 +2244,9 @@ static int nvme_init_subsystem(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id)
 	memcpy(subsys->firmware_rev, id->fr, sizeof(subsys->firmware_rev));
 	subsys->vendor_id = le16_to_cpu(id->vid);
 	subsys->cmic = id->cmic;
+#ifdef CONFIG_NVME_MULTIPATH
+	subsys->iopolicy = NVME_IOPOLICY_NUMA;
+#endif
 
 	subsys->dev.class = nvme_subsys_class;
 	subsys->dev.release = nvme_release_subsystem;
diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c
index 8e03cda770c5..e4806edc6f91 100644
--- a/drivers/nvme/host/multipath.c
+++ b/drivers/nvme/host/multipath.c
@@ -141,7 +141,10 @@ static struct nvme_ns *__nvme_find_path(struct nvme_ns_head *head, int node)
 		    test_bit(NVME_NS_ANA_PENDING, &ns->flags))
 			continue;
 
-		distance = node_distance(node, ns->ctrl->numa_node);
+		if (head->subsys->iopolicy == NVME_IOPOLICY_NUMA)
+			distance = node_distance(node, ns->ctrl->numa_node);
+		else
+			distance = LOCAL_DISTANCE;
 
 		switch (ns->ana_state) {
 		case NVME_ANA_OPTIMIZED:
@@ -174,14 +177,53 @@ static inline bool nvme_path_is_optimized(struct nvme_ns *ns)
 		ns->ana_state == NVME_ANA_OPTIMIZED;
 }

+/*
+ * Round-robin path selection: return the next optimized path after @old on
+ * head->list, wrapping around at the end, and record it in
+ * head->current_path[node].
+ *
+ * The walk advances from the entry visited last. Restarting from @old on
+ * every iteration would spin forever whenever the sibling after @old is not
+ * optimized. @old is examined last, so it is returned again when it is the
+ * only optimized path. Returns NULL if no optimized path is found.
+ */
+inline struct nvme_ns *__nvme_next_path(struct nvme_ns_head *head, int node,
+					struct nvme_ns *old)
+{
+	struct nvme_ns *ns = old;
+
+	do {
+		ns = list_next_or_null_rcu(&head->list, &ns->siblings,
+					   struct nvme_ns, siblings);
+		if (!ns)
+			ns = list_first_or_null_rcu(&head->list, struct nvme_ns,
+						    siblings);
+		if (!ns)
+			return NULL;	/* the list has no entries */
+
+		if (nvme_path_is_optimized(ns)) {
+			rcu_assign_pointer(head->current_path[node], ns);
+			return ns;
+		}
+	} while (ns != old);
+
+	return NULL;
+}
+
 inline struct nvme_ns *nvme_find_path(struct nvme_ns_head *head)
 {
 	int node = numa_node_id();
 	struct nvme_ns *ns;

 	ns = srcu_dereference(head->current_path[node], &head->srcu);
 	if (unlikely(!ns || !nvme_path_is_optimized(ns)))
 		ns = __nvme_find_path(head, node);
+	else if (head->subsys->iopolicy == NVME_IOPOLICY_RR) {
+		ns = __nvme_next_path(head, node, ns);
+		/* Nothing optimized any more: fall back to a full search. */
+		if (!ns)
+			ns = __nvme_find_path(head, node);
+	}
 	return ns;
 }
 
@@ -486,6 +519,58 @@ void nvme_mpath_stop(struct nvme_ctrl *ctrl)
 	cancel_work_sync(&ctrl->ana_work);
 }

+/* Show the subsystem's I/O policy as its sysfs name, newline-terminated. */
+static ssize_t nvme_subsys_iopolicy_show(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	struct nvme_subsystem *subsys =
+		container_of(dev, struct nvme_subsystem, dev);
+
+	switch (subsys->iopolicy) {
+	case NVME_IOPOLICY_NONE:
+		return sprintf(buf, "none\n");
+	case NVME_IOPOLICY_NUMA:
+		return sprintf(buf, "numa\n");
+	case NVME_IOPOLICY_RR:
+		return sprintf(buf, "round-robin\n");
+	default:
+		return sprintf(buf, "<unknown>\n");
+	}
+}
+
+#define SUBSYS_ATTR_RW(_name, _mode, _show, _store)  \
+	struct device_attribute subsys_attr_##_name =	\
+		__ATTR(_name, _mode, _show, _store)
+
+/*
+ * Set the subsystem's I/O policy from its sysfs name: "none", "numa" or
+ * "round-robin". A trailing newline in the input is ignored.
+ *
+ * Returns @count on success, -EINVAL if the name is not recognized.
+ */
+static ssize_t nvme_subsys_iopolicy_store(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct nvme_subsystem *subsys =
+		container_of(dev, struct nvme_subsystem, dev);
+	unsigned int iopolicy;
+
+	if (sysfs_streq(buf, "none"))
+		iopolicy = NVME_IOPOLICY_NONE;
+	else if (sysfs_streq(buf, "numa"))
+		iopolicy = NVME_IOPOLICY_NUMA;
+	else if (sysfs_streq(buf, "round-robin"))
+		iopolicy = NVME_IOPOLICY_RR;
+	else
+		return -EINVAL;
+
+	/* Apply the parsed policy; without this the write has no effect. */
+	subsys->iopolicy = iopolicy;
+	return count;
+}
+SUBSYS_ATTR_RW(iopolicy, S_IRUGO | S_IWUSR,
+		      nvme_subsys_iopolicy_show, nvme_subsys_iopolicy_store);
+
 static ssize_t ana_grpid_show(struct device *dev, struct device_attribute *attr,
 		char *buf)
 {
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index 27663ce3044e..edd7602b98eb 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -261,6 +261,13 @@ struct nvme_subsystem {
 	u8			cmic;
 	u16			vendor_id;
 	struct ida		ns_ida;
+#ifdef CONFIG_NVME_MULTIPATH
+#define NVME_IOPOLICY_UNKNOWN 0
+#define NVME_IOPOLICY_NONE 1
+#define NVME_IOPOLICY_NUMA 2
+#define NVME_IOPOLICY_RR 3
+	unsigned int		iopolicy;
+#endif
 };
 
 /*
@@ -491,6 +498,7 @@ static inline void nvme_mpath_check_last_path(struct nvme_ns *ns)
 
 extern struct device_attribute dev_attr_ana_grpid;
 extern struct device_attribute dev_attr_ana_state;
+extern struct device_attribute subsys_attr_iopolicy;
 
 #else
 static inline bool nvme_ctrl_use_ana(struct nvme_ctrl *ctrl)
-- 
2.16.4




[Index of Archives]     [Linux Filesystems]     [Linux SCSI]     [Linux RAID]     [Git]     [Kernel Newbies]     [Linux Newbie]     [Security]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Samba]     [Device Mapper]

  Powered by Linux