A multipath system might provide more than one optimized path to a given namespace. This patch implements a simple round-robin I/O path scheduler to allow the system to utilize all paths. Signed-off-by: Hannes Reinecke <hare@xxxxxxxx> --- drivers/nvme/host/core.c | 6 ++++ drivers/nvme/host/multipath.c | 76 ++++++++++++++++++++++++++++++++++++++++++- drivers/nvme/host/nvme.h | 8 +++++ 3 files changed, 89 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index deb047514408..03d084649fdc 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -2189,6 +2189,9 @@ static struct attribute *nvme_subsys_attrs[] = { &subsys_attr_serial.attr, &subsys_attr_firmware_rev.attr, &subsys_attr_subsysnqn.attr, +#ifdef CONFIG_NVME_MULTIPATH + &subsys_attr_iopolicy.attr, +#endif NULL, }; @@ -2241,6 +2244,9 @@ static int nvme_init_subsystem(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id) memcpy(subsys->firmware_rev, id->fr, sizeof(subsys->firmware_rev)); subsys->vendor_id = le16_to_cpu(id->vid); subsys->cmic = id->cmic; +#ifdef CONFIG_NVME_MULTIPATH + subsys->iopolicy = NVME_IOPOLICY_NUMA; +#endif subsys->dev.class = nvme_subsys_class; subsys->dev.release = nvme_release_subsystem; diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c index 8e03cda770c5..e4806edc6f91 100644 --- a/drivers/nvme/host/multipath.c +++ b/drivers/nvme/host/multipath.c @@ -141,7 +141,10 @@ static struct nvme_ns *__nvme_find_path(struct nvme_ns_head *head, int node) test_bit(NVME_NS_ANA_PENDING, &ns->flags)) continue; - distance = node_distance(node, ns->ctrl->numa_node); + if (head->subsys->iopolicy == NVME_IOPOLICY_NUMA) + distance = node_distance(node, ns->ctrl->numa_node); + else + distance = LOCAL_DISTANCE; switch (ns->ana_state) { case NVME_ANA_OPTIMIZED: @@ -174,14 +177,44 @@ static inline bool nvme_path_is_optimized(struct nvme_ns *ns) ns->ana_state == NVME_ANA_OPTIMIZED; } +inline struct nvme_ns *__nvme_next_path(struct 
nvme_ns_head *head, int node, + struct nvme_ns *old) +{ + struct nvme_ns *ns, *found = NULL; + + do { + ns = list_next_or_null_rcu(&head->list, &old->siblings, + struct nvme_ns, siblings); + if (!ns) + ns = list_first_or_null_rcu(&head->list, struct nvme_ns, + siblings); + + if (ns && nvme_path_is_optimized(ns)) { + found = ns; + break; + } + } while (ns != old); + + if (found) + rcu_assign_pointer(head->current_path[node], found); + + return found; +} + inline struct nvme_ns *nvme_find_path(struct nvme_ns_head *head) { int node = numa_node_id(); struct nvme_ns *ns; ns = srcu_dereference(head->current_path[node], &head->srcu); +retry: if (unlikely(!ns || !nvme_path_is_optimized(ns))) ns = __nvme_find_path(head, node); + else if (head->subsys->iopolicy == NVME_IOPOLICY_RR) { + ns = __nvme_next_path(head, node, ns); + if (!ns) + goto retry; + } return ns; } @@ -486,6 +519,47 @@ void nvme_mpath_stop(struct nvme_ctrl *ctrl) cancel_work_sync(&ctrl->ana_work); } +static ssize_t nvme_subsys_iopolicy_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct nvme_subsystem *subsys = + container_of(dev, struct nvme_subsystem, dev); + switch (subsys->iopolicy) { + case NVME_IOPOLICY_NONE: + return sprintf(buf, "none"); + case NVME_IOPOLICY_NUMA: + return sprintf(buf, "numa"); + case NVME_IOPOLICY_RR: + return sprintf(buf, "round-robin"); + default: + return sprintf(buf, "<unknown>"); + } +} + +#define SUBSYS_ATTR_RW(_name, _mode, _show, _store) \ + struct device_attribute subsys_attr_##_name = \ + __ATTR(_name, _mode, _show, _store) + +static ssize_t nvme_subsys_iopolicy_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + unsigned int iopolicy = NVME_IOPOLICY_UNKNOWN; + + if (!strncmp(buf, "none", 4)) + iopolicy = NVME_IOPOLICY_NONE; + else if (!strncmp(buf, "numa", 4)) + iopolicy = NVME_IOPOLICY_NUMA; + else if (!strncmp(buf, "round-robin", 11)) + iopolicy = NVME_IOPOLICY_RR; + + if (iopolicy == 
NVME_IOPOLICY_UNKNOWN) + return -EINVAL; + + return count; +} +SUBSYS_ATTR_RW(iopolicy, S_IRUGO | S_IWUSR, + nvme_subsys_iopolicy_show, nvme_subsys_iopolicy_store); + static ssize_t ana_grpid_show(struct device *dev, struct device_attribute *attr, char *buf) { diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 27663ce3044e..edd7602b98eb 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -261,6 +261,13 @@ struct nvme_subsystem { u8 cmic; u16 vendor_id; struct ida ns_ida; +#ifdef CONFIG_NVME_MULTIPATH +#define NVME_IOPOLICY_UNKNOWN 0 +#define NVME_IOPOLICY_NONE 1 +#define NVME_IOPOLICY_NUMA 2 +#define NVME_IOPOLICY_RR 3 + unsigned int iopolicy; +#endif }; /* @@ -491,6 +498,7 @@ static inline void nvme_mpath_check_last_path(struct nvme_ns *ns) extern struct device_attribute dev_attr_ana_grpid; extern struct device_attribute dev_attr_ana_state; +extern struct device_attribute subsys_attr_iopolicy; #else static inline bool nvme_ctrl_use_ana(struct nvme_ctrl *ctrl) -- 2.16.4