Use the previously introduced NVME wrapper library for the passthrough commands from the ANA prioritizer. Discard code duplicated from nvme-cli from the ana code itself. Furthermore, make additional cleanups in the ANA prioritizer: - don't use the same enum for priorities and error codes - use char* arrays for error messages and state names - return -1 prio to libmultipath for all error cases - check if a device is NVMe before trying ioctl - check for overflow in check_ana_state() - get_ana_info(): improve readability with is_anagrpid_const - priorities: PERSISTENT_LOSS state is worse than INACCESSIBLE and CHANGE Cc: lijie <lijie34@xxxxxxxxxx> Signed-off-by: Martin Wilck <mwilck@xxxxxxxx> --- libmultipath/prioritizers/Makefile | 6 +- libmultipath/prioritizers/ana.c | 305 ++++++++++------------------- 2 files changed, 113 insertions(+), 198 deletions(-) diff --git a/libmultipath/prioritizers/Makefile b/libmultipath/prioritizers/Makefile index 15afaba3..4d80c20c 100644 --- a/libmultipath/prioritizers/Makefile +++ b/libmultipath/prioritizers/Makefile @@ -19,9 +19,13 @@ LIBS = \ libpriordac.so \ libprioweightedpath.so \ libpriopath_latency.so \ - libprioana.so \ libpriosysfs.so +ifneq ($(call check_file,/usr/include/linux/nvme_ioctl.h),0) + LIBS += libprioana.so + CFLAGS += -I../nvme +endif + all: $(LIBS) libprioalua.so: alua.o alua_rtpg.o diff --git a/libmultipath/prioritizers/ana.c b/libmultipath/prioritizers/ana.c index c5aaa5fb..88edb224 100644 --- a/libmultipath/prioritizers/ana.c +++ b/libmultipath/prioritizers/ana.c @@ -17,155 +17,91 @@ #include <sys/stat.h> #include <sys/types.h> #include <stdbool.h> +#include <libudev.h> #include "debug.h" +#include "nvme-lib.h" #include "prio.h" +#include "util.h" #include "structs.h" -#include "ana.h" enum { - ANA_PRIO_OPTIMIZED = 50, - ANA_PRIO_NONOPTIMIZED = 10, - ANA_PRIO_INACCESSIBLE = 5, - ANA_PRIO_PERSISTENT_LOSS = 1, - ANA_PRIO_CHANGE = 0, - ANA_PRIO_RESERVED = 0, - ANA_PRIO_GETCTRL_FAILED = -1, - ANA_PRIO_NOT_SUPPORTED = -2, - ANA_PRIO_GETANAS_FAILED = -3, - ANA_PRIO_GETANALOG_FAILED = -4, - ANA_PRIO_GETNSID_FAILED = -5, - ANA_PRIO_GETNS_FAILED = -6, - ANA_PRIO_NO_MEMORY = -7, - ANA_PRIO_NO_INFORMATION = -8, + ANA_ERR_GETCTRL_FAILED = 1, + ANA_ERR_NOT_NVME, + ANA_ERR_NOT_SUPPORTED, + ANA_ERR_GETANAS_OVERFLOW, + ANA_ERR_GETANAS_NOTFOUND, + ANA_ERR_GETANALOG_FAILED, + ANA_ERR_GETNSID_FAILED, + ANA_ERR_GETNS_FAILED, + ANA_ERR_NO_MEMORY, + ANA_ERR_NO_INFORMATION, }; -static const char * anas_string[] = { +static const char *ana_errmsg[] = { + [ANA_ERR_GETCTRL_FAILED] = "couldn't get ctrl info", + [ANA_ERR_NOT_NVME] = "not an NVMe device", + [ANA_ERR_NOT_SUPPORTED] = "ANA not supported", + [ANA_ERR_GETANAS_OVERFLOW] = "buffer overflow in ANA log", + [ANA_ERR_GETANAS_NOTFOUND] = "NSID or ANAGRPID not found", + [ANA_ERR_GETANALOG_FAILED] = "couldn't get ana log", + [ANA_ERR_GETNSID_FAILED] = "couldn't get NSID", + [ANA_ERR_GETNS_FAILED] = "couldn't get namespace info", + [ANA_ERR_NO_MEMORY] = "out of memory", + [ANA_ERR_NO_INFORMATION] = "invalid fd", +}; + +/* Use the implicit initialization: value 0 is "invalid" */ +static const int ana_prio [] = { + [NVME_ANA_OPTIMIZED] = 50, + [NVME_ANA_NONOPTIMIZED] = 10, + [NVME_ANA_INACCESSIBLE] = 5, + [NVME_ANA_PERSISTENT_LOSS] = 1, + [NVME_ANA_CHANGE] = 5, +}; + +static const char *anas_string[] = { [NVME_ANA_OPTIMIZED] = "ANA Optimized State", [NVME_ANA_NONOPTIMIZED] = "ANA Non-Optimized State", [NVME_ANA_INACCESSIBLE] = "ANA Inaccessible State", [NVME_ANA_PERSISTENT_LOSS] = "ANA Persistent Loss State", [NVME_ANA_CHANGE] = "ANA Change state", - [NVME_ANA_RESERVED] = "Invalid namespace group state!", }; static const char *aas_print_string(int rc) { rc &= 0xff; - - switch(rc) { - case NVME_ANA_OPTIMIZED: - case NVME_ANA_NONOPTIMIZED: - case NVME_ANA_INACCESSIBLE: - case NVME_ANA_PERSISTENT_LOSS: - case NVME_ANA_CHANGE: + if (rc >= 0 && rc < ARRAY_SIZE(anas_string) && + anas_string[rc] != NULL) return anas_string[rc]; - default: - return anas_string[NVME_ANA_RESERVED]; - } - - return anas_string[NVME_ANA_RESERVED]; -} - -static int nvme_get_nsid(int fd, unsigned *nsid) -{ - static struct stat nvme_stat; - int err = fstat(fd, &nvme_stat); - if (err < 0) - return 1; - - if (!S_ISBLK(nvme_stat.st_mode)) { - condlog(0, "Error: requesting namespace-id from non-block device\n"); - return 1; - } - - *nsid = ioctl(fd, NVME_IOCTL_ID); - return 0; -} - -static int nvme_submit_admin_passthru(int fd, struct nvme_passthru_cmd *cmd) -{ - return ioctl(fd, NVME_IOCTL_ADMIN_CMD, cmd); -} - -int nvme_get_log13(int fd, __u32 nsid, __u8 log_id, __u8 lsp, __u64 lpo, - __u16 lsi, bool rae, __u32 data_len, void *data) -{ - struct nvme_admin_cmd cmd = { - .opcode = nvme_admin_get_log_page, - .nsid = nsid, - .addr = (__u64)(uintptr_t) data, - .data_len = data_len, - }; - __u32 numd = (data_len >> 2) - 1; - __u16 numdu = numd >> 16, numdl = numd & 0xffff; - - cmd.cdw10 = log_id | (numdl << 16) | (rae ? 1 << 15 : 0); - if (lsp) - cmd.cdw10 |= lsp << 8; - - cmd.cdw11 = numdu | (lsi << 16); - cmd.cdw12 = lpo; - cmd.cdw13 = (lpo >> 32); - - return nvme_submit_admin_passthru(fd, &cmd); - -} - -int nvme_identify13(int fd, __u32 nsid, __u32 cdw10, __u32 cdw11, void *data) -{ - struct nvme_admin_cmd cmd = { - .opcode = nvme_admin_identify, - .nsid = nsid, - .addr = (__u64)(uintptr_t) data, - .data_len = NVME_IDENTIFY_DATA_SIZE, - .cdw10 = cdw10, - .cdw11 = cdw11, - }; - - return nvme_submit_admin_passthru(fd, &cmd); -} - -int nvme_identify(int fd, __u32 nsid, __u32 cdw10, void *data) -{ - return nvme_identify13(fd, nsid, cdw10, 0, data); -} -int nvme_identify_ctrl(int fd, void *data) -{ - return nvme_identify(fd, 0, NVME_ID_CNS_CTRL, data); -} - -int nvme_identify_ns(int fd, __u32 nsid, void *data) -{ - return nvme_identify(fd, nsid, NVME_ID_CNS_NS, data); -} - -int nvme_ana_log(int fd, void *ana_log, size_t ana_log_len, int rgo) -{ - __u64 lpo = 0; - - return nvme_get_log13(fd, NVME_NSID_ALL, NVME_LOG_ANA, rgo, lpo, 0, - true, ana_log_len, ana_log); + return "invalid ANA state"; } -static int get_ana_state(__u32 nsid, __u32 anagrpid, void *ana_log) +static int get_ana_state(__u32 nsid, __u32 anagrpid, void *ana_log, + size_t ana_log_len) { - int rc = ANA_PRIO_GETANAS_FAILED; void *base = ana_log; struct nvme_ana_rsp_hdr *hdr = base; struct nvme_ana_group_desc *ana_desc; - int offset = sizeof(struct nvme_ana_rsp_hdr); + size_t offset = sizeof(struct nvme_ana_rsp_hdr); __u32 nr_nsids; size_t nsid_buf_size; int i, j; for (i = 0; i < le16_to_cpu(hdr->ngrps); i++) { ana_desc = base + offset; + + offset += sizeof(*ana_desc); + if (offset > ana_log_len) + return -ANA_ERR_GETANAS_OVERFLOW; + nr_nsids = le32_to_cpu(ana_desc->nnsids); nsid_buf_size = nr_nsids * sizeof(__le32); - offset += sizeof(*ana_desc); + offset += nsid_buf_size; + if (offset > ana_log_len) + return -ANA_ERR_GETANAS_OVERFLOW; for (j = 0; j < nr_nsids; j++) { if (nsid == le32_to_cpu(ana_desc->nsids[j])) @@ -173,12 +109,10 @@ static int get_ana_state(__u32 nsid, __u32 anagrpid, void *ana_log) } if (anagrpid != 0 && anagrpid == le32_to_cpu(ana_desc->grpid)) - rc = ana_desc->state; + return ana_desc->state; - offset += nsid_buf_size; } - - return rc; + return -ANA_ERR_GETANAS_NOTFOUND; } int get_ana_info(struct path * pp, unsigned int timeout) @@ -189,104 +123,81 @@ int get_ana_info(struct path * pp, unsigned int timeout) struct nvme_id_ns ns; void *ana_log; size_t ana_log_len; + bool is_anagrpid_const; rc = nvme_identify_ctrl(pp->fd, &ctrl); - if (rc) - return ANA_PRIO_GETCTRL_FAILED; + if (rc < 0) { + log_nvme_errcode(rc, pp->dev, "nvme_identify_ctrl"); + return -ANA_ERR_GETCTRL_FAILED; + } if(!(ctrl.cmic & (1 << 3))) - return ANA_PRIO_NOT_SUPPORTED; - - rc = nvme_get_nsid(pp->fd, &nsid); - if (rc) - return ANA_PRIO_GETNSID_FAILED; + return -ANA_ERR_NOT_SUPPORTED; - rc = nvme_identify_ns(pp->fd, nsid, &ns); - if (rc) - return ANA_PRIO_GETNS_FAILED; + nsid = nvme_get_nsid(pp->fd); + if (nsid <= 0) { + log_nvme_errcode(rc, pp->dev, "nvme_get_nsid"); + return -ANA_ERR_GETNSID_FAILED; + } + is_anagrpid_const = ctrl.anacap & (1 << 6); + /* + * Code copied from nvme-cli/nvme.c. We don't need to allocate an + * [nanagrpid*mnan] array of NSIDs because each NSID can occur at most + * in one ANA group. + */ ana_log_len = sizeof(struct nvme_ana_rsp_hdr) + - le32_to_cpu(ctrl.nanagrpid) * sizeof(struct nvme_ana_group_desc); - if (!(ctrl.anacap & (1 << 6))) + le32_to_cpu(ctrl.nanagrpid) + * sizeof(struct nvme_ana_group_desc); + + if (is_anagrpid_const) { + rc = nvme_identify_ns(pp->fd, nsid, 0, &ns); + if (rc) { + log_nvme_errcode(rc, pp->dev, "nvme_identify_ns"); + return -ANA_ERR_GETNS_FAILED; + } + } else ana_log_len += le32_to_cpu(ctrl.mnan) * sizeof(__le32); ana_log = malloc(ana_log_len); if (!ana_log) - return ANA_PRIO_NO_MEMORY; - + return -ANA_ERR_NO_MEMORY; + pthread_cleanup_push(free, ana_log); rc = nvme_ana_log(pp->fd, ana_log, ana_log_len, - (ctrl.anacap & (1 << 6)) ? NVME_ANA_LOG_RGO : 0); + is_anagrpid_const ? NVME_ANA_LOG_RGO : 0); if (rc) { - free(ana_log); - return ANA_PRIO_GETANALOG_FAILED; - } - - rc = get_ana_state(nsid, le32_to_cpu(ns.anagrpid), ana_log); - if (rc < 0){ - free(ana_log); - return ANA_PRIO_GETANAS_FAILED; - } - - free(ana_log); - condlog(3, "%s: ana state = %02x [%s]", pp->dev, rc, aas_print_string(rc)); - + log_nvme_errcode(rc, pp->dev, "nvme_ana_log"); + rc = -ANA_ERR_GETANALOG_FAILED; + } else + rc = get_ana_state(nsid, + is_anagrpid_const ? + le32_to_cpu(ns.anagrpid) : 0, + ana_log, ana_log_len); + pthread_cleanup_pop(1); + if (rc >= 0) + condlog(3, "%s: ana state = %02x [%s]", pp->dev, rc, + aas_print_string(rc)); return rc; } -int getprio(struct path * pp, char * args, unsigned int timeout) +int getprio(struct path *pp, char *args, unsigned int timeout) { int rc; if (pp->fd < 0) - return ANA_PRIO_NO_INFORMATION; - - rc = get_ana_info(pp, timeout); - if (rc >= 0) { - rc &= 0x0f; - switch(rc) { - case NVME_ANA_OPTIMIZED: - rc = ANA_PRIO_OPTIMIZED; - break; - case NVME_ANA_NONOPTIMIZED: - rc = ANA_PRIO_NONOPTIMIZED; - break; - case NVME_ANA_INACCESSIBLE: - rc = ANA_PRIO_INACCESSIBLE; - break; - case NVME_ANA_PERSISTENT_LOSS: - rc = ANA_PRIO_PERSISTENT_LOSS; - break; - case NVME_ANA_CHANGE: - rc = ANA_PRIO_CHANGE; - break; - default: - rc = ANA_PRIO_RESERVED; - } - } else { - switch(rc) { - case ANA_PRIO_GETCTRL_FAILED: - condlog(0, "%s: couldn't get ctrl info", pp->dev); - break; - case ANA_PRIO_NOT_SUPPORTED: - condlog(0, "%s: ana not supported", pp->dev); - break; - case ANA_PRIO_GETANAS_FAILED: - condlog(0, "%s: couldn't get ana state", pp->dev); - break; - case ANA_PRIO_GETANALOG_FAILED: - condlog(0, "%s: couldn't get ana log", pp->dev); - break; - case ANA_PRIO_GETNS_FAILED: - condlog(0, "%s: couldn't get namespace", pp->dev); - break; - case ANA_PRIO_GETNSID_FAILED: - condlog(0, "%s: couldn't get namespace id", pp->dev); - break; - case ANA_PRIO_NO_MEMORY: - condlog(0, "%s: couldn't alloc memory", pp->dev); - break; - } + rc = -ANA_ERR_NO_INFORMATION; + else if (udev_device_get_parent_with_subsystem_devtype(pp->udev, + "nvme", NULL) + == NULL) + rc = -ANA_ERR_NOT_NVME; + else { + rc = get_ana_info(pp, timeout); + if (rc >= 0 && rc < ARRAY_SIZE(ana_prio) && ana_prio[rc] != 0) + return ana_prio[rc]; } - return rc; + if (rc < 0 && -rc < ARRAY_SIZE(ana_errmsg)) + condlog(2, "%s: ANA error: %s", pp->dev, ana_errmsg[-rc]); + else + condlog(1, "%s: invalid ANA rc code %d", pp->dev, rc); + return -1; } - -- 2.19.2 -- dm-devel mailing list dm-devel@xxxxxxxxxx https://www.redhat.com/mailman/listinfo/dm-devel