1. The SCSI-to-NVMe translations have been removed by the patch "nvme: Remove SCSI translations" in linux-nvme, so multipath-tools should support native NVMe ioctl commands. 2. In prioritizers/path_latency.c, modify the function do_readsector0() to send a native NVMe Read ioctl command to NVMe devices and an SG Read ioctl command to SCSI devices. 3. In the checkers, delete the file tur.c and create the new file ping.c: ping.c sends the native NVMe Keep Alive ioctl command to NVMe devices and the SG TUR ioctl command to SCSI devices. Signed-off-by: Yang Feng <philip.yang@xxxxxxxxxx> --- libmultipath/checkers.c | 7 + libmultipath/checkers.h | 6 +- libmultipath/checkers/Makefile | 6 +- libmultipath/checkers/emc_clariion.c | 4 +- libmultipath/checkers/libsg.c | 94 ------- libmultipath/checkers/libsg.h | 9 - libmultipath/checkers/ping.c | 453 +++++++++++++++++++++++++++++++ libmultipath/checkers/readsector0.c | 4 +- libmultipath/checkers/tur.c | 427 ----------------------------- libmultipath/checkers/tur.h | 8 - libmultipath/defaults.h | 2 +- libmultipath/discovery.c | 1 + libmultipath/hwtable.c | 2 +- libmultipath/libnvme.c | 130 +++++++++ libmultipath/libnvme.h | 10 + libmultipath/libsg.c | 113 ++++++++ libmultipath/libsg.h | 13 + libmultipath/prioritizers/Makefile | 2 +- libmultipath/prioritizers/path_latency.c | 58 +--- libmultipath/propsel.c | 2 +- multipath/multipath.conf.5 | 4 +- 21 files changed, 754 insertions(+), 601 deletions(-) delete mode 100644 libmultipath/checkers/libsg.c delete mode 100644 libmultipath/checkers/libsg.h create mode 100644 libmultipath/checkers/ping.c delete mode 100644 libmultipath/checkers/tur.c delete mode 100644 libmultipath/checkers/tur.h create mode 100644 libmultipath/libnvme.c create mode 100644 libmultipath/libnvme.h create mode 100644 libmultipath/libsg.c create mode 100644 libmultipath/libsg.h diff --git a/libmultipath/checkers.c b/libmultipath/checkers.c index 
05e024f..00fbd6e 100644 --- a/libmultipath/checkers.c +++ b/libmultipath/checkers.c @@ -162,6 +162,13 @@ void checker_set_fd (struct checker * c, int fd) c->fd = fd; } +void checker_set_dev(struct checker *c, char *dev) +{ + if (!c) + return; + strncpy(c->dev, dev, strlen(dev)+1); +} + void checker_set_sync (struct checker * c) { if (!c) diff --git a/libmultipath/checkers.h b/libmultipath/checkers.h index 1d225de..506dd4c 100644 --- a/libmultipath/checkers.h +++ b/libmultipath/checkers.h @@ -79,7 +79,7 @@ enum path_check_state { }; #define DIRECTIO "directio" -#define TUR "tur" +#define PING "ping" #define HP_SW "hp_sw" #define RDAC "rdac" #define EMC_CLARIION "emc_clariion" @@ -97,6 +97,8 @@ enum path_check_state { #define CHECKER_DEV_LEN 256 #define LIB_CHECKER_NAMELEN 256 +#define FILE_NAME_SIZE 256 + struct checker { struct list_head node; void *handle; @@ -107,6 +109,7 @@ struct checker { int disable; char name[CHECKER_NAME_LEN]; char message[CHECKER_MSG_LEN]; /* comm with callers */ + char dev[FILE_NAME_SIZE]; void * context; /* store for persistent data */ void ** mpcontext; /* store for persistent data shared multipath-wide. Use MALLOC if @@ -132,6 +135,7 @@ void checker_reset (struct checker *); void checker_set_sync (struct checker *); void checker_set_async (struct checker *); void checker_set_fd (struct checker *, int); +void checker_set_dev(struct checker *c, char *dev); void checker_enable (struct checker *); void checker_disable (struct checker *); void checker_repair (struct checker *); diff --git a/libmultipath/checkers/Makefile b/libmultipath/checkers/Makefile index bce6b8b..3ab04ef 100644 --- a/libmultipath/checkers/Makefile +++ b/libmultipath/checkers/Makefile @@ -9,7 +9,7 @@ CFLAGS += $(LIB_CFLAGS) -I.. 
LIBS= \ libcheckcciss_tur.so \ libcheckreadsector0.so \ - libchecktur.so \ + libcheckping.so \ libcheckdirectio.so \ libcheckemc_clariion.so \ libcheckhp_sw.so \ @@ -24,10 +24,10 @@ all: $(LIBS) libcheckrbd.so: rbd.o $(CC) $(LDFLAGS) $(SHARED_FLAGS) -o $@ $^ -lrados -ludev -libcheckdirectio.so: libsg.o directio.o +libcheckdirectio.so: ../libsg.o ../libnvme.o directio.o $(CC) $(LDFLAGS) $(SHARED_FLAGS) -o $@ $^ -laio -libcheck%.so: libsg.o %.o +libcheck%.so: ../libsg.o ../libnvme.o %.o $(CC) $(LDFLAGS) $(SHARED_FLAGS) -o $@ $^ install: diff --git a/libmultipath/checkers/emc_clariion.c b/libmultipath/checkers/emc_clariion.c index 9c1ffed..12c1e3e 100644 --- a/libmultipath/checkers/emc_clariion.c +++ b/libmultipath/checkers/emc_clariion.c @@ -12,7 +12,7 @@ #include <errno.h> #include "../libmultipath/sg_include.h" -#include "libsg.h" +#include "../libmultipath/libsg.h" #include "checkers.h" #include "debug.h" #include "memory.h" @@ -21,6 +21,8 @@ #define INQUIRY_CMDLEN 6 #define HEAVY_CHECK_COUNT 10 +#define SENSE_BUFF_LEN 32 + /* * Mechanism to track CLARiiON inactive snapshot LUs. 
* This is done so that we can fail passive paths diff --git a/libmultipath/checkers/libsg.c b/libmultipath/checkers/libsg.c deleted file mode 100644 index 958ea92..0000000 --- a/libmultipath/checkers/libsg.c +++ /dev/null @@ -1,94 +0,0 @@ -/* - * Copyright (c) 2004, 2005 Christophe Varoqui - */ -#include <string.h> -#include <sys/ioctl.h> -#include <errno.h> -#include <sys/stat.h> - -#include "checkers.h" -#include "libsg.h" -#include "../libmultipath/sg_include.h" - -int -sg_read (int sg_fd, unsigned char * buff, int buff_len, - unsigned char * sense, int sense_len, unsigned int timeout) -{ - /* defaults */ - int blocks; - long long start_block = 0; - int bs = 512; - int cdbsz = 10; - - unsigned char rdCmd[cdbsz]; - unsigned char *sbb = sense; - struct sg_io_hdr io_hdr; - int res; - int rd_opcode[] = {0x8, 0x28, 0xa8, 0x88}; - int sz_ind; - struct stat filestatus; - int retry_count = 3; - - if (fstat(sg_fd, &filestatus) != 0) - return PATH_DOWN; - bs = (filestatus.st_blksize > 4096)? 4096: filestatus.st_blksize; - blocks = buff_len / bs; - memset(rdCmd, 0, cdbsz); - sz_ind = 1; - rdCmd[0] = rd_opcode[sz_ind]; - rdCmd[2] = (unsigned char)((start_block >> 24) & 0xff); - rdCmd[3] = (unsigned char)((start_block >> 16) & 0xff); - rdCmd[4] = (unsigned char)((start_block >> 8) & 0xff); - rdCmd[5] = (unsigned char)(start_block & 0xff); - rdCmd[7] = (unsigned char)((blocks >> 8) & 0xff); - rdCmd[8] = (unsigned char)(blocks & 0xff); - - memset(&io_hdr, 0, sizeof(struct sg_io_hdr)); - io_hdr.interface_id = 'S'; - io_hdr.cmd_len = cdbsz; - io_hdr.cmdp = rdCmd; - io_hdr.dxfer_direction = SG_DXFER_FROM_DEV; - io_hdr.dxfer_len = bs * blocks; - io_hdr.dxferp = buff; - io_hdr.mx_sb_len = sense_len; - io_hdr.sbp = sense; - io_hdr.timeout = timeout * 1000; - io_hdr.pack_id = (int)start_block; - -retry: - memset(sense, 0, sense_len); - while (((res = ioctl(sg_fd, SG_IO, &io_hdr)) < 0) && (EINTR == errno)); - - if (res < 0) { - if (ENOMEM == errno) { - return PATH_UP; - } - return 
PATH_DOWN; - } - - if ((0 == io_hdr.status) && - (0 == io_hdr.host_status) && - (0 == io_hdr.driver_status)) { - return PATH_UP; - } else { - int key = 0; - - if (io_hdr.sb_len_wr > 3) { - if (sbb[0] == 0x72 || sbb[0] == 0x73) - key = sbb[1] & 0x0f; - else if (io_hdr.sb_len_wr > 13 && - ((sbb[0] & 0x7f) == 0x70 || - (sbb[0] & 0x7f) == 0x71)) - key = sbb[2] & 0x0f; - } - - /* - * Retry if UNIT_ATTENTION check condition. - */ - if (key == 0x6) { - if (--retry_count) - goto retry; - } - return PATH_DOWN; - } -} diff --git a/libmultipath/checkers/libsg.h b/libmultipath/checkers/libsg.h deleted file mode 100644 index 3994f45..0000000 --- a/libmultipath/checkers/libsg.h +++ /dev/null @@ -1,9 +0,0 @@ -#ifndef _LIBSG_H -#define _LIBSG_H - -#define SENSE_BUFF_LEN 32 - -int sg_read (int sg_fd, unsigned char * buff, int buff_len, - unsigned char * sense, int sense_len, unsigned int timeout); - -#endif /* _LIBSG_H */ diff --git a/libmultipath/checkers/ping.c b/libmultipath/checkers/ping.c new file mode 100644 index 0000000..3a87571 --- /dev/null +++ b/libmultipath/checkers/ping.c @@ -0,0 +1,453 @@ +/* + * Some code borrowed from sg-utils and + * NVM-Express command line utility, + * including using of a TUR command and + * a Keep Alive command. 
+ * + * Copyright (c) 2004 Christophe Varoqui + */ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <unistd.h> +#include <fcntl.h> +#include <sys/ioctl.h> +#include <sys/sysmacros.h> +#include <errno.h> +#include <sys/time.h> +#include <pthread.h> + +#include "checkers.h" + +#include "../libmultipath/debug.h" +#include "../libmultipath/sg_include.h" +#include "../libmultipath/util.h" +#include "../libmultipath/time-util.h" +#include "../libmultipath/libsg.h" +#include "../libmultipath/libnvme.h" + +#define SENSE_BUFF_LEN 32 + +#define MSG_PING_UP "ping checker reports path is up" +#define MSG_PING_DOWN "ping checker reports path is down" +#define MSG_PING_GHOST "ping checker reports path is in standby state" +#define MSG_PING_RUNNING "ping checker still running" +#define MSG_PING_TIMEOUT "ping checker timed out" +#define MSG_PING_FAILED "ping checker failed to initialize" + +struct ping_checker_context { + dev_t devt; + int state; + int running; + int fd; + char dev[FILE_NAME_SIZE]; + unsigned int timeout; + time_t time; + pthread_t thread; + pthread_mutex_t lock; + pthread_cond_t active; + pthread_spinlock_t hldr_lock; + int holders; + char message[CHECKER_MSG_LEN]; +}; + +static const char *ping_devt(char *devt_buf, int size, + struct ping_checker_context *ct) +{ + dev_t devt; + + pthread_mutex_lock(&ct->lock); + devt = ct->devt; + pthread_mutex_unlock(&ct->lock); + + snprintf(devt_buf, size, "%d:%d", major(devt), minor(devt)); + return devt_buf; +} + +int libcheck_init (struct checker * c) +{ + struct ping_checker_context *ct; + pthread_mutexattr_t attr; + + ct = malloc(sizeof(struct ping_checker_context)); + if (!ct) + return 1; + memset(ct, 0, sizeof(struct ping_checker_context)); + + ct->state = PATH_UNCHECKED; + ct->fd = -1; + ct->holders = 1; + memset(ct->dev, 0, sizeof(ct->dev)); + pthread_cond_init_mono(&ct->active); + pthread_mutexattr_init(&attr); + pthread_mutexattr_settype(&attr, 
PTHREAD_MUTEX_RECURSIVE); + pthread_mutex_init(&ct->lock, &attr); + pthread_mutexattr_destroy(&attr); + pthread_spin_init(&ct->hldr_lock, PTHREAD_PROCESS_PRIVATE); + c->context = ct; + + return 0; +} + +static void cleanup_context(struct ping_checker_context *ct) +{ + pthread_mutex_destroy(&ct->lock); + pthread_cond_destroy(&ct->active); + pthread_spin_destroy(&ct->hldr_lock); + free(ct); +} + +void libcheck_free (struct checker * c) +{ + if (c->context) { + struct ping_checker_context *ct = c->context; + int holders; + pthread_t thread; + + pthread_spin_lock(&ct->hldr_lock); + ct->holders--; + holders = ct->holders; + thread = ct->thread; + pthread_spin_unlock(&ct->hldr_lock); + if (holders) + pthread_cancel(thread); + else + cleanup_context(ct); + c->context = NULL; + } + return; +} + +void libcheck_repair (struct checker * c) +{ + return; +} + +#define PING_MSG(fmt, args...) \ + do { \ + char msg[CHECKER_MSG_LEN]; \ + \ + snprintf(msg, sizeof(msg), fmt, ##args); \ + copy_message(cb_arg, msg); \ + } while (0) + +static int +tur_check(int fd, unsigned int timeout, + void (*copy_message)(void *, const char *), void *cb_arg) +{ + struct sg_io_hdr io_hdr; + unsigned char sense_buffer[SENSE_BUFF_LEN]; + int retry_tur = 5; + +retry: + if (sg_tur(fd, &io_hdr, sense_buffer, + sizeof(sense_buffer), timeout) < 0) { + PING_MSG(MSG_PING_DOWN); + return PATH_DOWN; + } + + if ((io_hdr.status & 0x7e) == 0x18) { + /* + * SCSI-3 arrays might return + * reservation conflict on TUR + */ + PING_MSG(MSG_PING_UP); + return PATH_UP; + } + if (io_hdr.info & SG_INFO_OK_MASK) { + int key = 0, asc, ascq; + + switch (io_hdr.host_status) { + case DID_OK: + case DID_NO_CONNECT: + case DID_BAD_TARGET: + case DID_ABORT: + case DID_TRANSPORT_FAILFAST: + break; + default: + /* Driver error, retry */ + if (--retry_tur) + goto retry; + break; + } + if (io_hdr.sb_len_wr > 3) { + if (io_hdr.sbp[0] == 0x72 || io_hdr.sbp[0] == 0x73) { + key = io_hdr.sbp[1] & 0x0f; + asc = io_hdr.sbp[2]; + ascq = 
io_hdr.sbp[3]; + } else if (io_hdr.sb_len_wr > 13 && + ((io_hdr.sbp[0] & 0x7f) == 0x70 || + (io_hdr.sbp[0] & 0x7f) == 0x71)) { + key = io_hdr.sbp[2] & 0x0f; + asc = io_hdr.sbp[12]; + ascq = io_hdr.sbp[13]; + } + } + if (key == 0x6) { + /* Unit Attention, retry */ + if (--retry_tur) + goto retry; + } + else if (key == 0x2) { + /* Not Ready */ + /* Note: Other ALUA states are either UP or DOWN */ + if( asc == 0x04 && ascq == 0x0b){ + /* + * LOGICAL UNIT NOT ACCESSIBLE, + * TARGET PORT IN STANDBY STATE + */ + PING_MSG(MSG_PING_GHOST); + return PATH_GHOST; + } + } + PING_MSG(MSG_PING_DOWN); + return PATH_DOWN; + } + PING_MSG(MSG_PING_UP); + return PATH_UP; +} + +static int +keep_alive_check(int fd, unsigned int timeout, + void (*copy_message)(void *, const char *), void *cb_arg) +{ + int err; + + err = nvme_keep_alive(fd, timeout); + if (err == 0) { + PING_MSG(MSG_PING_UP); + return PATH_UP; + } + + PING_MSG(MSG_PING_DOWN); + return PATH_DOWN; +} + +static int +ping_check(int fd, char *dev, unsigned int timeout, + void (*copy_message)(void *, const char *), void *cb_arg) +{ + if (!strncmp(dev, "nvme", 4)) + { + return keep_alive_check(fd, timeout, copy_message, cb_arg); + } + else + { + return tur_check(fd, timeout, copy_message, cb_arg); + } +} + +#define ping_thread_cleanup_push(ct) pthread_cleanup_push(cleanup_func, ct) +#define ping_thread_cleanup_pop(ct) pthread_cleanup_pop(1) + +static void cleanup_func(void *data) +{ + int holders; + struct ping_checker_context *ct = data; + pthread_spin_lock(&ct->hldr_lock); + ct->holders--; + holders = ct->holders; + ct->thread = 0; + pthread_spin_unlock(&ct->hldr_lock); + if (!holders) + cleanup_context(ct); +} + +static int ping_running(struct ping_checker_context *ct) +{ + pthread_t thread; + + pthread_spin_lock(&ct->hldr_lock); + thread = ct->thread; + pthread_spin_unlock(&ct->hldr_lock); + + return thread != 0; +} + +static void copy_msg_to_tcc(void *ct_p, const char *msg) +{ + struct ping_checker_context *ct = ct_p; + + 
pthread_mutex_lock(&ct->lock); + strlcpy(ct->message, msg, sizeof(ct->message)); + pthread_mutex_unlock(&ct->lock); +} + +static void *ping_thread(void *ctx) +{ + struct ping_checker_context *ct = ctx; + int state; + char devt[32]; + + condlog(3, "%s: ping checker starting up", + ping_devt(devt, sizeof(devt), ct)); + + /* This thread can be canceled, so setup clean up */ + ping_thread_cleanup_push(ct); + + /* PING checker start up */ + pthread_mutex_lock(&ct->lock); + ct->state = PATH_PENDING; + ct->message[0] = '\0'; + pthread_mutex_unlock(&ct->lock); + state = ping_check(ct->fd, ct->dev, ct->timeout, copy_msg_to_tcc, ct->message); + pthread_testcancel(); + + /* PING checker done */ + pthread_mutex_lock(&ct->lock); + ct->state = state; + pthread_cond_signal(&ct->active); + pthread_mutex_unlock(&ct->lock); + + condlog(3, "%s: ping checker finished, state %s", + ping_devt(devt, sizeof(devt), ct), checker_state_name(state)); + ping_thread_cleanup_pop(ct); + + return ((void *)0); +} + + +static void ping_timeout(struct timespec *tsp) +{ + clock_gettime(CLOCK_MONOTONIC, tsp); + tsp->tv_nsec += 1000 * 1000; /* 1 millisecond */ + normalize_timespec(tsp); +} + +static void ping_set_async_timeout(struct checker *c) +{ + struct ping_checker_context *ct = c->context; + struct timespec now; + + clock_gettime(CLOCK_MONOTONIC, &now); + ct->time = now.tv_sec + c->timeout; +} + +static int ping_check_async_timeout(struct checker *c) +{ + struct ping_checker_context *ct = c->context; + struct timespec now; + + clock_gettime(CLOCK_MONOTONIC, &now); + return (now.tv_sec > ct->time); +} + +static void copy_msg_to_checker(void *c_p, const char *msg) +{ + struct checker *c = c_p; + + strlcpy(c->message, msg, sizeof(c->message)); +} + +int libcheck_check(struct checker * c) +{ + struct ping_checker_context *ct = c->context; + struct timespec tsp; + struct stat sb; + pthread_attr_t attr; + int ping_status, r; + char devt[32]; + + + if (!ct) + return PATH_UNCHECKED; + + if (fstat(c->fd, 
&sb) == 0) { + pthread_mutex_lock(&ct->lock); + ct->devt = sb.st_rdev; + pthread_mutex_unlock(&ct->lock); + } + + if (c->sync) + return ping_check(c->fd, c->dev, c->timeout, copy_msg_to_checker, c); + + /* + * Async mode + */ + r = pthread_mutex_lock(&ct->lock); + if (r != 0) { + condlog(2, "%s: ping mutex lock failed with %d", + ping_devt(devt, sizeof(devt), ct), r); + MSG(c, MSG_PING_FAILED); + return PATH_WILD; + } + + if (ct->running) { + /* + * Check if PING checker is still running. Hold hldr_lock + * around the pthread_cancel() call to avoid that + * pthread_cancel() gets called after the (detached) PING + * thread has exited. + */ + pthread_spin_lock(&ct->hldr_lock); + if (ct->thread) { + if (ping_check_async_timeout(c)) { + condlog(3, "%s: ping checker timeout", + ping_devt(devt, sizeof(devt), ct)); + pthread_cancel(ct->thread); + ct->running = 0; + MSG(c, MSG_PING_TIMEOUT); + ping_status = PATH_TIMEOUT; + } else { + condlog(3, "%s: ping checker not finished", + ping_devt(devt, sizeof(devt), ct)); + ct->running++; + ping_status = PATH_PENDING; + } + } else { + /* PING checker done */ + ct->running = 0; + ping_status = ct->state; + strlcpy(c->message, ct->message, sizeof(c->message)); + } + pthread_spin_unlock(&ct->hldr_lock); + pthread_mutex_unlock(&ct->lock); + } else { + if (ping_running(ct)) { + /* pthread cancel failed. 
continue in sync mode */ + pthread_mutex_unlock(&ct->lock); + condlog(3, "%s: ping thread not responding", + ping_devt(devt, sizeof(devt), ct)); + return PATH_TIMEOUT; + } + /* Start new PING checker */ + ct->state = PATH_UNCHECKED; + ct->fd = c->fd; + strncpy(ct->dev, c->dev, strlen(c->dev)+1); + ct->timeout = c->timeout; + pthread_spin_lock(&ct->hldr_lock); + ct->holders++; + pthread_spin_unlock(&ct->hldr_lock); + ping_set_async_timeout(c); + setup_thread_attr(&attr, 32 * 1024, 1); + r = pthread_create(&ct->thread, &attr, ping_thread, ct); + pthread_attr_destroy(&attr); + if (r) { + pthread_spin_lock(&ct->hldr_lock); + ct->holders--; + pthread_spin_unlock(&ct->hldr_lock); + pthread_mutex_unlock(&ct->lock); + ct->thread = 0; + condlog(3, "%s: failed to start ping thread, using" + " sync mode", ping_devt(devt, sizeof(devt), ct)); + return ping_check(c->fd, c->dev, c->timeout, + copy_msg_to_checker, c); + } + ping_timeout(&tsp); + r = pthread_cond_timedwait(&ct->active, &ct->lock, &tsp); + ping_status = ct->state; + strlcpy(c->message, ct->message, sizeof(c->message)); + pthread_mutex_unlock(&ct->lock); + if (ping_running(ct) && + (ping_status == PATH_PENDING || ping_status == PATH_UNCHECKED)) { + condlog(3, "%s: ping checker still running", + ping_devt(devt, sizeof(devt), ct)); + ct->running = 1; + ping_status = PATH_PENDING; + } + } + + return ping_status; +} diff --git a/libmultipath/checkers/readsector0.c b/libmultipath/checkers/readsector0.c index 8fccb46..e485810 100644 --- a/libmultipath/checkers/readsector0.c +++ b/libmultipath/checkers/readsector0.c @@ -4,11 +4,13 @@ #include <stdio.h> #include "checkers.h" -#include "libsg.h" +#include "../libmultipath/libsg.h" #define MSG_READSECTOR0_UP "readsector0 checker reports path is up" #define MSG_READSECTOR0_DOWN "readsector0 checker reports path is down" +#define SENSE_BUFF_LEN 32 + struct readsector0_checker_context { void * dummy; }; diff --git a/libmultipath/checkers/tur.c b/libmultipath/checkers/tur.c 
deleted file mode 100644 index b4a5cb2..0000000 --- a/libmultipath/checkers/tur.c +++ /dev/null @@ -1,427 +0,0 @@ -/* - * Some code borrowed from sg-utils. - * - * Copyright (c) 2004 Christophe Varoqui - */ -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <unistd.h> -#include <fcntl.h> -#include <sys/ioctl.h> -#include <sys/sysmacros.h> -#include <errno.h> -#include <sys/time.h> -#include <pthread.h> - -#include "checkers.h" - -#include "../libmultipath/debug.h" -#include "../libmultipath/sg_include.h" -#include "../libmultipath/util.h" -#include "../libmultipath/time-util.h" -#include "../libmultipath/util.h" - -#define TUR_CMD_LEN 6 -#define HEAVY_CHECK_COUNT 10 - -#define MSG_TUR_UP "tur checker reports path is up" -#define MSG_TUR_DOWN "tur checker reports path is down" -#define MSG_TUR_GHOST "tur checker reports path is in standby state" -#define MSG_TUR_RUNNING "tur checker still running" -#define MSG_TUR_TIMEOUT "tur checker timed out" -#define MSG_TUR_FAILED "tur checker failed to initialize" - -struct tur_checker_context { - dev_t devt; - int state; - int running; - int fd; - unsigned int timeout; - time_t time; - pthread_t thread; - pthread_mutex_t lock; - pthread_cond_t active; - pthread_spinlock_t hldr_lock; - int holders; - char message[CHECKER_MSG_LEN]; -}; - -static const char *tur_devt(char *devt_buf, int size, - struct tur_checker_context *ct) -{ - dev_t devt; - - pthread_mutex_lock(&ct->lock); - devt = ct->devt; - pthread_mutex_unlock(&ct->lock); - - snprintf(devt_buf, size, "%d:%d", major(devt), minor(devt)); - return devt_buf; -} - -int libcheck_init (struct checker * c) -{ - struct tur_checker_context *ct; - pthread_mutexattr_t attr; - - ct = malloc(sizeof(struct tur_checker_context)); - if (!ct) - return 1; - memset(ct, 0, sizeof(struct tur_checker_context)); - - ct->state = PATH_UNCHECKED; - ct->fd = -1; - ct->holders = 1; - pthread_cond_init_mono(&ct->active); - 
pthread_mutexattr_init(&attr); - pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_RECURSIVE); - pthread_mutex_init(&ct->lock, &attr); - pthread_mutexattr_destroy(&attr); - pthread_spin_init(&ct->hldr_lock, PTHREAD_PROCESS_PRIVATE); - c->context = ct; - - return 0; -} - -static void cleanup_context(struct tur_checker_context *ct) -{ - pthread_mutex_destroy(&ct->lock); - pthread_cond_destroy(&ct->active); - pthread_spin_destroy(&ct->hldr_lock); - free(ct); -} - -void libcheck_free (struct checker * c) -{ - if (c->context) { - struct tur_checker_context *ct = c->context; - int holders; - pthread_t thread; - - pthread_spin_lock(&ct->hldr_lock); - ct->holders--; - holders = ct->holders; - thread = ct->thread; - pthread_spin_unlock(&ct->hldr_lock); - if (holders) - pthread_cancel(thread); - else - cleanup_context(ct); - c->context = NULL; - } - return; -} - -void libcheck_repair (struct checker * c) -{ - return; -} - -#define TUR_MSG(fmt, args...) \ - do { \ - char msg[CHECKER_MSG_LEN]; \ - \ - snprintf(msg, sizeof(msg), fmt, ##args); \ - copy_message(cb_arg, msg); \ - } while (0) - -static int -tur_check(int fd, unsigned int timeout, - void (*copy_message)(void *, const char *), void *cb_arg) -{ - struct sg_io_hdr io_hdr; - unsigned char turCmdBlk[TUR_CMD_LEN] = { 0x00, 0, 0, 0, 0, 0 }; - unsigned char sense_buffer[32]; - int retry_tur = 5; - -retry: - memset(&io_hdr, 0, sizeof (struct sg_io_hdr)); - memset(&sense_buffer, 0, 32); - io_hdr.interface_id = 'S'; - io_hdr.cmd_len = sizeof (turCmdBlk); - io_hdr.mx_sb_len = sizeof (sense_buffer); - io_hdr.dxfer_direction = SG_DXFER_NONE; - io_hdr.cmdp = turCmdBlk; - io_hdr.sbp = sense_buffer; - io_hdr.timeout = timeout * 1000; - io_hdr.pack_id = 0; - if (ioctl(fd, SG_IO, &io_hdr) < 0) { - TUR_MSG(MSG_TUR_DOWN); - return PATH_DOWN; - } - if ((io_hdr.status & 0x7e) == 0x18) { - /* - * SCSI-3 arrays might return - * reservation conflict on TUR - */ - TUR_MSG(MSG_TUR_UP); - return PATH_UP; - } - if (io_hdr.info & SG_INFO_OK_MASK) { 
- int key = 0, asc, ascq; - - switch (io_hdr.host_status) { - case DID_OK: - case DID_NO_CONNECT: - case DID_BAD_TARGET: - case DID_ABORT: - case DID_TRANSPORT_FAILFAST: - break; - default: - /* Driver error, retry */ - if (--retry_tur) - goto retry; - break; - } - if (io_hdr.sb_len_wr > 3) { - if (io_hdr.sbp[0] == 0x72 || io_hdr.sbp[0] == 0x73) { - key = io_hdr.sbp[1] & 0x0f; - asc = io_hdr.sbp[2]; - ascq = io_hdr.sbp[3]; - } else if (io_hdr.sb_len_wr > 13 && - ((io_hdr.sbp[0] & 0x7f) == 0x70 || - (io_hdr.sbp[0] & 0x7f) == 0x71)) { - key = io_hdr.sbp[2] & 0x0f; - asc = io_hdr.sbp[12]; - ascq = io_hdr.sbp[13]; - } - } - if (key == 0x6) { - /* Unit Attention, retry */ - if (--retry_tur) - goto retry; - } - else if (key == 0x2) { - /* Not Ready */ - /* Note: Other ALUA states are either UP or DOWN */ - if( asc == 0x04 && ascq == 0x0b){ - /* - * LOGICAL UNIT NOT ACCESSIBLE, - * TARGET PORT IN STANDBY STATE - */ - TUR_MSG(MSG_TUR_GHOST); - return PATH_GHOST; - } - } - TUR_MSG(MSG_TUR_DOWN); - return PATH_DOWN; - } - TUR_MSG(MSG_TUR_UP); - return PATH_UP; -} - -#define tur_thread_cleanup_push(ct) pthread_cleanup_push(cleanup_func, ct) -#define tur_thread_cleanup_pop(ct) pthread_cleanup_pop(1) - -static void cleanup_func(void *data) -{ - int holders; - struct tur_checker_context *ct = data; - pthread_spin_lock(&ct->hldr_lock); - ct->holders--; - holders = ct->holders; - ct->thread = 0; - pthread_spin_unlock(&ct->hldr_lock); - if (!holders) - cleanup_context(ct); -} - -static int tur_running(struct tur_checker_context *ct) -{ - pthread_t thread; - - pthread_spin_lock(&ct->hldr_lock); - thread = ct->thread; - pthread_spin_unlock(&ct->hldr_lock); - - return thread != 0; -} - -static void copy_msg_to_tcc(void *ct_p, const char *msg) -{ - struct tur_checker_context *ct = ct_p; - - pthread_mutex_lock(&ct->lock); - strlcpy(ct->message, msg, sizeof(ct->message)); - pthread_mutex_unlock(&ct->lock); -} - -static void *tur_thread(void *ctx) -{ - struct tur_checker_context *ct = 
ctx; - int state; - char devt[32]; - - condlog(3, "%s: tur checker starting up", - tur_devt(devt, sizeof(devt), ct)); - - /* This thread can be canceled, so setup clean up */ - tur_thread_cleanup_push(ct); - - /* TUR checker start up */ - pthread_mutex_lock(&ct->lock); - ct->state = PATH_PENDING; - ct->message[0] = '\0'; - pthread_mutex_unlock(&ct->lock); - - state = tur_check(ct->fd, ct->timeout, copy_msg_to_tcc, ct->message); - pthread_testcancel(); - - /* TUR checker done */ - pthread_mutex_lock(&ct->lock); - ct->state = state; - pthread_cond_signal(&ct->active); - pthread_mutex_unlock(&ct->lock); - - condlog(3, "%s: tur checker finished, state %s", - tur_devt(devt, sizeof(devt), ct), checker_state_name(state)); - tur_thread_cleanup_pop(ct); - - return ((void *)0); -} - - -static void tur_timeout(struct timespec *tsp) -{ - clock_gettime(CLOCK_MONOTONIC, tsp); - tsp->tv_nsec += 1000 * 1000; /* 1 millisecond */ - normalize_timespec(tsp); -} - -static void tur_set_async_timeout(struct checker *c) -{ - struct tur_checker_context *ct = c->context; - struct timespec now; - - clock_gettime(CLOCK_MONOTONIC, &now); - ct->time = now.tv_sec + c->timeout; -} - -static int tur_check_async_timeout(struct checker *c) -{ - struct tur_checker_context *ct = c->context; - struct timespec now; - - clock_gettime(CLOCK_MONOTONIC, &now); - return (now.tv_sec > ct->time); -} - -static void copy_msg_to_checker(void *c_p, const char *msg) -{ - struct checker *c = c_p; - - strlcpy(c->message, msg, sizeof(c->message)); -} - -int libcheck_check(struct checker * c) -{ - struct tur_checker_context *ct = c->context; - struct timespec tsp; - struct stat sb; - pthread_attr_t attr; - int tur_status, r; - char devt[32]; - - - if (!ct) - return PATH_UNCHECKED; - - if (fstat(c->fd, &sb) == 0) { - pthread_mutex_lock(&ct->lock); - ct->devt = sb.st_rdev; - pthread_mutex_unlock(&ct->lock); - } - - if (c->sync) - return tur_check(c->fd, c->timeout, copy_msg_to_checker, c); - - /* - * Async mode - */ - r 
= pthread_mutex_lock(&ct->lock); - if (r != 0) { - condlog(2, "%s: tur mutex lock failed with %d", - tur_devt(devt, sizeof(devt), ct), r); - MSG(c, MSG_TUR_FAILED); - return PATH_WILD; - } - - if (ct->running) { - /* - * Check if TUR checker is still running. Hold hldr_lock - * around the pthread_cancel() call to avoid that - * pthread_cancel() gets called after the (detached) TUR - * thread has exited. - */ - pthread_spin_lock(&ct->hldr_lock); - if (ct->thread) { - if (tur_check_async_timeout(c)) { - condlog(3, "%s: tur checker timeout", - tur_devt(devt, sizeof(devt), ct)); - pthread_cancel(ct->thread); - ct->running = 0; - MSG(c, MSG_TUR_TIMEOUT); - tur_status = PATH_TIMEOUT; - } else { - condlog(3, "%s: tur checker not finished", - tur_devt(devt, sizeof(devt), ct)); - ct->running++; - tur_status = PATH_PENDING; - } - } else { - /* TUR checker done */ - ct->running = 0; - tur_status = ct->state; - strlcpy(c->message, ct->message, sizeof(c->message)); - } - pthread_spin_unlock(&ct->hldr_lock); - pthread_mutex_unlock(&ct->lock); - } else { - if (tur_running(ct)) { - /* pthread cancel failed. 
continue in sync mode */ - pthread_mutex_unlock(&ct->lock); - condlog(3, "%s: tur thread not responding", - tur_devt(devt, sizeof(devt), ct)); - return PATH_TIMEOUT; - } - /* Start new TUR checker */ - ct->state = PATH_UNCHECKED; - ct->fd = c->fd; - ct->timeout = c->timeout; - pthread_spin_lock(&ct->hldr_lock); - ct->holders++; - pthread_spin_unlock(&ct->hldr_lock); - tur_set_async_timeout(c); - setup_thread_attr(&attr, 32 * 1024, 1); - r = pthread_create(&ct->thread, &attr, tur_thread, ct); - pthread_attr_destroy(&attr); - if (r) { - pthread_spin_lock(&ct->hldr_lock); - ct->holders--; - pthread_spin_unlock(&ct->hldr_lock); - pthread_mutex_unlock(&ct->lock); - ct->thread = 0; - condlog(3, "%s: failed to start tur thread, using" - " sync mode", tur_devt(devt, sizeof(devt), ct)); - return tur_check(c->fd, c->timeout, - copy_msg_to_checker, c); - } - tur_timeout(&tsp); - r = pthread_cond_timedwait(&ct->active, &ct->lock, &tsp); - tur_status = ct->state; - strlcpy(c->message, ct->message, sizeof(c->message)); - pthread_mutex_unlock(&ct->lock); - if (tur_running(ct) && - (tur_status == PATH_PENDING || tur_status == PATH_UNCHECKED)) { - condlog(3, "%s: tur checker still running", - tur_devt(devt, sizeof(devt), ct)); - ct->running = 1; - tur_status = PATH_PENDING; - } - } - - return tur_status; -} diff --git a/libmultipath/checkers/tur.h b/libmultipath/checkers/tur.h deleted file mode 100644 index a2e8c88..0000000 --- a/libmultipath/checkers/tur.h +++ /dev/null @@ -1,8 +0,0 @@ -#ifndef _TUR_H -#define _TUR_H - -int tur (struct checker *); -int tur_init (struct checker *); -void tur_free (struct checker *); - -#endif /* _TUR_H */ diff --git a/libmultipath/defaults.h b/libmultipath/defaults.h index db2b756..9a65cec 100644 --- a/libmultipath/defaults.h +++ b/libmultipath/defaults.h @@ -32,7 +32,7 @@ #define DEFAULT_UEV_WAIT_TIMEOUT 30 #define DEFAULT_PRIO PRIO_CONST #define DEFAULT_PRIO_ARGS "" -#define DEFAULT_CHECKER TUR +#define DEFAULT_CHECKER PING #define DEFAULT_FLUSH 
FLUSH_DISABLED
 #define DEFAULT_USER_FRIENDLY_NAMES USER_FRIENDLY_NAMES_OFF
 #define DEFAULT_FORCE_SYNC 0
diff --git a/libmultipath/discovery.c b/libmultipath/discovery.c
index 663c8ea..bae5d24 100644
--- a/libmultipath/discovery.c
+++ b/libmultipath/discovery.c
@@ -1539,6 +1539,7 @@ get_state (struct path * pp, struct config *conf, int daemon)
 			return PATH_UNCHECKED;
 		}
 		checker_set_fd(c, pp->fd);
+		checker_set_dev(c, pp->dev);
 		if (checker_init(c, pp->mpp?&pp->mpp->mpcontext:NULL)) {
 			memset(c, 0x0, sizeof(struct checker));
 			condlog(3, "%s: checker init failed", pp->dev);
diff --git a/libmultipath/hwtable.c b/libmultipath/hwtable.c
index 390d143..9e8e9e3 100644
--- a/libmultipath/hwtable.c
+++ b/libmultipath/hwtable.c
@@ -1081,7 +1081,7 @@ static struct hwentry default_hw[] = {
 		.pgpolicy = FAILOVER,
 		.uid_attribute = "ID_SERIAL",
 		.selector = "service-time 0",
-		.checker_name = TUR,
+		.checker_name = PING,
 		.alias_prefix = "mpath",
 		.features = "0",
 		.hwhandler = "0",
diff --git a/libmultipath/libnvme.c b/libmultipath/libnvme.c
new file mode 100644
index 0000000..97c9125
--- /dev/null
+++ b/libmultipath/libnvme.c
@@ -0,0 +1,160 @@
+/*
+ * (C) Copyright HUAWEI Technology Corp. 2017, All Rights Reserved.
+ *
+ * libnvme.c
+ *
+ * Some code borrowed from NVM-Express command line utility.
+ *
+ * Author(s): Yang Feng <philip.yang@xxxxxxxxxx>
+ *
+ * This file is released under the GPL version 2, or any later version.
+ *
+ */
+#include <linux/types.h>
+#include <sys/ioctl.h>
+#include <stdint.h>
+
+#include "libnvme.h"
+
+/* Argument block passed to NVME_IOCTL_SUBMIT_IO. */
+struct nvme_user_io {
+	__u8 opcode;
+	__u8 flags;
+	__u16 control;
+	__u16 nblocks;
+	__u16 rsvd;
+	__u64 metadata;
+	__u64 addr;
+	__u64 slba;
+	__u32 dsmgmt;
+	__u32 reftag;
+	__u16 apptag;
+	__u16 appmask;
+};
+
+/* Argument block passed to NVME_IOCTL_ADMIN_CMD. */
+struct nvme_admin_cmd {
+	__u8 opcode;
+	__u8 flags;
+	__u16 rsvd1;
+	__u32 nsid;
+	__u32 cdw2;
+	__u32 cdw3;
+	__u64 metadata;
+	__u64 addr;
+	__u32 metadata_len;
+	__u32 data_len;
+	__u32 cdw10;
+	__u32 cdw11;
+	__u32 cdw12;
+	__u32 cdw13;
+	__u32 cdw14;
+	__u32 cdw15;
+	__u32 timeout_ms;
+	__u32 result;
+};
+
+#define NVME_IOCTL_ADMIN_CMD _IOWR('N', 0x41, struct nvme_admin_cmd)
+#define NVME_IOCTL_SUBMIT_IO _IOW('N', 0x42, struct nvme_user_io)
+
+/*
+ * Fill a struct nvme_user_io from the arguments and submit it to fd with
+ * NVME_IOCTL_SUBMIT_IO.
+ *
+ * Returns whatever ioctl() returns.
+ */
+static int nvme_io(int fd, __u8 opcode, __u64 slba, __u16 nblocks, __u16 control,
+		   __u32 dsmgmt, __u32 reftag, __u16 apptag, __u16 appmask, void *data,
+		   void *metadata)
+{
+	struct nvme_user_io io = {
+		.opcode = opcode,
+		.flags = 0,
+		.control = control,
+		.nblocks = nblocks,
+		.rsvd = 0,
+		.metadata = (__u64)(uintptr_t) metadata,
+		.addr = (__u64)(uintptr_t) data,
+		.slba = slba,
+		.dsmgmt = dsmgmt,
+		.reftag = reftag,
+		.apptag = apptag,
+		.appmask = appmask,
+	};
+
+	return ioctl(fd, NVME_IOCTL_SUBMIT_IO, &io);
+}
+
+/*
+ * Submit an I/O command with opcode 0x2 (the NVMe Read opcode) through
+ * nvme_io(); all other arguments are passed on unchanged.
+ *
+ * Returns whatever ioctl() returns.
+ */
+int nvme_read(int fd, __u64 slba, __u16 nblocks, __u16 control, __u32 dsmgmt,
+	      __u32 reftag, __u16 apptag, __u16 appmask, void *data, void *metadata)
+{
+	return nvme_io(fd, 0x2, slba, nblocks, control, dsmgmt,
+		       reftag, apptag, appmask, data, metadata);
+}
+
+/* Hand a prepared command block to the driver with the given ioctl request. */
+static int nvme_submit_passthru(int fd, int ioctl_cmd, struct nvme_admin_cmd *cmd)
+{
+	return ioctl(fd, ioctl_cmd, cmd);
+}
+
+/*
+ * Build a struct nvme_admin_cmd from the arguments and submit it to fd with
+ * ioctl_cmd.
+ *
+ * Returns whatever ioctl() returns; when that is 0 and result is not NULL,
+ * *result receives the result field of the command block.
+ */
+int nvme_passthru(int fd, int ioctl_cmd, __u8 opcode, __u8 flags, __u16 rsvd,
+		  __u32 nsid, __u32 cdw2, __u32 cdw3, __u32 cdw10, __u32 cdw11,
+		  __u32 cdw12, __u32 cdw13, __u32 cdw14, __u32 cdw15,
+		  __u32 data_len, void *data, __u32 metadata_len,
+		  void *metadata, __u32 timeout_ms, __u32 *result)
+{
+	struct nvme_admin_cmd cmd = {
+		.opcode = opcode,
+		.flags = flags,
+		.rsvd1 = rsvd,
+		.nsid = nsid,
+		.cdw2 = cdw2,
+		.cdw3 = cdw3,
+		.metadata = (__u64)(uintptr_t) metadata,
+		.addr = (__u64)(uintptr_t) data,
+		.metadata_len = metadata_len,
+		.data_len = data_len,
+		.cdw10 = cdw10,
+		.cdw11 = cdw11,
+		.cdw12 = cdw12,
+		.cdw13 = cdw13,
+		.cdw14 = cdw14,
+		.cdw15 = cdw15,
+		.timeout_ms = timeout_ms,
+		.result = 0,
+	};
+	int err;
+
+	err = nvme_submit_passthru(fd, ioctl_cmd, &cmd);
+	if (!err && result)
+		*result = cmd.result;
+	return err;
+}
+
+/*
+ * Send an admin command with opcode 0x18 (Keep Alive) to fd; every other
+ * command field except timeout_ms is zero.
+ *
+ * Returns whatever ioctl() returns.
+ */
+int nvme_keep_alive(int fd, __u32 timeout_ms)
+{
+	__u32 result;
+
+	return nvme_passthru(fd, NVME_IOCTL_ADMIN_CMD, 0x18, 0, 0, 0, 0, 0, 0, 0,
+			     0, 0, 0, 0, 0, 0, 0, 0, timeout_ms, &result);
+}
diff --git a/libmultipath/libnvme.h b/libmultipath/libnvme.h
new file mode 100644
index 0000000..a2b5460
--- /dev/null
+++ b/libmultipath/libnvme.h
@@ -0,0 +1,10 @@
+#ifndef _LIBNVME_H
+#define _LIBNVME_H
+
+#include <linux/types.h>
+
+int nvme_read(int fd, __u64 slba, __u16 nblocks, __u16 control, __u32 dsmgmt,
+	      __u32 reftag, __u16 apptag, __u16 appmask, void *data, void *metadata);
+int nvme_keep_alive(int fd, __u32 timeout_ms);
+
+#endif /* _LIBNVME_H */
diff --git a/libmultipath/libsg.c b/libmultipath/libsg.c
new file mode 100644
index 0000000..900103e
--- /dev/null
+++ b/libmultipath/libsg.c
@@ -0,0 +1,132 @@
+/*
+ * Copyright (c) 2004, 2005 Christophe Varoqui
+ */
+#include <string.h>
+#include <sys/ioctl.h>
+#include <errno.h>
+#include <sys/stat.h>
+
+#include "checkers.h"
+#include "libsg.h"
+
+/*
+ * Read from block 0 of the device behind sg_fd into buff using a 10-byte
+ * SCSI READ command (opcode 0x28) submitted through SG_IO.
+ *
+ * The block size is st_blksize capped at 4096, and buff_len / block size
+ * blocks are requested.  A reply whose sense key is 0x6 (UNIT ATTENTION) is
+ * retried up to two more times.
+ *
+ * Returns PATH_UP when status, host_status and driver_status are all zero,
+ * or when the ioctl fails with ENOMEM; returns PATH_DOWN otherwise.
+ */
+int
+sg_read (int sg_fd, unsigned char * buff, int buff_len,
+	 unsigned char * sense, int sense_len, unsigned int timeout)
+{
+	/* defaults */
+	int blocks;
+	long long start_block = 0;
+	int bs = 512;
+	int cdbsz = 10;
+
+	unsigned char rdCmd[cdbsz];
+	unsigned char *sbb = sense;
+	struct sg_io_hdr io_hdr;
+	int res;
+	int rd_opcode[] = {0x8, 0x28, 0xa8, 0x88};
+	int sz_ind;
+	struct stat filestatus;
+	int retry_count = 3;
+
+	if (fstat(sg_fd, &filestatus) != 0)
+		return PATH_DOWN;
+	bs = (filestatus.st_blksize > 4096)? 4096: filestatus.st_blksize;
+	blocks = buff_len / bs;
+	memset(rdCmd, 0, cdbsz);
+	sz_ind = 1;
+	rdCmd[0] = rd_opcode[sz_ind];
+	rdCmd[2] = (unsigned char)((start_block >> 24) & 0xff);
+	rdCmd[3] = (unsigned char)((start_block >> 16) & 0xff);
+	rdCmd[4] = (unsigned char)((start_block >> 8) & 0xff);
+	rdCmd[5] = (unsigned char)(start_block & 0xff);
+	rdCmd[7] = (unsigned char)((blocks >> 8) & 0xff);
+	rdCmd[8] = (unsigned char)(blocks & 0xff);
+
+	memset(&io_hdr, 0, sizeof(struct sg_io_hdr));
+	io_hdr.interface_id = 'S';
+	io_hdr.cmd_len = cdbsz;
+	io_hdr.cmdp = rdCmd;
+	io_hdr.dxfer_direction = SG_DXFER_FROM_DEV;
+	io_hdr.dxfer_len = bs * blocks;
+	io_hdr.dxferp = buff;
+	io_hdr.mx_sb_len = sense_len;
+	io_hdr.sbp = sense;
+	io_hdr.timeout = timeout * 1000;
+	io_hdr.pack_id = (int)start_block;
+
+retry:
+	memset(sense, 0, sense_len);
+	while (((res = ioctl(sg_fd, SG_IO, &io_hdr)) < 0) && (EINTR == errno));
+
+	if (res < 0) {
+		if (ENOMEM == errno) {
+			return PATH_UP;
+		}
+		return PATH_DOWN;
+	}
+
+	if ((0 == io_hdr.status) &&
+	    (0 == io_hdr.host_status) &&
+	    (0 == io_hdr.driver_status)) {
+		return PATH_UP;
+	} else {
+		int key = 0;
+
+		if (io_hdr.sb_len_wr > 3) {
+			if (sbb[0] == 0x72 || sbb[0] == 0x73)
+				key = sbb[1] & 0x0f;
+			else if (io_hdr.sb_len_wr > 13 &&
+				 ((sbb[0] & 0x7f) == 0x70 ||
+				  (sbb[0] & 0x7f) == 0x71))
+				key = sbb[2] & 0x0f;
+		}
+
+		/*
+		 * Retry if UNIT_ATTENTION check condition.
+		 */
+		if (key == 0x6) {
+			if (--retry_count)
+				goto retry;
+		}
+		return PATH_DOWN;
+	}
+}
+
+/*
+ * Fill io_hdr with a TEST UNIT READY command (6-byte all-zero CDB, no data
+ * transfer), clear sense and register it as the sense buffer, then submit
+ * the header to fd through SG_IO.
+ *
+ * Returns whatever ioctl() returns.  On return io_hdr->cmdp still points at
+ * this function's local CDB, so callers must not dereference it.
+ */
+int
+sg_tur(int fd, struct sg_io_hdr *io_hdr, unsigned char *sense,
+       int sense_len, unsigned int timeout)
+{
+	unsigned char turCmdBlk[TUR_CMD_LEN] = { 0x00, 0, 0, 0, 0, 0 };
+
+	memset(io_hdr, 0, sizeof(struct sg_io_hdr));
+	memset(sense, 0, sense_len);
+	io_hdr->interface_id = 'S';
+	io_hdr->cmd_len = sizeof(turCmdBlk);
+	io_hdr->mx_sb_len = sense_len;
+	io_hdr->dxfer_direction = SG_DXFER_NONE;
+	io_hdr->cmdp = turCmdBlk;
+	io_hdr->sbp = sense;
+	io_hdr->timeout = timeout * 1000;
+	io_hdr->pack_id = 0;
+
+	return ioctl(fd, SG_IO, io_hdr);
+}
diff --git a/libmultipath/libsg.h b/libmultipath/libsg.h
new file mode 100644
index 0000000..70049a2
--- /dev/null
+++ b/libmultipath/libsg.h
@@ -0,0 +1,13 @@
+#ifndef _LIBSG_H
+#define _LIBSG_H
+
+#include "sg_include.h"
+
+#define TUR_CMD_LEN 6
+
+int sg_read (int sg_fd, unsigned char * buff, int buff_len,
+	     unsigned char * sense, int sense_len, unsigned int timeout);
+int sg_tur(int fd, struct sg_io_hdr *io_hdr, unsigned char *sense,
+	   int sense_len, unsigned int timeout);
+
+#endif /* _LIBSG_H */
diff --git a/libmultipath/prioritizers/Makefile b/libmultipath/prioritizers/Makefile
index 0c71e63..0c5c69b 100644
--- a/libmultipath/prioritizers/Makefile
+++ b/libmultipath/prioritizers/Makefile
@@ -26,7 +26,7 @@ all: $(LIBS)
 libprioalua.so: alua.o alua_rtpg.o
 	$(CC) $(LDFLAGS) $(SHARED_FLAGS) -o $@ $^
 
-libpriopath_latency.so: path_latency.o ../checkers/libsg.o
+libpriopath_latency.so: path_latency.o ../libsg.o ../libnvme.o
 	$(CC) $(LDFLAGS) $(SHARED_FLAGS) -o $@ $^ -lm
 
 libprio%.so: %.o
diff --git a/libmultipath/prioritizers/path_latency.c b/libmultipath/prioritizers/path_latency.c
index 8f633e0..21209ff 100644
--- a/libmultipath/prioritizers/path_latency.c
+++ b/libmultipath/prioritizers/path_latency.c
@@ -26,29 +26,11 @@
 #include "debug.h"
 #include "prio.h"
 #include "structs.h"
-#include <linux/types.h>
-#include <sys/ioctl.h>
-#include "../checkers/libsg.h" +#include "libsg.h" +#include "libnvme.h" #define pp_pl_log(prio, fmt, args...) condlog(prio, "path_latency prio: " fmt, ##args) -struct nvme_user_io { - __u8 opcode; - __u8 flags; - __u16 control; - __u16 nblocks; - __u16 rsvd; - __u64 metadata; - __u64 addr; - __u64 slba; - __u32 dsmgmt; - __u32 reftag; - __u16 apptag; - __u16 appmask; -}; - -#define NVME_IOCTL_SUBMIT_IO _IOW('N', 0x42, struct nvme_user_io) - #define MAX_IO_NUM 200 #define MIN_IO_NUM 2 @@ -62,6 +44,8 @@ struct nvme_user_io { #define MAX_CHAR_SIZE 30 +#define SENSE_BUFF_LEN 32 + #define USEC_PER_SEC 1000000LL #define NSEC_PER_USEC 1000LL @@ -72,34 +56,6 @@ static inline long long timeval_to_us(const struct timespec *tv) return ((long long) tv->tv_sec * USEC_PER_SEC) + (tv->tv_nsec / NSEC_PER_USEC); } -int nvme_io(int fd, __u8 opcode, __u64 slba, __u16 nblocks, __u16 control, - __u32 dsmgmt, __u32 reftag, __u16 apptag, __u16 appmask, void *data, void *metadata) -{ - struct nvme_user_io io = { - .opcode = opcode, - .flags = 0, - .control = control, - .nblocks = nblocks, - .rsvd = 0, - .metadata = (__u64)(uintptr_t) metadata, - .addr = (__u64)(uintptr_t) data, - .slba = slba, - .dsmgmt = dsmgmt, - .reftag = reftag, - .appmask = apptag, - .apptag = appmask, - }; - - return ioctl(fd, NVME_IOCTL_SUBMIT_IO, &io); -} - -int nvme_read(int fd, __u64 slba, __u16 nblocks, __u16 control, __u32 dsmgmt, - __u32 reftag, __u16 apptag, __u16 appmask, void *data, void *metadata) -{ - return nvme_io(fd, 0x2, slba, nblocks, control, dsmgmt, - reftag, apptag, appmask, data, metadata); -} - static int do_readsector0(struct path *pp, unsigned int timeout) { unsigned char buf[4096]; @@ -108,12 +64,12 @@ static int do_readsector0(struct path *pp, unsigned int timeout) if (!strncmp(pp->dev, "nvme", 4)) { - if (nvme_read(pp->fd, 0, 1, 0, 0, 0, 0, 0, buf, mbuf) < 0) + if (nvme_read(pp->fd, 0, 1, 0, 0, 0, 0, 0, buf, mbuf) != 0) return 0; } else { - if (sg_read(pp->fd, &buf[0], 4096, 
&sbuf[0],SENSE_BUFF_LEN, timeout) == 2) + if (sg_read(pp->fd, &buf[0], 4096, &sbuf[0], SENSE_BUFF_LEN, timeout) == 2) return 0; } @@ -300,7 +256,7 @@ int getprio (struct path *pp, char *args, unsigned int timeout) Warn the user if latency_interval is smaller than (2 * standard_deviation), or equal */ standard_deviation = calc_standard_deviation(path_latency, index, avglatency); latency_interval = calc_latency_interval(avglatency, MAX_AVG_LATENCY, MIN_AVG_LATENCY, base_num); - if ((latency_interval != 0) + if ((latency_interval!= 0) && (latency_interval <= (2 * standard_deviation))) pp_pl_log(3, "%s: latency interval (%lld) according to average latency (%lld us) is smaller than " "2 * standard deviation (%lld us), or equal, args base_num (%d) needs to be set bigger value", diff --git a/libmultipath/propsel.c b/libmultipath/propsel.c index 27f3951..d4c24de 100644 --- a/libmultipath/propsel.c +++ b/libmultipath/propsel.c @@ -316,7 +316,7 @@ int select_checker(struct config *conf, struct path *pp) struct checker * c = &pp->checker; if (pp->detect_checker == DETECT_CHECKER_ON && pp->tpgs > 0) { - checker_name = TUR; + checker_name = PING; origin = "(setting: array autodetected)"; goto out; } diff --git a/multipath/multipath.conf.5 b/multipath/multipath.conf.5 index 0049cba..915cc50 100644 --- a/multipath/multipath.conf.5 +++ b/multipath/multipath.conf.5 @@ -418,8 +418,8 @@ are: (Deprecated) Read the first sector of the device. This checker is being deprecated, please use \fItur\fR instead. .TP -.I tur -Issue a \fITEST UNIT READY\fR command to the device. +.I ping +Issue a \fITEST UNIT READY\fR command or a \fIKEEP ALIVE\fR command to the device. .TP .I emc_clariion (Hardware-dependent) -- 2.6.4.windows.1 -- dm-devel mailing list dm-devel@xxxxxxxxxx https://www.redhat.com/mailman/listinfo/dm-devel