The state machine in the hv_utils driver can run out of order in some corner cases, e.g. if the kvp daemon doesn't call write() fast enough due to some reason, kvp_timeout_func() can run first and move the state to HVUTIL_READY; next, when kvp_on_msg() is called it returns -EINVAL since kvp_transaction.state is smaller than HVUTIL_USERSPACE_REQ; later, the daemon's write() gets an error -EINVAL, and the daemon will exit(). We can reproduce the issue by sending a SIGSTOP signal to the daemon, wait for 1 minute, and send a SIGCONT signal to the daemon: the daemon will exit() quickly. We can fix the issue by forcing a reset of the device (which means the daemon can close() and open() the device again) and doing extra necessary clean-up. Signed-off-by: Dexuan Cui <decui@xxxxxxxxxxxxx> --- tools/hv/hv_fcopy_daemon.c | 19 +++++++++++++++---- tools/hv/hv_kvp_daemon.c | 25 ++++++++++++++----------- tools/hv/hv_vss_daemon.c | 25 +++++++++++++++++++------ 3 files changed, 48 insertions(+), 21 deletions(-) diff --git a/tools/hv/hv_fcopy_daemon.c b/tools/hv/hv_fcopy_daemon.c index aea2d91ab364..a78a5292589b 100644 --- a/tools/hv/hv_fcopy_daemon.c +++ b/tools/hv/hv_fcopy_daemon.c @@ -21,7 +21,7 @@ #include <fcntl.h> #include <getopt.h> -static int target_fd; +static int target_fd = -1; static char target_fname[PATH_MAX]; static unsigned long long filesize; @@ -80,6 +80,8 @@ static int hv_start_fcopy(struct hv_start_fcopy *smsg) error = 0; done: + if (error) + memset(target_fname, 0, sizeof(target_fname)); return error; } @@ -111,12 +113,16 @@ static int hv_copy_data(struct hv_do_fcopy *cpmsg) static int hv_copy_finished(void) { close(target_fd); + target_fd = -1; + memset(target_fname, 0, sizeof(target_fname)); return 0; } static int hv_copy_cancel(void) { close(target_fd); + target_fd = -1; unlink(target_fname); + memset(target_fname, 0, sizeof(target_fname)); return 0; } @@ -141,7 +147,7 @@ int main(int argc, char *argv[]) struct hv_do_fcopy copy; __u32 kernel_modver; } buffer = { }; - int in_handshake = 1; + int in_handshake; static struct option long_options[] = { {"help", no_argument, 0, 'h' }, @@ -170,6 +176,9 @@ int main(int argc, char *argv[]) openlog("HV_FCOPY", 0, LOG_USER); syslog(LOG_INFO, "starting; pid is:%d", getpid()); +reopen_fcopy_fd: + hv_copy_cancel(); + in_handshake = 1; fcopy_fd = open("/dev/vmbus/hv_fcopy", O_RDWR); if (fcopy_fd < 0) { @@ -196,7 +205,8 @@ int main(int argc, char *argv[]) len = pread(fcopy_fd, &buffer, sizeof(buffer), 0); if (len < 0) { syslog(LOG_ERR, "pread failed: %s", strerror(errno)); - exit(EXIT_FAILURE); + close(fcopy_fd); + goto reopen_fcopy_fd; } if (in_handshake) { @@ -233,7 +243,8 @@ int main(int argc, char *argv[]) if (pwrite(fcopy_fd, &error, sizeof(int), 0) != sizeof(int)) { syslog(LOG_ERR, "pwrite failed: %s", strerror(errno)); - exit(EXIT_FAILURE); + close(fcopy_fd); + goto reopen_fcopy_fd; } } } diff --git a/tools/hv/hv_kvp_daemon.c b/tools/hv/hv_kvp_daemon.c index e9ef4ca6a655..3282d48c4487 100644 --- a/tools/hv/hv_kvp_daemon.c +++ b/tools/hv/hv_kvp_daemon.c @@ -76,7 +76,7 @@ enum { DNS }; -static int in_hand_shake = 1; +static int in_hand_shake; static char *os_name = ""; static char *os_major = ""; @@ -1400,14 +1400,6 @@ int main(int argc, char *argv[]) openlog("KVP", 0, LOG_USER); syslog(LOG_INFO, "KVP starting; pid is:%d", getpid()); - kvp_fd = open("/dev/vmbus/hv_kvp", O_RDWR | O_CLOEXEC); - - if (kvp_fd < 0) { - syslog(LOG_ERR, "open /dev/vmbus/hv_kvp failed; error: %d %s", - errno, strerror(errno)); - exit(EXIT_FAILURE); - } - /* * Retrieve OS release information. */ @@ -1423,6 +1415,16 @@ int main(int argc, char *argv[]) exit(EXIT_FAILURE); } +reopen_kvp_fd: + in_hand_shake = 1; + kvp_fd = open("/dev/vmbus/hv_kvp", O_RDWR | O_CLOEXEC); + + if (kvp_fd < 0) { + syslog(LOG_ERR, "open /dev/vmbus/hv_kvp failed; error: %d %s", + errno, strerror(errno)); + exit(EXIT_FAILURE); + } + /* * Register ourselves with the kernel. */ @@ -1458,7 +1460,7 @@ int main(int argc, char *argv[]) errno, strerror(errno)); close(kvp_fd); - return EXIT_FAILURE; + goto reopen_kvp_fd; } /* @@ -1623,7 +1625,8 @@ int main(int argc, char *argv[]) if (len != sizeof(struct hv_kvp_msg)) { syslog(LOG_ERR, "write failed; error: %d %s", errno, strerror(errno)); - exit(EXIT_FAILURE); + close(kvp_fd); + goto reopen_kvp_fd; } } diff --git a/tools/hv/hv_vss_daemon.c b/tools/hv/hv_vss_daemon.c index 92902a88f671..e70fed66a5ae 100644 --- a/tools/hv/hv_vss_daemon.c +++ b/tools/hv/hv_vss_daemon.c @@ -28,6 +28,8 @@ #include <stdbool.h> #include <dirent.h> +static bool fs_frozen; + /* Don't use syslog() in the function since that can cause write to disk */ static int vss_do_freeze(char *dir, unsigned int cmd) { @@ -155,8 +157,11 @@ static int vss_operate(int operation) continue; } error |= vss_do_freeze(ent->mnt_dir, cmd); - if (error && operation == VSS_OP_FREEZE) - goto err; + if (operation == VSS_OP_FREEZE) { + if (error) + goto err; + fs_frozen = true; + } } endmntent(mounts); @@ -167,6 +172,9 @@ static int vss_operate(int operation) goto err; } + if (operation == VSS_OP_THAW && !error) + fs_frozen = false; + goto out; err: save_errno = errno; @@ -175,6 +183,7 @@ static int vss_operate(int operation) endmntent(mounts); } vss_operate(VSS_OP_THAW); + fs_frozen = false; /* Call syslog after we thaw all filesystems */ if (ent) syslog(LOG_ERR, "FREEZE of %s failed; error:%d %s", @@ -202,7 +211,7 @@ int main(int argc, char *argv[]) int op; struct hv_vss_msg vss_msg[1]; int daemonize = 1, long_index = 0, opt; - int in_handshake = 1; + int in_handshake; __u32 kernel_modver; static struct option long_options[] = { @@ -232,6 +241,10 @@ int main(int argc, char *argv[]) openlog("Hyper-V VSS", 0, LOG_USER); syslog(LOG_INFO, "VSS starting; pid is:%d", getpid()); +reopen_vss_fd: + if (fs_frozen) + vss_operate(VSS_OP_THAW); + in_handshake = 1; vss_fd = open("/dev/vmbus/hv_vss", O_RDWR); if (vss_fd < 0) { syslog(LOG_ERR, "open /dev/vmbus/hv_vss failed; error: %d %s", @@ -285,7 +298,7 @@ int main(int argc, char *argv[]) syslog(LOG_ERR, "read failed; error:%d %s", errno, strerror(errno)); close(vss_fd); - return EXIT_FAILURE; + goto reopen_vss_fd; } op = vss_msg->vss_hdr.operation; @@ -318,8 +331,8 @@ int main(int argc, char *argv[]) syslog(LOG_ERR, "write failed; error: %d %s", errno, strerror(errno)); - if (op == VSS_OP_FREEZE) - vss_operate(VSS_OP_THAW); + close(vss_fd); + goto reopen_vss_fd; } } -- 2.19.1