If a FIFREEZE operation run by the hv_vss_daemon takes longer than the VSS_USERSPACE_TIMEOUT set in the hv_snapshot module, the daemon's subsequent write to the hv_vss device can fail. Instead of exiting after such a write failure, try to recover by reopening the hv_vss device and performing the initial handshake again. Exiting causes all subsequent VSS operations sent by the Hyper-V host to fail until the daemon is restarted. Signed-off-by: Michael Gissing <mg@xxxxxxxxxxxxx> --- tools/hv/hv_vss_daemon.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/tools/hv/hv_vss_daemon.c b/tools/hv/hv_vss_daemon.c index 5d51d6f..0ecbdab 100644 --- a/tools/hv/hv_vss_daemon.c +++ b/tools/hv/hv_vss_daemon.c @@ -176,6 +176,7 @@ int main(int argc, char *argv[]) openlog("Hyper-V VSS", 0, LOG_USER); syslog(LOG_INFO, "VSS starting; pid is:%d", getpid()); +recover: vss_fd = open("/dev/vmbus/hv_vss", O_RDWR); if (vss_fd < 0) { syslog(LOG_ERR, "open /dev/vmbus/hv_vss failed; error: %d %s", @@ -196,6 +197,7 @@ int main(int argc, char *argv[]) } pfd.fd = vss_fd; + in_handshake = 1; while (1) { pfd.events = POLLIN; @@ -258,7 +260,14 @@ int main(int argc, char *argv[]) if (len != sizeof(struct hv_vss_msg)) { syslog(LOG_ERR, "write failed; error: %d %s", errno, strerror(errno)); - exit(EXIT_FAILURE); + /* + * try to recover from possible timeout by THAWing + * and restarting the message loop + */ + vss_operate(VSS_OP_THAW); + close(vss_fd); + syslog(LOG_INFO, "trying to recover VSS connection"); + goto recover; } } -- 2.7.4 _______________________________________________ devel mailing list devel@xxxxxxxxxxxxxxxxxxxxxx http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel