[PATCH 1/2] Introduce a lock for libxl long-running api

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



This patch introduce a lock for protecting the long-running
api (save, dump, migration and so on) from the other api
which may update the status of the virtual machine.

Signed-off-by: Bamvor Jian Zhang <bjzhang@xxxxxxxx>
---
 src/libxl/libxl_conf.h   |  56 +++++++
 src/libxl/libxl_driver.c | 419 +++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 475 insertions(+)

diff --git a/src/libxl/libxl_conf.h b/src/libxl/libxl_conf.h
index 56bf85c..95a3418 100644
--- a/src/libxl/libxl_conf.h
+++ b/src/libxl/libxl_conf.h
@@ -73,6 +73,60 @@ struct _libxlDriverPrivate {
     char *saveDir;
 };
 
+# define JOB_MASK(job)                  (1 << (job - 1))
+# define DEFAULT_JOB_MASK               \
+    (JOB_MASK(LIBXL_JOB_DESTROY) |      \
+     JOB_MASK(LIBXL_JOB_ABORT))
+
+/* Jobs which have to be tracked in domain state XML. */
+# define LIBXL_DOMAIN_TRACK_JOBS        \
+    (JOB_MASK(LIBXL_JOB_DESTROY) |      \
+     JOB_MASK(LIBXL_JOB_ASYNC))
+
+/* Only 1 job is allowed at any time
+ * A job includes *all* libxl.so api, even those just querying
+ * information, not merely actions */
+enum libxlDomainJob {
+    LIBXL_JOB_NONE = 0,      /* Always set to 0 for easy if (jobActive) conditions */
+    LIBXL_JOB_DESTROY,       /* Destroys the domain (cannot be masked out) */
+    LIBXL_JOB_MODIFY,        /* May change state */
+    LIBXL_JOB_ABORT,         /* Abort current async job */
+    LIBXL_JOB_MIGRATION_OP,  /* Operation influencing outgoing migration */
+
+    /* The following item must always be the last item before JOB_LAST */
+    LIBXL_JOB_ASYNC,         /* Asynchronous job */
+
+    LIBXL_JOB_LAST
+};
+VIR_ENUM_DECL(libxlDomainJob)
+
+/* Async job consists of a series of jobs that may change state. Independent
+ * jobs that do not change state (and possibly others if explicitly allowed by
+ * current async job) are allowed to be run even if async job is active.
+ */
+enum libxlDomainAsyncJob {
+    LIBXL_ASYNC_JOB_NONE = 0,
+    LIBXL_ASYNC_JOB_SAVE,
+    LIBXL_ASYNC_JOB_DUMP,
+
+    LIBXL_ASYNC_JOB_LAST
+};
+VIR_ENUM_DECL(libxlDomainAsyncJob)
+
+struct libxlDomainJobObj {
+    virCond cond;                       /* Use to coordinate jobs */
+    enum libxlDomainJob active;         /* Currently running job */
+    int owner;                          /* Thread which set current job */
+
+    virCond asyncCond;                  /* Use to coordinate with async jobs */
+    enum libxlDomainAsyncJob asyncJob;  /* Currently active async job */
+    int asyncOwner;                     /* Thread which set current async job */
+    int phase;                          /* Job phase (mainly for migrations) */
+    unsigned long long mask;            /* Jobs allowed during async job */
+    unsigned long long start;           /* When the async job started */
+    virDomainJobInfo info;              /* Async job progress data */
+};
+
 typedef struct _libxlDomainObjPrivate libxlDomainObjPrivate;
 typedef libxlDomainObjPrivate *libxlDomainObjPrivatePtr;
 struct _libxlDomainObjPrivate {
@@ -81,6 +135,8 @@ struct _libxlDomainObjPrivate {
     libxl_waiter *dWaiter;
     int waiterFD;
     int eventHdl;
+
+    struct libxlDomainJobObj job;
 };
 
 # define LIBXL_SAVE_MAGIC "libvirt-xml\n \0 \r"
diff --git a/src/libxl/libxl_driver.c b/src/libxl/libxl_driver.c
index f4e9aa6..05944ef 100644
--- a/src/libxl/libxl_driver.c
+++ b/src/libxl/libxl_driver.c
@@ -45,6 +45,7 @@
 #include "xen_xm.h"
 #include "virtypedparam.h"
 #include "viruri.h"
+#include "virtime.h"
 
 #define VIR_FROM_THIS VIR_FROM_LIBXL
 
@@ -84,6 +85,309 @@ libxlDriverUnlock(libxlDriverPrivatePtr driver)
     virMutexUnlock(&driver->lock);
 }
 
+/* job */
+VIR_ENUM_IMPL(libxlDomainJob, LIBXL_JOB_LAST,
+              "none",
+              "destroy",
+              "modify",
+              "abort",
+              "migration operation",
+              "none",   /* async job is never stored in job.active */
+);
+
+VIR_ENUM_IMPL(libxlDomainAsyncJob, LIBXL_ASYNC_JOB_LAST,
+              "none",
+              "save",
+              "dump",
+);
+
+static int
+libxlDomainObjInitJob(libxlDomainObjPrivatePtr priv)
+{
+    memset(&priv->job, 0, sizeof(priv->job));
+
+    if (virCondInit(&priv->job.cond) < 0)
+        return -1;
+
+    if (virCondInit(&priv->job.asyncCond) < 0) {
+        ignore_value(virCondDestroy(&priv->job.cond));
+        return -1;
+    }
+
+    return 0;
+}
+
+static void
+libxlDomainObjResetJob(libxlDomainObjPrivatePtr priv)
+{
+    struct libxlDomainJobObj *job = &priv->job;
+
+    job->active = LIBXL_JOB_NONE;
+    job->owner = 0;
+}
+
+static void
+libxlDomainObjResetAsyncJob(libxlDomainObjPrivatePtr priv)
+{
+    struct libxlDomainJobObj *job = &priv->job;
+
+    job->asyncJob = LIBXL_ASYNC_JOB_NONE;
+    job->asyncOwner = 0;
+    job->phase = 0;
+    job->mask = DEFAULT_JOB_MASK;
+    job->start = 0;
+    memset(&job->info, 0, sizeof(job->info));
+}
+
+static void
+libxlDomainObjFreeJob(libxlDomainObjPrivatePtr priv)
+{
+    ignore_value(virCondDestroy(&priv->job.cond));
+    ignore_value(virCondDestroy(&priv->job.asyncCond));
+}
+
+static bool
+libxlDomainTrackJob(enum libxlDomainJob job)
+{
+    return (LIBXL_DOMAIN_TRACK_JOBS & JOB_MASK(job)) != 0;
+}
+
+static void
+libxlDomainObjSaveJob(libxlDriverPrivatePtr driver, virDomainObjPtr obj)
+{
+    if (!virDomainObjIsActive(obj)) {
+        /* don't write the state file yet, it will be written once the domain
+         * gets activated */
+        return;
+    }
+
+    if (virDomainSaveStatus(driver->caps, driver->stateDir, obj) < 0)
+        VIR_WARN("Failed to save status on vm %s", obj->def->name);
+}
+
+static bool
+libxlDomainNestedJobAllowed(libxlDomainObjPrivatePtr priv, enum libxlDomainJob job)
+{
+    return !priv->job.asyncJob || (priv->job.mask & JOB_MASK(job)) != 0;
+}
+
+static void ATTRIBUTE_UNUSED
+libxlDomainObjSetAsyncJobMask(virDomainObjPtr obj,
+                              unsigned long long allowedJobs)
+{
+    libxlDomainObjPrivatePtr priv = obj->privateData;
+
+    if (!priv->job.asyncJob)
+        return;
+
+    priv->job.mask = allowedJobs | JOB_MASK(LIBXL_JOB_DESTROY);
+}
+
+/* Give up waiting for mutex after 30 seconds */
+#define LIBXL_JOB_WAIT_TIME (1000ull * 30)
+
+/*
+ * obj must be locked before calling; driver_locked says if libxlDriverPrivatePtr
+ * is locked or not.
+ */
+static int ATTRIBUTE_NONNULL(1)
+libxlDomainObjBeginJobInternal(libxlDriverPrivatePtr driver,
+                               bool driver_locked,
+                               virDomainObjPtr obj,
+                               enum libxlDomainJob job,
+                               enum libxlDomainAsyncJob asyncJob)
+{
+    libxlDomainObjPrivatePtr priv = obj->privateData;
+    unsigned long long now;
+    unsigned long long then;
+
+    if (virTimeMillisNow(&now) < 0)
+        return -1;
+    then = now + LIBXL_JOB_WAIT_TIME;
+
+    virObjectRef(obj);
+    if (driver_locked) {
+        libxlDriverUnlock(driver);
+    }
+
+retry:
+    while (!libxlDomainNestedJobAllowed(priv, job)) {
+        VIR_DEBUG("Wait async job condition for starting job: %s (async=%s)",
+                  libxlDomainJobTypeToString(job),
+                  libxlDomainAsyncJobTypeToString(priv->job.asyncJob));
+        if (virCondWaitUntil(&priv->job.asyncCond, &obj->lock, then) < 0)
+            goto error;
+    }
+
+    while (priv->job.active) {
+        VIR_DEBUG("Wait normal job condition for starting job: %s (async=%s)",
+                  libxlDomainJobTypeToString(job),
+                  libxlDomainAsyncJobTypeToString(priv->job.asyncJob));
+        if (virCondWaitUntil(&priv->job.cond, &obj->lock, then) < 0)
+            goto error;
+    }
+
+    /* No job is active but a new async job could have been started while obj
+     * was unlocked, so we need to recheck it. */
+    if (!libxlDomainNestedJobAllowed(priv, job))
+        goto retry;
+
+    libxlDomainObjResetJob(priv);
+
+    if (job != LIBXL_JOB_ASYNC) {
+        VIR_DEBUG("Starting job: %s (async=%s)",
+                  libxlDomainJobTypeToString(job),
+                  libxlDomainAsyncJobTypeToString(priv->job.asyncJob));
+        priv->job.active = job;
+        priv->job.owner = virThreadSelfID();
+    } else {
+        VIR_DEBUG("Starting async job: %s",
+                  libxlDomainAsyncJobTypeToString(asyncJob));
+        libxlDomainObjResetAsyncJob(priv);
+        priv->job.asyncJob = asyncJob;
+        priv->job.asyncOwner = virThreadSelfID();
+        priv->job.start = now;
+    }
+
+    if (driver_locked) {
+        virDomainObjUnlock(obj);
+        libxlDriverLock(driver);
+        virDomainObjLock(obj);
+    }
+
+    if (libxlDomainTrackJob(job))
+        libxlDomainObjSaveJob(driver, obj);
+
+    return 0;
+
+error:
+    VIR_WARN("Cannot start job (%s, %s) for domain %s;"
+             " current job is (%s, %s) owned by (%d, %d)",
+             libxlDomainJobTypeToString(job),
+             libxlDomainAsyncJobTypeToString(asyncJob),
+             obj->def->name,
+             libxlDomainJobTypeToString(priv->job.active),
+             libxlDomainAsyncJobTypeToString(priv->job.asyncJob),
+             priv->job.owner, priv->job.asyncOwner);
+
+    if (errno == ETIMEDOUT)
+        virReportError(VIR_ERR_OPERATION_TIMEOUT,
+                       "%s", _("cannot acquire state change lock"));
+    else
+        virReportSystemError(errno,
+                             "%s", _("cannot acquire job mutex"));
+    if (driver_locked) {
+        virDomainObjUnlock(obj);
+        libxlDriverLock(driver);
+        virDomainObjLock(obj);
+    }
+    virObjectUnref(obj);
+    return -1;
+}
+
+/*
+ * obj must be locked before calling, libxlDriverPrivatePtr must NOT be locked
+ *
+ * This must be called by anything that will change the VM state
+ * in any way
+ *
+ * Upon successful return, the object will have its ref count increased,
+ * successful calls must be followed by EndJob eventually
+ */
+static int
+libxlDomainObjBeginJob(libxlDriverPrivatePtr driver,
+                       virDomainObjPtr obj,
+                       enum libxlDomainJob job)
+{
+    return libxlDomainObjBeginJobInternal(driver, false, obj, job,
+                                          LIBXL_ASYNC_JOB_NONE);
+}
+
+static int ATTRIBUTE_UNUSED
+libxlDomainObjBeginAsyncJob(libxlDriverPrivatePtr driver,
+                            virDomainObjPtr obj,
+                            enum libxlDomainAsyncJob asyncJob)
+{
+    return libxlDomainObjBeginJobInternal(driver, false, obj, LIBXL_JOB_ASYNC,
+                                          asyncJob);
+}
+
+/*
+ * obj must be locked before calling. libxlDriverPrivatePtr MUST be locked
+ *
+ * This must be called by anything that will change the VM state
+ * in any way
+ *
+ * Upon successful return, the object will have its ref count increased,
+ * successful calls must be followed by EndJob eventually
+ */
+static int
+libxlDomainObjBeginJobWithDriver(libxlDriverPrivatePtr driver,
+                                 virDomainObjPtr obj,
+                                 enum libxlDomainJob job)
+{
+    if (job <= LIBXL_JOB_NONE || job >= LIBXL_JOB_ASYNC) {
+        virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
+                       _("Attempt to start invalid job"));
+        return -1;
+    }
+
+    return libxlDomainObjBeginJobInternal(driver, true, obj, job,
+                                          LIBXL_ASYNC_JOB_NONE);
+}
+
+static int
+libxlDomainObjBeginAsyncJobWithDriver(libxlDriverPrivatePtr driver,
+                                      virDomainObjPtr obj,
+                                      enum libxlDomainAsyncJob asyncJob)
+{
+    return libxlDomainObjBeginJobInternal(driver, true, obj, LIBXL_JOB_ASYNC,
+                                          asyncJob);
+}
+
+/*
+ * obj must be locked before calling, libxlDriverPrivatePtr does not matter
+ *
+ * To be called after completing the work associated with the
+ * earlier libxlDomainBeginJob() call
+ *
+ * Returns remaining refcount on 'obj', maybe 0 to indicated it
+ * was deleted
+ */
+static bool
+libxlDomainObjEndJob(libxlDriverPrivatePtr driver, virDomainObjPtr obj)
+{
+    libxlDomainObjPrivatePtr priv = obj->privateData;
+    enum libxlDomainJob job = priv->job.active;
+
+    VIR_DEBUG("Stopping job: %s (async=%s)",
+              libxlDomainJobTypeToString(job),
+              libxlDomainAsyncJobTypeToString(priv->job.asyncJob));
+
+    libxlDomainObjResetJob(priv);
+    if (libxlDomainTrackJob(job))
+        libxlDomainObjSaveJob(driver, obj);
+    virCondSignal(&priv->job.cond);
+
+    return virObjectUnref(obj);
+}
+
+static bool
+libxlDomainObjEndAsyncJob(libxlDriverPrivatePtr driver, virDomainObjPtr obj)
+{
+    libxlDomainObjPrivatePtr priv = obj->privateData;
+
+    VIR_DEBUG("Stopping async job: %s",
+              libxlDomainAsyncJobTypeToString(priv->job.asyncJob));
+
+    libxlDomainObjResetAsyncJob(priv);
+    libxlDomainObjSaveJob(driver, obj);
+    virCondBroadcast(&priv->job.asyncCond);
+
+    return virObjectUnref(obj);
+}
+/* job function finish */
+
 static void *
 libxlDomainObjPrivateAlloc(void)
 {
@@ -92,11 +396,18 @@ libxlDomainObjPrivateAlloc(void)
     if (VIR_ALLOC(priv) < 0)
         return NULL;
 
+    if (libxlDomainObjInitJob(priv) < 0)
+        goto error;
+
     libxl_ctx_init(&priv->ctx, LIBXL_VERSION, libxl_driver->logger);
     priv->waiterFD = -1;
     priv->eventHdl = -1;
 
     return priv;
+
+error:
+    VIR_FREE(priv);
+    return NULL;
 }
 
 static void
@@ -114,6 +425,7 @@ libxlDomainObjPrivateFree(void *data)
     }
 
     libxl_ctx_free(&priv->ctx);
+    libxlDomainObjFreeJob(priv);
     VIR_FREE(priv);
 }
 
@@ -3826,6 +4138,111 @@ cleanup:
 }
 
 static int
+libxlDomainGetJobInfo(virDomainPtr dom,
+                      virDomainJobInfoPtr info)
+{
+    libxlDriverPrivatePtr driver = dom->conn->privateData;
+    virDomainObjPtr vm;
+    int ret = -1;
+    libxlDomainObjPrivatePtr priv;
+
+    libxlDriverLock(driver);
+    vm = virDomainFindByUUID(&driver->domains, dom->uuid);
+    libxlDriverUnlock(driver);
+    if (!vm) {
+        char uuidstr[VIR_UUID_STRING_BUFLEN];
+        virUUIDFormat(dom->uuid, uuidstr);
+        virReportError(VIR_ERR_NO_DOMAIN,
+                       _("no domain with matching uuid '%s'"), uuidstr);
+        goto cleanup;
+    }
+
+    priv = vm->privateData;
+
+    if (virDomainObjIsActive(vm)) {
+        if (priv->job.asyncJob) {
+            memcpy(info, &priv->job.info, sizeof(*info));
+
+            /* Refresh elapsed time again just to ensure it
+             * is fully updated. This is primarily for benefit
+             * of incoming migration which we don't currently
+             * monitor actively in the background thread
+             */
+            if (virTimeMillisNow(&info->timeElapsed) < 0)
+                goto cleanup;
+            info->timeElapsed -= priv->job.start;
+        } else {
+            memset(info, 0, sizeof(*info));
+            info->type = VIR_DOMAIN_JOB_NONE;
+        }
+    } else {
+        virReportError(VIR_ERR_OPERATION_INVALID,
+                       "%s", _("domain is not running"));
+        goto cleanup;
+    }
+
+    ret = 0;
+
+cleanup:
+    if (vm)
+        virDomainObjUnlock(vm);
+    return ret;
+}
+
+static int
+libxlDomainAbortJob(virDomainPtr dom)
+{
+    libxlDriverPrivatePtr driver = dom->conn->privateData;
+    virDomainObjPtr vm;
+    int ret = -1;
+    libxlDomainObjPrivatePtr priv;
+
+    libxlDriverLock(driver);
+    vm = virDomainFindByUUID(&driver->domains, dom->uuid);
+    libxlDriverUnlock(driver);
+    if (!vm) {
+        char uuidstr[VIR_UUID_STRING_BUFLEN];
+        virUUIDFormat(dom->uuid, uuidstr);
+        virReportError(VIR_ERR_NO_DOMAIN,
+                       _("no domain with matching uuid '%s'"), uuidstr);
+        goto cleanup;
+    }
+
+    if (libxlDomainObjBeginJob(driver, vm, LIBXL_JOB_ABORT) < 0)
+        goto cleanup;
+
+    if (!virDomainObjIsActive(vm)) {
+        virReportError(VIR_ERR_OPERATION_INVALID,
+                       "%s", _("domain is not running"));
+        goto endjob;
+    }
+
+    priv = vm->privateData;
+
+    if (!priv->job.asyncJob) {
+        virReportError(VIR_ERR_OPERATION_INVALID,
+                       "%s", _("no job is active on the domain"));
+        goto endjob;
+    } else {
+        virReportError(VIR_ERR_OPERATION_INVALID,
+                       _("cannot abort %s; use virDomainDestroy instead"),
+                       libxlDomainAsyncJobTypeToString(priv->job.asyncJob));
+        goto endjob;
+    }
+
+    VIR_DEBUG("Not job could be cancelled at current version");
+
+endjob:
+    if (libxlDomainObjEndJob(driver, vm) == 0)
+        vm = NULL;
+
+cleanup:
+    if (vm)
+        virDomainObjUnlock(vm);
+    return ret;
+}
+
+static int
 libxlDomainEventRegisterAny(virConnectPtr conn, virDomainPtr dom, int eventID,
                             virConnectDomainEventGenericCallback callback,
                             void *opaque, virFreeCallback freecb)
@@ -3959,6 +4376,8 @@ static virDriver libxlDriver = {
     .domainIsActive = libxlDomainIsActive, /* 0.9.0 */
     .domainIsPersistent = libxlDomainIsPersistent, /* 0.9.0 */
     .domainIsUpdated = libxlDomainIsUpdated, /* 0.9.0 */
+    .domainGetJobInfo = libxlDomainGetJobInfo, /* 1.0.0 */
+    .domainAbortJob = libxlDomainAbortJob, /* 1.0.0 */
     .domainEventRegisterAny = libxlDomainEventRegisterAny, /* 0.9.0 */
     .domainEventDeregisterAny = libxlDomainEventDeregisterAny, /* 0.9.0 */
     .isAlive = libxlIsAlive, /* 0.9.8 */
-- 
1.7.12

--
libvir-list mailing list
libvir-list@xxxxxxxxxx
https://www.redhat.com/mailman/listinfo/libvir-list


[Index of Archives]     [Virt Tools]     [Libvirt Users]     [Lib OS Info]     [Fedora Users]     [Fedora Desktop]     [Fedora SELinux]     [Big List of Linux Books]     [Yosemite News]     [KDE Users]     [Fedora Tools]