This patch introduce a lock for protecting the long-running api (save, dump, migration and so on) from the other api which may update the status of the virtual machine. Signed-off-by: Bamvor Jian Zhang <bjzhang@xxxxxxxx> --- src/libxl/libxl_conf.h | 56 +++++++ src/libxl/libxl_driver.c | 419 +++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 475 insertions(+) diff --git a/src/libxl/libxl_conf.h b/src/libxl/libxl_conf.h index 56bf85c..95a3418 100644 --- a/src/libxl/libxl_conf.h +++ b/src/libxl/libxl_conf.h @@ -73,6 +73,60 @@ struct _libxlDriverPrivate { char *saveDir; }; +# define JOB_MASK(job) (1 << (job - 1)) +# define DEFAULT_JOB_MASK \ + (JOB_MASK(LIBXL_JOB_DESTROY) | \ + JOB_MASK(LIBXL_JOB_ABORT)) + +/* Jobs which have to be tracked in domain state XML. */ +# define LIBXL_DOMAIN_TRACK_JOBS \ + (JOB_MASK(LIBXL_JOB_DESTROY) | \ + JOB_MASK(LIBXL_JOB_ASYNC)) + +/* Only 1 job is allowed at any time + * A job includes *all* libxl.so api, even those just querying + * information, not merely actions */ +enum libxlDomainJob { + LIBXL_JOB_NONE = 0, /* Always set to 0 for easy if (jobActive) conditions */ + LIBXL_JOB_DESTROY, /* Destroys the domain (cannot be masked out) */ + LIBXL_JOB_MODIFY, /* May change state */ + LIBXL_JOB_ABORT, /* Abort current async job */ + LIBXL_JOB_MIGRATION_OP, /* Operation influencing outgoing migration */ + + /* The following item must always be the last item before JOB_LAST */ + LIBXL_JOB_ASYNC, /* Asynchronous job */ + + LIBXL_JOB_LAST +}; +VIR_ENUM_DECL(libxlDomainJob) + +/* Async job consists of a series of jobs that may change state. Independent + * jobs that do not change state (and possibly others if explicitly allowed by + * current async job) are allowed to be run even if async job is active. + */ +enum libxlDomainAsyncJob { + LIBXL_ASYNC_JOB_NONE = 0, + LIBXL_ASYNC_JOB_SAVE, + LIBXL_ASYNC_JOB_DUMP, + + LIBXL_ASYNC_JOB_LAST +}; +VIR_ENUM_DECL(libxlDomainAsyncJob) + +struct libxlDomainJobObj { + virCond cond; /* Use to coordinate jobs */ + enum libxlDomainJob active; /* Currently running job */ + int owner; /* Thread which set current job */ + + virCond asyncCond; /* Use to coordinate with async jobs */ + enum libxlDomainAsyncJob asyncJob; /* Currently active async job */ + int asyncOwner; /* Thread which set current async job */ + int phase; /* Job phase (mainly for migrations) */ + unsigned long long mask; /* Jobs allowed during async job */ + unsigned long long start; /* When the async job started */ + virDomainJobInfo info; /* Async job progress data */ +}; + typedef struct _libxlDomainObjPrivate libxlDomainObjPrivate; typedef libxlDomainObjPrivate *libxlDomainObjPrivatePtr; struct _libxlDomainObjPrivate { @@ -81,6 +135,8 @@ struct _libxlDomainObjPrivate { libxl_waiter *dWaiter; int waiterFD; int eventHdl; + + struct libxlDomainJobObj job; }; # define LIBXL_SAVE_MAGIC "libvirt-xml\n \0 \r" diff --git a/src/libxl/libxl_driver.c b/src/libxl/libxl_driver.c index f4e9aa6..05944ef 100644 --- a/src/libxl/libxl_driver.c +++ b/src/libxl/libxl_driver.c @@ -45,6 +45,7 @@ #include "xen_xm.h" #include "virtypedparam.h" #include "viruri.h" +#include "virtime.h" #define VIR_FROM_THIS VIR_FROM_LIBXL @@ -84,6 +85,309 @@ libxlDriverUnlock(libxlDriverPrivatePtr driver) virMutexUnlock(&driver->lock); } +/* job */ +VIR_ENUM_IMPL(libxlDomainJob, LIBXL_JOB_LAST, + "none", + "destroy", + "modify", + "abort", + "migration operation", + "none", /* async job is never stored in job.active */ +); + +VIR_ENUM_IMPL(libxlDomainAsyncJob, LIBXL_ASYNC_JOB_LAST, + "none", + "save", + "dump", +); + +static int +libxlDomainObjInitJob(libxlDomainObjPrivatePtr priv) +{ + memset(&priv->job, 0, sizeof(priv->job)); + + if (virCondInit(&priv->job.cond) < 0) + return -1; + + if (virCondInit(&priv->job.asyncCond) < 0) { + ignore_value(virCondDestroy(&priv->job.cond)); + return -1; + } + + return 0; +} + +static void +libxlDomainObjResetJob(libxlDomainObjPrivatePtr priv) +{ + struct libxlDomainJobObj *job = &priv->job; + + job->active = LIBXL_JOB_NONE; + job->owner = 0; +} + +static void +libxlDomainObjResetAsyncJob(libxlDomainObjPrivatePtr priv) +{ + struct libxlDomainJobObj *job = &priv->job; + + job->asyncJob = LIBXL_ASYNC_JOB_NONE; + job->asyncOwner = 0; + job->phase = 0; + job->mask = DEFAULT_JOB_MASK; + job->start = 0; + memset(&job->info, 0, sizeof(job->info)); +} + +static void +libxlDomainObjFreeJob(libxlDomainObjPrivatePtr priv) +{ + ignore_value(virCondDestroy(&priv->job.cond)); + ignore_value(virCondDestroy(&priv->job.asyncCond)); +} + +static bool +libxlDomainTrackJob(enum libxlDomainJob job) +{ + return (LIBXL_DOMAIN_TRACK_JOBS & JOB_MASK(job)) != 0; +} + +static void +libxlDomainObjSaveJob(libxlDriverPrivatePtr driver, virDomainObjPtr obj) +{ + if (!virDomainObjIsActive(obj)) { + /* don't write the state file yet, it will be written once the domain + * gets activated */ + return; + } + + if (virDomainSaveStatus(driver->caps, driver->stateDir, obj) < 0) + VIR_WARN("Failed to save status on vm %s", obj->def->name); +} + +static bool +libxlDomainNestedJobAllowed(libxlDomainObjPrivatePtr priv, enum libxlDomainJob job) +{ + return !priv->job.asyncJob || (priv->job.mask & JOB_MASK(job)) != 0; +} + +static void ATTRIBUTE_UNUSED +libxlDomainObjSetAsyncJobMask(virDomainObjPtr obj, + unsigned long long allowedJobs) +{ + libxlDomainObjPrivatePtr priv = obj->privateData; + + if (!priv->job.asyncJob) + return; + + priv->job.mask = allowedJobs | JOB_MASK(LIBXL_JOB_DESTROY); +} + +/* Give up waiting for mutex after 30 seconds */ +#define LIBXL_JOB_WAIT_TIME (1000ull * 30) + +/* + * obj must be locked before calling; driver_locked says if libxlDriverPrivatePtr + * is locked or not. + */ +static int ATTRIBUTE_NONNULL(1) +libxlDomainObjBeginJobInternal(libxlDriverPrivatePtr driver, + bool driver_locked, + virDomainObjPtr obj, + enum libxlDomainJob job, + enum libxlDomainAsyncJob asyncJob) +{ + libxlDomainObjPrivatePtr priv = obj->privateData; + unsigned long long now; + unsigned long long then; + + if (virTimeMillisNow(&now) < 0) + return -1; + then = now + LIBXL_JOB_WAIT_TIME; + + virObjectRef(obj); + if (driver_locked) { + libxlDriverUnlock(driver); + } + +retry: + while (!libxlDomainNestedJobAllowed(priv, job)) { + VIR_DEBUG("Wait async job condition for starting job: %s (async=%s)", + libxlDomainJobTypeToString(job), + libxlDomainAsyncJobTypeToString(priv->job.asyncJob)); + if (virCondWaitUntil(&priv->job.asyncCond, &obj->lock, then) < 0) + goto error; + } + + while (priv->job.active) { + VIR_DEBUG("Wait normal job condition for starting job: %s (async=%s)", + libxlDomainJobTypeToString(job), + libxlDomainAsyncJobTypeToString(priv->job.asyncJob)); + if (virCondWaitUntil(&priv->job.cond, &obj->lock, then) < 0) + goto error; + } + + /* No job is active but a new async job could have been started while obj + * was unlocked, so we need to recheck it. */ + if (!libxlDomainNestedJobAllowed(priv, job)) + goto retry; + + libxlDomainObjResetJob(priv); + + if (job != LIBXL_JOB_ASYNC) { + VIR_DEBUG("Starting job: %s (async=%s)", + libxlDomainJobTypeToString(job), + libxlDomainAsyncJobTypeToString(priv->job.asyncJob)); + priv->job.active = job; + priv->job.owner = virThreadSelfID(); + } else { + VIR_DEBUG("Starting async job: %s", + libxlDomainAsyncJobTypeToString(asyncJob)); + libxlDomainObjResetAsyncJob(priv); + priv->job.asyncJob = asyncJob; + priv->job.asyncOwner = virThreadSelfID(); + priv->job.start = now; + } + + if (driver_locked) { + virDomainObjUnlock(obj); + libxlDriverLock(driver); + virDomainObjLock(obj); + } + + if (libxlDomainTrackJob(job)) + libxlDomainObjSaveJob(driver, obj); + + return 0; + +error: + VIR_WARN("Cannot start job (%s, %s) for domain %s;" + " current job is (%s, %s) owned by (%d, %d)", + libxlDomainJobTypeToString(job), + libxlDomainAsyncJobTypeToString(asyncJob), + obj->def->name, + libxlDomainJobTypeToString(priv->job.active), + libxlDomainAsyncJobTypeToString(priv->job.asyncJob), + priv->job.owner, priv->job.asyncOwner); + + if (errno == ETIMEDOUT) + virReportError(VIR_ERR_OPERATION_TIMEOUT, + "%s", _("cannot acquire state change lock")); + else + virReportSystemError(errno, + "%s", _("cannot acquire job mutex")); + if (driver_locked) { + virDomainObjUnlock(obj); + libxlDriverLock(driver); + virDomainObjLock(obj); + } + virObjectUnref(obj); + return -1; +} + +/* + * obj must be locked before calling, libxlDriverPrivatePtr must NOT be locked + * + * This must be called by anything that will change the VM state + * in any way + * + * Upon successful return, the object will have its ref count increased, + * successful calls must be followed by EndJob eventually + */ +static int +libxlDomainObjBeginJob(libxlDriverPrivatePtr driver, + virDomainObjPtr obj, + enum libxlDomainJob job) +{ + return libxlDomainObjBeginJobInternal(driver, false, obj, job, + LIBXL_ASYNC_JOB_NONE); +} + +static int ATTRIBUTE_UNUSED +libxlDomainObjBeginAsyncJob(libxlDriverPrivatePtr driver, + virDomainObjPtr obj, + enum libxlDomainAsyncJob asyncJob) +{ + return libxlDomainObjBeginJobInternal(driver, false, obj, LIBXL_JOB_ASYNC, + asyncJob); +} + +/* + * obj must be locked before calling. libxlDriverPrivatePtr MUST be locked + * + * This must be called by anything that will change the VM state + * in any way + * + * Upon successful return, the object will have its ref count increased, + * successful calls must be followed by EndJob eventually + */ +static int +libxlDomainObjBeginJobWithDriver(libxlDriverPrivatePtr driver, + virDomainObjPtr obj, + enum libxlDomainJob job) +{ + if (job <= LIBXL_JOB_NONE || job >= LIBXL_JOB_ASYNC) { + virReportError(VIR_ERR_INTERNAL_ERROR, "%s", + _("Attempt to start invalid job")); + return -1; + } + + return libxlDomainObjBeginJobInternal(driver, true, obj, job, + LIBXL_ASYNC_JOB_NONE); +} + +static int +libxlDomainObjBeginAsyncJobWithDriver(libxlDriverPrivatePtr driver, + virDomainObjPtr obj, + enum libxlDomainAsyncJob asyncJob) +{ + return libxlDomainObjBeginJobInternal(driver, true, obj, LIBXL_JOB_ASYNC, + asyncJob); +} + +/* + * obj must be locked before calling, libxlDriverPrivatePtr does not matter + * + * To be called after completing the work associated with the + * earlier libxlDomainBeginJob() call + * + * Returns remaining refcount on 'obj', maybe 0 to indicated it + * was deleted + */ +static bool +libxlDomainObjEndJob(libxlDriverPrivatePtr driver, virDomainObjPtr obj) +{ + libxlDomainObjPrivatePtr priv = obj->privateData; + enum libxlDomainJob job = priv->job.active; + + VIR_DEBUG("Stopping job: %s (async=%s)", + libxlDomainJobTypeToString(job), + libxlDomainAsyncJobTypeToString(priv->job.asyncJob)); + + libxlDomainObjResetJob(priv); + if (libxlDomainTrackJob(job)) + libxlDomainObjSaveJob(driver, obj); + virCondSignal(&priv->job.cond); + + return virObjectUnref(obj); +} + +static bool +libxlDomainObjEndAsyncJob(libxlDriverPrivatePtr driver, virDomainObjPtr obj) +{ + libxlDomainObjPrivatePtr priv = obj->privateData; + + VIR_DEBUG("Stopping async job: %s", + libxlDomainAsyncJobTypeToString(priv->job.asyncJob)); + + libxlDomainObjResetAsyncJob(priv); + libxlDomainObjSaveJob(driver, obj); + virCondBroadcast(&priv->job.asyncCond); + + return virObjectUnref(obj); +} +/* job function finish */ + static void * libxlDomainObjPrivateAlloc(void) { @@ -92,11 +396,18 @@ libxlDomainObjPrivateAlloc(void) if (VIR_ALLOC(priv) < 0) return NULL; + if (libxlDomainObjInitJob(priv) < 0) + goto error; + libxl_ctx_init(&priv->ctx, LIBXL_VERSION, libxl_driver->logger); priv->waiterFD = -1; priv->eventHdl = -1; return priv; + +error: + VIR_FREE(priv); + return NULL; } static void @@ -114,6 +425,7 @@ libxlDomainObjPrivateFree(void *data) } libxl_ctx_free(&priv->ctx); + libxlDomainObjFreeJob(priv); VIR_FREE(priv); } @@ -3826,6 +4138,111 @@ cleanup: } static int +libxlDomainGetJobInfo(virDomainPtr dom, + virDomainJobInfoPtr info) +{ + libxlDriverPrivatePtr driver = dom->conn->privateData; + virDomainObjPtr vm; + int ret = -1; + libxlDomainObjPrivatePtr priv; + + libxlDriverLock(driver); + vm = virDomainFindByUUID(&driver->domains, dom->uuid); + libxlDriverUnlock(driver); + if (!vm) { + char uuidstr[VIR_UUID_STRING_BUFLEN]; + virUUIDFormat(dom->uuid, uuidstr); + virReportError(VIR_ERR_NO_DOMAIN, + _("no domain with matching uuid '%s'"), uuidstr); + goto cleanup; + } + + priv = vm->privateData; + + if (virDomainObjIsActive(vm)) { + if (priv->job.asyncJob) { + memcpy(info, &priv->job.info, sizeof(*info)); + + /* Refresh elapsed time again just to ensure it + * is fully updated. This is primarily for benefit + * of incoming migration which we don't currently + * monitor actively in the background thread + */ + if (virTimeMillisNow(&info->timeElapsed) < 0) + goto cleanup; + info->timeElapsed -= priv->job.start; + } else { + memset(info, 0, sizeof(*info)); + info->type = VIR_DOMAIN_JOB_NONE; + } + } else { + virReportError(VIR_ERR_OPERATION_INVALID, + "%s", _("domain is not running")); + goto cleanup; + } + + ret = 0; + +cleanup: + if (vm) + virDomainObjUnlock(vm); + return ret; +} + +static int +libxlDomainAbortJob(virDomainPtr dom) +{ + libxlDriverPrivatePtr driver = dom->conn->privateData; + virDomainObjPtr vm; + int ret = -1; + libxlDomainObjPrivatePtr priv; + + libxlDriverLock(driver); + vm = virDomainFindByUUID(&driver->domains, dom->uuid); + libxlDriverUnlock(driver); + if (!vm) { + char uuidstr[VIR_UUID_STRING_BUFLEN]; + virUUIDFormat(dom->uuid, uuidstr); + virReportError(VIR_ERR_NO_DOMAIN, + _("no domain with matching uuid '%s'"), uuidstr); + goto cleanup; + } + + if (libxlDomainObjBeginJob(driver, vm, LIBXL_JOB_ABORT) < 0) + goto cleanup; + + if (!virDomainObjIsActive(vm)) { + virReportError(VIR_ERR_OPERATION_INVALID, + "%s", _("domain is not running")); + goto endjob; + } + + priv = vm->privateData; + + if (!priv->job.asyncJob) { + virReportError(VIR_ERR_OPERATION_INVALID, + "%s", _("no job is active on the domain")); + goto endjob; + } else { + virReportError(VIR_ERR_OPERATION_INVALID, + _("cannot abort %s; use virDomainDestroy instead"), + libxlDomainAsyncJobTypeToString(priv->job.asyncJob)); + goto endjob; + } + + VIR_DEBUG("Not job could be cancelled at current version"); + +endjob: + if (libxlDomainObjEndJob(driver, vm) == 0) + vm = NULL; + +cleanup: + if (vm) + virDomainObjUnlock(vm); + return ret; +} + +static int libxlDomainEventRegisterAny(virConnectPtr conn, virDomainPtr dom, int eventID, virConnectDomainEventGenericCallback callback, void *opaque, virFreeCallback freecb) @@ -3959,6 +4376,8 @@ static virDriver libxlDriver = { .domainIsActive = libxlDomainIsActive, /* 0.9.0 */ .domainIsPersistent = libxlDomainIsPersistent, /* 0.9.0 */ .domainIsUpdated = libxlDomainIsUpdated, /* 0.9.0 */ + .domainGetJobInfo = libxlDomainGetJobInfo, /* 1.0.0 */ + .domainAbortJob = libxlDomainAbortJob, /* 1.0.0 */ .domainEventRegisterAny = libxlDomainEventRegisterAny, /* 0.9.0 */ .domainEventDeregisterAny = libxlDomainEventDeregisterAny, /* 0.9.0 */ .isAlive = libxlIsAlive, /* 0.9.8 */ -- 1.7.12 -- libvir-list mailing list libvir-list@xxxxxxxxxx https://www.redhat.com/mailman/listinfo/libvir-list