Re: [PATCH 2/2] Add a watchdog action `managed'.

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Wed, Nov 17, 2010 at 03:45:28PM +0800, Hu Tao wrote:
> `managed' watchdog action lets libvirtd decide what to do if a guest
> watchdog fires. It has a subaction argument which is the action libvirtd
> will take. Currently only `dump' subaction is defined, it tells libvirtd
> to dump the guest on a watchdog event.
> 
> `managed' watchdog action is mapped to `none' qemu watchdog action, thus
> qemu will not get in the way of libvirtd's handling of watchdog events.

Won't that mean that the guest will continue to execute ? I think we'd
want to map it to 'pause', so the guest stops executing while we dump.

> diff --git a/src/qemu/qemu_conf.h b/src/qemu/qemu_conf.h
> index 790ce98..49584c8 100644
> --- a/src/qemu/qemu_conf.h
> +++ b/src/qemu/qemu_conf.h
> @@ -106,6 +106,12 @@ enum qemud_cmd_flags {
>  struct qemud_driver {
>      virMutex lock;
>  
> +    virMutex workerPoolLock;
> +    int poolRef;
> +    struct virWorkerPool *workerPool;
> +    int watchdogEventCallbackID;
> +    virConnectPtr conn;

We shouldn't be storing a virConnectPtr in here since that object
for the public APIs - we should directly use the internal APIs.



> @@ -4425,6 +4424,82 @@ retry:
>      }
>  }
>  
> +struct watchdogEvent
> +{
> +    virDomain dom;
> +    int action;
> +    void *actionArg;
> +};
> +
> +static void processWatchdogEvent(void *data)
> +{
> +    struct watchdogEvent *wdEvent = data;
> +
> +    switch (wdEvent->action) {
> +    case VIR_DOMAIN_WATCHDOG_SUBACTION_DUMP:
> +        {
> +            char *path = wdEvent->actionArg;
> +            char dumpfilePath[4096];
> +            int i;
> +
> +            i = snprintf(dumpfilePath, 4096, "%s/%s-%u", path, wdEvent->dom.name, (unsigned int)time(NULL));
> +            dumpfilePath[i] = '\0';

It is preferrable to use  'virAsprintf()' here instead of a fixed
buffer size.

> +            virDomainCoreDump(&wdEvent->dom, dumpfilePath, 0);

Calling out to public APIs isn't allowed from the internal
code. The functionality you want is mostly in the method
qemudDomainCoreDump(), but it takes a virDomainPtr and you
instead need one that has a virDomainObjPtr. So I'd pull
the code out of qemudDomainCoreDump() into a static function
you can call from here and qemudDomainCoreDump().

> +        }
> +        break;
> +    }
> +
> +    free(wdEvent->dom.name);
> +    free(wdEvent);
> +}

VIR_FREE() is required here.

> +
> +static int onWatchdogEvent(virConnectPtr conn,
> +                           virDomainPtr dom,
> +                           int action,
> +                           void *opaque ATTRIBUTE_UNUSED)
> +{
> +    struct qemud_driver *driver = conn->privateData;
> +    struct watchdogEvent *wdEvent;
> +    virDomainObjPtr vm;
> +
> +    wdEvent = malloc(sizeof(*wdEvent));
> +    if (!wdEvent)
> +        return -1;

malloc() isn't allowed in libvirt code - use VIR_ALLOC() and
checkout the 'HACKING' file for guidelines. 'make syntax-check'
should warn you about coding style issues like this.

> +
> +    vm = virDomainFindByID(&driver->domains, dom->id);
> +    if (!vm || !vm->def->watchdog) {
> +        free(wdEvent);
> +        return -1;
> +    }
> +
> +    if (vm->def->watchdog->action != VIR_DOMAIN_WATCHDOG_ACTION_MANAGED) {
> +        virDomainObjUnlock(vm);
> +        free(wdEvent);
> +        return 0;
> +    }
> +
> +    if (action != 0)
> +        return 0;
> +
> +    wdEvent->dom = *dom;
> +    wdEvent->dom.name = malloc(strlen(dom->name));
> +    if (!wdEvent->dom.name) {
> +        virDomainObjUnlock(vm);
> +        free(wdEvent);
> +        return -1;
> +    }
> +    strcpy(wdEvent->dom.name, dom->name);
> +
> +    wdEvent->action = vm->def->watchdog->subaction;
> +    wdEvent->actionArg = vm->def->watchdog->subactionArg;
> +
> +    virWorkerPoolSendJob(driver->workerPool, wdEvent);
> +    virDomainObjUnlock(vm);
> +
> +    return 0;
> +}
> +
> +
>  static virDrvOpenStatus qemudOpen(virConnectPtr conn,
>                                    virConnectAuthPtr auth ATTRIBUTE_UNUSED,
>                                    int flags ATTRIBUTE_UNUSED) {
> @@ -4481,6 +4556,30 @@ static virDrvOpenStatus qemudOpen(virConnectPtr conn,
>              }
>          }
>      }
> +
> +    virMutexLock(&qemu_driver->workerPoolLock);
> +    if (!qemu_driver->workerPool) {
> +        qemu_driver->workerPool = virWorkerPoolNew(0, 1, processWatchdogEvent);
> +        if (qemu_driver->workerPool) {
> +            qemu_driver->watchdogEventCallbackID = virDomainEventCallbackListAddID(conn,
> +                                                                                   qemu_driver->domainEventCallbacks,
> +                                                                                   NULL,
> +                                                                                   VIR_DOMAIN_EVENT_ID_WATCHDOG,
> +                                                                                   VIR_DOMAIN_EVENT_CALLBACK(onWatchdogEvent),
> +                                                                                   NULL,
> +                                                                                   NULL);
> +            if (qemu_driver->watchdogEventCallbackID == -1) {
> +                virWorkerPoolFree(qemu_driver->workerPool);
> +                qemu_driver->workerPool = NULL;
> +            } else {
> +                qemu_driver->poolRef = 1;
> +                conn->refs--;
> +            }
> +        }
> +    } else
> +        qemu_driver->poolRef++;
> +    virMutexUnlock(&qemu_driver->workerPoolLock);


This is where most of the problems arise. 'qemudOpen' is only run when a
client app connects to libvirt. We obviously want automatic core dumps
to run any time a guest crashes, not only when a client app is connected
to libvirt. Instead of using the virDomainEvent... APIs, you want to
directly add code to the internal method qemuHandleDomainWatchdog()
which is where the watchdog events originate.

The worker pool can be created right at the start of QEMU driver startup
in qemudStartup() avoiding the need for any ref counting on it.

> +
>      conn->privateData = qemu_driver;
>  
>      return VIR_DRV_OPEN_SUCCESS;
> @@ -4489,6 +4588,15 @@ static virDrvOpenStatus qemudOpen(virConnectPtr conn,
>  static int qemudClose(virConnectPtr conn) {
>      struct qemud_driver *driver = conn->privateData;
>  
> +    virMutexLock(&driver->workerPoolLock);
> +    if (--driver->poolRef == 0) {
> +        conn->refs--;
> +        virDomainEventCallbackListRemoveID(conn, driver->domainEventCallbacks, driver->watchdogEventCallbackID);
> +        virWorkerPoolFree(driver->workerPool);
> +        driver->workerPool = NULL;
> +    }
> +    virMutexUnlock(&driver->workerPoolLock);

This chunk won't be needed at all if everything is done from the
qemuHandleDomainWatchdog() method. Also a separate 'workerPoolLock'
is probably overkill - just protect it with the regular qemu driver
lock

Regards,
Daniel
-- 
|: Red Hat, Engineering, London    -o-   http://people.redhat.com/berrange/ :|
|: http://libvirt.org -o- http://virt-manager.org -o- http://deltacloud.org :|
|: http://autobuild.org        -o-         http://search.cpan.org/~danberr/ :|
|: GnuPG: 7D3B9505  -o-   F3C9 553F A1DA 4AC2 5648 23C1 B3DF F742 7D3B 9505 :|

--
libvir-list mailing list
libvir-list@xxxxxxxxxx
https://www.redhat.com/mailman/listinfo/libvir-list


[Index of Archives]     [Virt Tools]     [Libvirt Users]     [Lib OS Info]     [Fedora Users]     [Fedora Desktop]     [Fedora SELinux]     [Big List of Linux Books]     [Yosemite News]     [KDE Users]     [Fedora Tools]