Re: [PATCH 11/11] Add handling for reboots of LXC containers

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Tue, Jul 24, 2012 at 14:22:53 +0100, Daniel P. Berrange wrote:
> From: "Daniel P. Berrange" <berrange@xxxxxxxxxx>
> 
> The reboot() syscall is allowed by new kernels for LXC containers.
> The LXC controller can detect whether a reboot was requested
> (instead of a normal shutdown) by looking at the "init" process
> exit status. If a reboot was triggered, the exit status will
> record SIGHUP as the kill reason.
> 
> The LXC controller has cleared all its capabilities, and the
> veth network devices will no longer exist at this time. Thus
> it cannot restart the container init process itself. Instead
> it emits an event which is picked up by the LXC driver in
> libvirtd. This will then re-create the container, using the
> same configuration as it was previously running with (ie it
> will not activate 'newDef').
> 
> Signed-off-by: Daniel P. Berrange <berrange@xxxxxxxxxx>
> ---
>  src/lxc/lxc_controller.c |   12 ++++-
>  src/lxc/lxc_domain.h     |    1 +
>  src/lxc/lxc_process.c    |  119 ++++++++++++++++++++++++++++++++++++++++++----
>  src/lxc/lxc_protocol.x   |    3 +-
>  4 files changed, 122 insertions(+), 13 deletions(-)
> 
> diff --git a/src/lxc/lxc_controller.c b/src/lxc/lxc_controller.c
> index 7fcc953..a26eec2 100644
> --- a/src/lxc/lxc_controller.c
> +++ b/src/lxc/lxc_controller.c
> @@ -661,6 +661,7 @@ static int lxcControllerClearCapabilities(void)
>  }
>  
>  static bool quit = false;
> +static bool wantReboot = false;
>  static virMutex lock;
>  
>  
> @@ -670,11 +671,15 @@ static void virLXCControllerSignalChildIO(virNetServerPtr server ATTRIBUTE_UNUSE
>  {
>      virLXCControllerPtr ctrl = opaque;
>      int ret;
> +    int status;
>  
> -    ret = waitpid(-1, NULL, WNOHANG);
> +    ret = waitpid(-1, &status, WNOHANG);
>      if (ret == ctrl->initpid) {
>          virMutexLock(&lock);
>          quit = true;
> +        if (WIFSIGNALED(status) &&
> +            WTERMSIG(status) == SIGHUP)
> +            wantReboot = true;
>          virMutexUnlock(&lock);
>      }
>  }
> @@ -998,7 +1003,7 @@ static int virLXCControllerMain(virLXCControllerPtr ctrl)
>  
>      err = virGetLastError();
>      if (!err || err->code == VIR_ERR_OK)
> -        rc = 0;
> +        rc = wantReboot ? 1 : 0;
>  
>  cleanup:
>      virMutexDestroy(&lock);
> @@ -1319,6 +1324,9 @@ virLXCControllerEventSendExit(virLXCControllerPtr ctrl,
>      case 0:
>          msg.status = VIR_LXC_PROTOCOL_EXIT_STATUS_SHUTDOWN;
>          break;
> +    case 1:
> +        msg.status = VIR_LXC_PROTOCOL_EXIT_STATUS_REBOOT;
> +        break;
>      default:
>          msg.status = VIR_LXC_PROTOCOL_EXIT_STATUS_ERROR;
>          break;
> diff --git a/src/lxc/lxc_domain.h b/src/lxc/lxc_domain.h
> index 9216c7a..4301075 100644
> --- a/src/lxc/lxc_domain.h
> +++ b/src/lxc/lxc_domain.h
> @@ -32,6 +32,7 @@ typedef virLXCDomainObjPrivate *virLXCDomainObjPrivatePtr;
>  struct _virLXCDomainObjPrivate {
>      virLXCMonitorPtr monitor;
>      int shutoffReason;
> +    bool wantReboot;
>  };
>  
>  void virLXCDomainSetPrivateDataHooks(virCapsPtr caps);
> diff --git a/src/lxc/lxc_process.c b/src/lxc/lxc_process.c
> index 0b0a810..8a1ee7d 100644
> --- a/src/lxc/lxc_process.c
> +++ b/src/lxc/lxc_process.c
> @@ -145,6 +145,58 @@ int virLXCProcessAutoDestroyRemove(virLXCDriverPtr driver,
>      return 0;
>  }
>  
> +static virConnectPtr
> +virLXCProcessAutoDestroyGetConn(virLXCDriverPtr driver,
> +                                virDomainObjPtr vm)
> +{
> +    char uuidstr[VIR_UUID_STRING_BUFLEN];
> +    virUUIDFormat(vm->def->uuid, uuidstr);
> +    VIR_DEBUG("vm=%s uuid=%s", vm->def->name, uuidstr);
> +    return virHashLookup(driver->autodestroy, uuidstr);
> +}
> +
> +
> +static int
> +virLXCProcessReboot(virLXCDriverPtr driver,
> +                    virDomainObjPtr vm)
> +{
> +    virConnectPtr conn = virLXCProcessAutoDestroyGetConn(driver, vm);
> +    int reason = vm->state.reason;
> +    bool autodestroy = false;
> +    int ret = -1;
> +    virDomainDefPtr savedDef;
> +
> +    if (conn) {
> +        virConnectRef(conn);
> +        autodestroy = true;
> +    } else {
> +        conn = virConnectOpen("lxc:///");
> +        /* Ignoring NULL conn which is mostly harmless here */

So why do we even try to connect here?

> +    }
> +
> +    /* In a reboot scenario, we need to make sure we continue
> +     * to use the current 'def', and not switch to 'newDef'.
> +     * So temporarily hide the newDef and then reinstate it
> +     */
> +    savedDef = vm->newDef;
> +    vm->newDef = NULL;
> +    virLXCProcessStop(driver, vm, VIR_DOMAIN_SHUTOFF_SHUTDOWN);
> +    vm->newDef = savedDef;
> +    if (virLXCProcessStart(conn, driver, vm, autodestroy, reason) < 0) {
> +        VIR_WARN("Unable to handle reboot of vm %s",
> +                 vm->def->name);
> +        goto cleanup;
> +    }
> +
> +    if (conn)
> +        virConnectClose(conn);
> +
> +    ret = 0;
> +
> +cleanup:
> +    return ret;
> +}
> +
>  
>  /**
>   * virLXCProcessCleanup:
> @@ -395,14 +447,37 @@ static int virLXCProcessSetupInterfaces(virConnectPtr conn,
>          case VIR_DOMAIN_NET_TYPE_NETWORK: {
>              virNetworkPtr network;
>              char *brname = NULL;
> +            bool fail = false;
> +            int active;
> +            virErrorPtr errobj;
>  
>              if (!(network = virNetworkLookupByName(conn,
>                                                     def->nets[i]->data.network.name)))
>                  goto cleanup;
>  
> -            brname = virNetworkGetBridgeName(network);
> +            active = virNetworkIsActive(network);
> +            if (active != 1) {
> +                fail = true;
> +                if (active == 0)
> +                    virReportError(VIR_ERR_INTERNAL_ERROR,
> +                                   _("Network '%s' is not active."),
> +                                   def->nets[i]->data.network.name);
> +                goto cleanup;
> +            }
> +
> +            if (!fail) {
> +                brname = virNetworkGetBridgeName(network);
> +                if (brname == NULL)
> +                    fail = true;
> +            }
> +
> +            /* Make sure any above failure is preserved */
> +            errobj = virSaveLastError();
>              virNetworkFree(network);
> -            if (!brname)
> +            virSetError(errobj);
> +            virFreeError(errobj);
> +
> +            if (fail)
>                  goto cleanup;
>  
>              if (virLXCProcessSetupInterfaceBridged(conn,
> @@ -496,19 +571,38 @@ static void virLXCProcessMonitorEOFNotify(virLXCMonitorPtr mon ATTRIBUTE_UNUSED,
>  {
>      virLXCDriverPtr driver = lxc_driver;
>      virDomainEventPtr event = NULL;
> +    virLXCDomainObjPrivatePtr priv;
>  
>      lxcDriverLock(driver);
>      virDomainObjLock(vm);
>      lxcDriverUnlock(driver);
>  
> -    virLXCProcessStop(driver, vm, VIR_DOMAIN_SHUTOFF_SHUTDOWN);
> -    event = virDomainEventNewFromObj(vm,
> -                                     VIR_DOMAIN_EVENT_STOPPED,
> -                                     VIR_DOMAIN_EVENT_STOPPED_SHUTDOWN);
> -    virDomainAuditStop(vm, "shutdown");
> -    if (!vm->persistent) {
> -        virDomainRemoveInactive(&driver->domains, vm);
> -        vm = NULL;
> +    priv = vm->privateData;
> +    if (!priv->wantReboot) {
> +        virLXCProcessStop(driver, vm, VIR_DOMAIN_SHUTOFF_SHUTDOWN);
> +        event = virDomainEventNewFromObj(vm,
> +                                         VIR_DOMAIN_EVENT_STOPPED,
> +                                         VIR_DOMAIN_EVENT_STOPPED_SHUTDOWN);
> +        virDomainAuditStop(vm, "shutdown");
> +        if (!vm->persistent) {
> +            virDomainRemoveInactive(&driver->domains, vm);
> +            vm = NULL;
> +        }
> +    } else {
> +        int ret =virLXCProcessReboot(driver, vm);

Missing space after the assignment operator: s/=/= /

> +        virDomainAuditStop(vm, "reboot");

Shouldn't we audit the stopped domain before calling virLXCProcessReboot, rather than after it?

> +        virDomainAuditStart(vm, "reboot", ret == 0);
> +        if (ret == 0) {
> +            event = virDomainEventRebootNewFromObj(vm);
> +        } else {
> +            event = virDomainEventNewFromObj(vm,
> +                                             VIR_DOMAIN_EVENT_STOPPED,
> +                                             VIR_DOMAIN_EVENT_STOPPED_SHUTDOWN);
> +            if (!vm->persistent) {
> +                virDomainRemoveInactive(&driver->domains, vm);
> +                vm = NULL;
> +            }
> +        }
>      }
>  
>      if (vm)
> @@ -533,6 +627,10 @@ static void virLXCProcessMonitorExitNotify(virLXCMonitorPtr mon ATTRIBUTE_UNUSED
>      case VIR_LXC_PROTOCOL_EXIT_STATUS_ERROR:
>          priv->shutoffReason = VIR_DOMAIN_SHUTOFF_CRASHED;
>          break;
> +    case VIR_LXC_PROTOCOL_EXIT_STATUS_REBOOT:
> +        priv->shutoffReason = VIR_DOMAIN_SHUTOFF_SHUTDOWN;
> +        priv->wantReboot = true;
> +        break;
>      default:
>          priv->shutoffReason = VIR_DOMAIN_SHUTOFF_UNKNOWN;
>          break;
> @@ -1015,6 +1113,7 @@ int virLXCProcessStart(virConnectPtr conn,
>      }
>  
>      priv->shutoffReason = VIR_DOMAIN_SHUTOFF_UNKNOWN;
> +    priv->wantReboot = false;
>      vm->def->id = vm->pid;
>      virDomainObjSetState(vm, VIR_DOMAIN_RUNNING, reason);
>  
> diff --git a/src/lxc/lxc_protocol.x b/src/lxc/lxc_protocol.x
> index 4fdbe34..e437217 100644
> --- a/src/lxc/lxc_protocol.x
> +++ b/src/lxc/lxc_protocol.x
> @@ -6,7 +6,8 @@
>  
>  enum virLXCProtocolExitStatus {
>      VIR_LXC_PROTOCOL_EXIT_STATUS_ERROR,
> -    VIR_LXC_PROTOCOL_EXIT_STATUS_SHUTDOWN
> +    VIR_LXC_PROTOCOL_EXIT_STATUS_SHUTDOWN,
> +    VIR_LXC_PROTOCOL_EXIT_STATUS_REBOOT
>  };
>  
>  struct virLXCProtocolExitEventMsg {

ACK with nits addressed.

Jirka

--
libvir-list mailing list
libvir-list@xxxxxxxxxx
https://www.redhat.com/mailman/listinfo/libvir-list


[Index of Archives]     [Virt Tools]     [Libvirt Users]     [Lib OS Info]     [Fedora Users]     [Fedora Desktop]     [Fedora SELinux]     [Big List of Linux Books]     [Yosemite News]     [KDE Users]     [Fedora Tools]