Re: [PATCH 7/8] ch: support restore with net devices

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Fri, Jun 28, 2024 at 09:59:29AM -0500, Praveen K Paladugu wrote:
> 
> 
> On 6/28/2024 2:26 AM, Purna Pavan Chandra wrote:
> > Cloud-hypervisor now supports restoring with new net fds.
> > Ref: https://github.com/cloud-hypervisor/cloud-hypervisor/pull/6402
> > So, pass new tap fds via SCM_RIGHTS to CH's restore api.
> > 
> > Signed-off-by: Purna Pavan Chandra <paekkaladevi@xxxxxxxxxxxxxxxxxxx>
> > ---
> >   src/ch/ch_driver.c  |   9 +---
> >   src/ch/ch_monitor.c |  21 ++++++++-
> >   src/ch/ch_monitor.h |   2 +-
> >   src/ch/ch_process.c | 104 ++++++++++++++++++++++++++++++++++++--------
> >   4 files changed, 107 insertions(+), 29 deletions(-)
> > 
> > diff --git a/src/ch/ch_driver.c b/src/ch/ch_driver.c
> > index fbeac60825..7857515766 100644
> > --- a/src/ch/ch_driver.c
> > +++ b/src/ch/ch_driver.c
> > @@ -683,14 +683,9 @@ static int
> >   chDomainSaveAdditionalValidation(virDomainDef *vmdef)
> >   {
> >       /*
> > -    SAVE and RESTORE are functional only without any networking and
> > -    device passthrough configuration
> > +    SAVE and RESTORE are functional only without any host device
> > +    passthrough configuration
> >       */
> 
> Add a version based check as well here. We should check cloud-hypervisor
> version >= 40 here.
> For reference: https://github.com/libvirt/libvirt/commit/b2e43609fd68b84f626d63e836acd870c873dd7a

Sure, I'll add this change.
> 
> Similar to chDomainSaveAdditionalValidation, consider adding a validation
> step during Restore as well. This is required, if a user tries to save the
> VM on one host and restore it on a different host.
>
Even if a user saves a VM on one host and tries to restore in another
one (along with migrating the saved state files), the expectation is
that it would work, right ?

> > -    if (vmdef->nnets > 0) {
> > -        virReportError(VIR_ERR_OPERATION_INVALID, "%s",
> > -                       _("cannot save domain with network interfaces"));
> > -        return -1;
> > -    }
> >       if  (vmdef->nhostdevs > 0) {
> >           virReportError(VIR_ERR_OPERATION_INVALID, "%s",
> >                          _("cannot save domain with host devices"));
> > diff --git a/src/ch/ch_monitor.c b/src/ch/ch_monitor.c
> > index 8d8be1f46e..3af5e7f2d1 100644
> > --- a/src/ch/ch_monitor.c
> > +++ b/src/ch/ch_monitor.c
> > @@ -978,14 +978,31 @@ virCHMonitorSaveVM(virCHMonitor *mon, const char *to)
> >   }
> >   int
> > -virCHMonitorBuildRestoreJson(const char *from, char **jsonstr)
> > +virCHMonitorBuildRestoreJson(virDomainDef *vmdef, const char *from, char **jsonstr)
> >   {
> > +    size_t i;
> >       g_autoptr(virJSONValue) restore_json = virJSONValueNewObject();
> > -
> >       g_autofree char *path_url = g_strdup_printf("file://%s", from);
> > +
> >       if (virJSONValueObjectAppendString(restore_json, "source_url", path_url))
> >           return -1;
> > +    /* Pass the netconfig needed to restore with new netfds */
> > +    if (vmdef->nnets) {
> > +        g_autoptr(virJSONValue) nets = virJSONValueNewArray();
> > +        for (i = 0; i < vmdef->nnets; i++) {
> > +            g_autoptr(virJSONValue) net_json = virJSONValueNewObject();
> > +            g_autofree char *id = g_strdup_printf("%s_%ld", CH_NET_ID_PREFIX, i);
> > +            if (virJSONValueObjectAppendString(net_json, "id", id) < 0)
> > +                return -1;
> > +            if (virJSONValueObjectAppendNumberInt(net_json, "num_fds", vmdef->nets[i]->driver.virtio.queues))
> > +                return -1;
> > +            virJSONValueArrayAppend(nets, &net_json);
> > +        }
> > +        if (virJSONValueObjectAppend(restore_json, "net_fds", &nets))
> > +            return -1;
> > +    }
> > +
> >       if (!(*jsonstr = virJSONValueToString(restore_json, false)))
> >           return -1;
> > diff --git a/src/ch/ch_monitor.h b/src/ch/ch_monitor.h
> > index 375b7a49a4..02a6685d58 100644
> > --- a/src/ch/ch_monitor.h
> > +++ b/src/ch/ch_monitor.h
> > @@ -127,4 +127,4 @@ int virCHMonitorGetIOThreads(virCHMonitor *mon,
> >                                virDomainIOThreadInfo ***iothreads);
> >   int
> >   virCHMonitorBuildNetJson(virDomainNetDef *netdef, int netindex, char **jsonstr);
> > -int virCHMonitorBuildRestoreJson(const char *from, char **jsonstr);
> > +int virCHMonitorBuildRestoreJson(virDomainDef *vmdef, const char *from, char **jsonstr);
> > diff --git a/src/ch/ch_process.c b/src/ch/ch_process.c
> > index b8c6080d5c..cb3539ebe5 100644
> > --- a/src/ch/ch_process.c
> > +++ b/src/ch/ch_process.c
> > @@ -29,6 +29,7 @@
> >   #include "ch_process.h"
> >   #include "domain_cgroup.h"
> >   #include "domain_interface.h"
> > +#include "viralloc.h"
> >   #include "virerror.h"
> >   #include "virfile.h"
> >   #include "virjson.h"
> > @@ -707,6 +708,56 @@ chProcessAddNetworkDevices(virCHDriver *driver,
> >       return 0;
> >   }
> > +/**
> > + * virCHRestoreCreateNetworkDevices:
> > + * @driver: pointer to driver structure
> > + * @vmdef: pointer to domain definition
> > + * @vmtapfds: returned array of FDs of guest interfaces
> > + * @nvmtapfds: returned number of network indexes
> > + * @nicindexes: returned array of network indexes
> > + * @nnicindexes: returned number of network indexes
> > + *
> > + * Create network devices for the domain. This function is called during
> > + * domain restore.
> > + *
> > + * Returns 0 on success or -1 in case of error
> > +*/
> > +static int
> > +virCHRestoreCreateNetworkDevices(virCHDriver *driver,
> > +                                 virDomainDef *vmdef,
> > +                                 int **vmtapfds,
> > +                                 size_t *nvmtapfds,
> > +                                 int **nicindexes,
> > +                                 size_t *nnicindexes)
> > +{
> > +    size_t i, j;
> > +    size_t tapfd_len;
> > +    size_t index_vmtapfds;
> > +    for (i = 0; i < vmdef->nnets; i++) {
> > +        g_autofree int *tapfds = NULL;
> > +        tapfd_len = vmdef->nets[i]->driver.virtio.queues;
> > +        if (virCHDomainValidateActualNetDef(vmdef->nets[i]) < 0) {
> > +            virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
> > +                           _("net definition failed validation"));
> > +            return -1;
> > +        }
> > +        tapfds = g_new0(int, tapfd_len);
> > +        memset(tapfds, -1, (tapfd_len) * sizeof(int));
> > +
> > +        /* Connect Guest interfaces */
> > +        if (virCHConnetNetworkInterfaces(driver, vmdef, vmdef->nets[i], tapfds,
> > +                                          nicindexes, nnicindexes) < 0)
> > +            return -1;
> > +
> > +        index_vmtapfds = *nvmtapfds;
> > +        VIR_EXPAND_N(*vmtapfds, *nvmtapfds, tapfd_len);
> > +        for (j = 0; j < tapfd_len; j++) {
> > +            VIR_APPEND_ELEMENT_INPLACE(*vmtapfds, index_vmtapfds, tapfds[j]);
> > +        } > +    }
> > +    return 0;
> > +}
> > +
> >   /**
> >    * virCHProcessStartValidate:
> >    * @driver: pointer to driver structure
> > @@ -908,14 +959,19 @@ virCHProcessStartRestore(virCHDriver *driver, virDomainObj *vm, const char *from
> >       g_autofree char *payload = NULL;
> >       g_autofree char *response = NULL;
> >       VIR_AUTOCLOSE mon_sockfd = -1;
> > +    g_autofree int *tapfds = NULL;
> > +    g_autofree int *nicindexes = NULL;
> >       size_t payload_len;
> > +    size_t ntapfds = 0;
> > +    size_t nnicindexes = 0;
> > +    int ret = -1;
> >       if (!priv->monitor) {
> >           /* Get the first monitor connection if not already */
> >           if (!(priv->monitor = virCHProcessConnectMonitor(driver, vm))) {
> >               virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
> >                              _("failed to create connection to CH socket"));
> > -            return -1;
> > +            goto cleanup;
> 
> you can continue returning -1 here, as tapfds have not yet been created.

The next commit "ch: kill CH process if restore fails" kills the process
incase of any errors during restore. Hence, all failures after creating
priv->monitor needs to do 'goto cleanup'. But I guess this is not
correct commit to make this change. I'll move this change to the above
mentioned commit.

> 
> >           }
> >       }
> > @@ -923,20 +979,10 @@ virCHProcessStartRestore(virCHDriver *driver, virDomainObj *vm, const char *from
> >       vm->def->id = vm->pid;
> >       priv->machineName = virCHDomainGetMachineName(vm);
> > -    /* Pass 0, NULL as restore only works without networking support */
> > -    if (virDomainCgroupSetupCgroup("ch", vm,
> > -                                   0, NULL, /* nnicindexes, nicindexes */
> > -                                   &priv->cgroup,
> > -                                   cfg->cgroupControllers,
> > -                                   0, /*maxThreadsPerProc*/
> > -                                   priv->driver->privileged,
> > -                                   priv->machineName) < 0)
> > -        return -1;
> > -
> > -    if (virCHMonitorBuildRestoreJson(from, &payload) < 0) {
> > +    if (virCHMonitorBuildRestoreJson(vm->def, from, &payload) < 0) {
> >           virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
> >                          _("failed to restore domain"));
> > -        return -1;
> > +        goto cleanup;
> 
> Same as above comment.
> 
> >       }
> >       virBufferAddLit(&http_headers, "PUT /api/v1/vm.restore HTTP/1.1\r\n");
> > @@ -949,21 +995,41 @@ virCHProcessStartRestore(virCHDriver *driver, virDomainObj *vm, const char *from
> >       payload = virBufferContentAndReset(&buf);
> >       if ((mon_sockfd = chMonitorSocketConnect(priv->monitor)) < 0)
> > -        return -1;
> > +        goto cleanup;
> 
> Ditto
> 
> > -    if (virSocketSendMsgWithFDs(mon_sockfd, payload, payload_len, NULL, 0) < 0) {
> > +    if (virCHRestoreCreateNetworkDevices(driver, vm->def, &tapfds, &ntapfds, &nicindexes, &nnicindexes) < 0)
> > +        goto cleanup;
> > +
> > +    if (virDomainCgroupSetupCgroup("ch", vm,
> > +                                   nnicindexes, nicindexes,
> > +                                   &priv->cgroup,
> > +                                   cfg->cgroupControllers,
> > +                                   0, /*maxThreadsPerProc*/
> > +                                   priv->driver->privileged,
> > +                                   priv->machineName) < 0)
> > +        goto cleanup;
> > +
> > +    /* Bring up netdevs before restoring vm */
> > +    if (virDomainInterfaceStartDevices(vm->def) < 0)
> > +        goto cleanup;
> > +
> > +    if (virSocketSendMsgWithFDs(mon_sockfd, payload, payload_len, tapfds, ntapfds) < 0) {
> >           virReportSystemError(errno, "%s",
> >                                _("Failed to send restore request to CH"));
> > -        return -1;
> > +        goto cleanup;
> >       }
> >       if (chSocketProcessHttpResponse(mon_sockfd) < 0)
> > -        return -1;
> > +        goto cleanup;
> >       if (virCHProcessSetup(vm) < 0)
> > -        return -1;
> > +        goto cleanup;
> >       virDomainObjSetState(vm, VIR_DOMAIN_PAUSED, VIR_DOMAIN_PAUSED_FROM_SNAPSHOT);
> > +    ret = 0;
> > -    return 0;
> > + cleanup:
> > +    if (tapfds)
> > +        chCloseFDs(tapfds, ntapfds);
> > +    return ret;
> >   }
> 
> -- 
> Regards,
> Praveen



[Index of Archives]     [Virt Tools]     [Libvirt Users]     [Lib OS Info]     [Fedora Users]     [Fedora Desktop]     [Fedora SELinux]     [Big List of Linux Books]     [Yosemite News]     [KDE Users]     [Fedora Tools]

  Powered by Linux