On Fri, Jun 28, 2024 at 09:59:29AM -0500, Praveen K Paladugu wrote: > > > On 6/28/2024 2:26 AM, Purna Pavan Chandra wrote: > > Cloud-hypervisor now supports restoring with new net fds. > > Ref: https://github.com/cloud-hypervisor/cloud-hypervisor/pull/6402 > > So, pass new tap fds via SCM_RIGHTS to CH's restore api. > > > > Signed-off-by: Purna Pavan Chandra <paekkaladevi@xxxxxxxxxxxxxxxxxxx> > > --- > > src/ch/ch_driver.c | 9 +--- > > src/ch/ch_monitor.c | 21 ++++++++- > > src/ch/ch_monitor.h | 2 +- > > src/ch/ch_process.c | 104 ++++++++++++++++++++++++++++++++++++-------- > > 4 files changed, 107 insertions(+), 29 deletions(-) > > > > diff --git a/src/ch/ch_driver.c b/src/ch/ch_driver.c > > index fbeac60825..7857515766 100644 > > --- a/src/ch/ch_driver.c > > +++ b/src/ch/ch_driver.c > > @@ -683,14 +683,9 @@ static int > > chDomainSaveAdditionalValidation(virDomainDef *vmdef) > > { > > /* > > - SAVE and RESTORE are functional only without any networking and > > - device passthrough configuration > > + SAVE and RESTORE are functional only without any host device > > + passthrough configuration > > */ > > Add a version based check as well here. We should check cloud-hypervisor > version >= 40 here. > For reference: https://github.com/libvirt/libvirt/commit/b2e43609fd68b84f626d63e836acd870c873dd7a Sure, I'll add this change. > > Similar to chDomainSaveAdditionalValidation, consider adding a validation > step during Restore as well. This is required, if a user tries to save the > VM on one host and restore it on a different host. > Even if a user saves a VM on one host and tries to restore in another one (along with migrating the saved state files), the expectation is that it would work, right ? > > - if (vmdef->nnets > 0) { > > - virReportError(VIR_ERR_OPERATION_INVALID, "%s", > > - _("cannot save domain with network interfaces")); > > - return -1; > > - } > > if (vmdef->nhostdevs > 0) { > > virReportError(VIR_ERR_OPERATION_INVALID, "%s", > > _("cannot save domain with host devices")); > > diff --git a/src/ch/ch_monitor.c b/src/ch/ch_monitor.c > > index 8d8be1f46e..3af5e7f2d1 100644 > > --- a/src/ch/ch_monitor.c > > +++ b/src/ch/ch_monitor.c > > @@ -978,14 +978,31 @@ virCHMonitorSaveVM(virCHMonitor *mon, const char *to) > > } > > int > > -virCHMonitorBuildRestoreJson(const char *from, char **jsonstr) > > +virCHMonitorBuildRestoreJson(virDomainDef *vmdef, const char *from, char **jsonstr) > > { > > + size_t i; > > g_autoptr(virJSONValue) restore_json = virJSONValueNewObject(); > > - > > g_autofree char *path_url = g_strdup_printf("file://%s", from); > > + > > if (virJSONValueObjectAppendString(restore_json, "source_url", path_url)) > > return -1; > > + /* Pass the netconfig needed to restore with new netfds */ > > + if (vmdef->nnets) { > > + g_autoptr(virJSONValue) nets = virJSONValueNewArray(); > > + for (i = 0; i < vmdef->nnets; i++) { > > + g_autoptr(virJSONValue) net_json = virJSONValueNewObject(); > > + g_autofree char *id = g_strdup_printf("%s_%ld", CH_NET_ID_PREFIX, i); > > + if (virJSONValueObjectAppendString(net_json, "id", id) < 0) > > + return -1; > > + if (virJSONValueObjectAppendNumberInt(net_json, "num_fds", vmdef->nets[i]->driver.virtio.queues)) > > + return -1; > > + virJSONValueArrayAppend(nets, &net_json); > > + } > > + if (virJSONValueObjectAppend(restore_json, "net_fds", &nets)) > > + return -1; > > + } > > + > > if (!(*jsonstr = virJSONValueToString(restore_json, false))) > > return -1; > > diff --git a/src/ch/ch_monitor.h b/src/ch/ch_monitor.h > > index 375b7a49a4..02a6685d58 100644 > > --- a/src/ch/ch_monitor.h > > +++ b/src/ch/ch_monitor.h > > @@ -127,4 +127,4 @@ int virCHMonitorGetIOThreads(virCHMonitor *mon, > > virDomainIOThreadInfo ***iothreads); > > int > > virCHMonitorBuildNetJson(virDomainNetDef *netdef, int netindex, char **jsonstr); > > -int virCHMonitorBuildRestoreJson(const char *from, char **jsonstr); > > +int virCHMonitorBuildRestoreJson(virDomainDef *vmdef, const char *from, char **jsonstr); > > diff --git a/src/ch/ch_process.c b/src/ch/ch_process.c > > index b8c6080d5c..cb3539ebe5 100644 > > --- a/src/ch/ch_process.c > > +++ b/src/ch/ch_process.c > > @@ -29,6 +29,7 @@ > > #include "ch_process.h" > > #include "domain_cgroup.h" > > #include "domain_interface.h" > > +#include "viralloc.h" > > #include "virerror.h" > > #include "virfile.h" > > #include "virjson.h" > > @@ -707,6 +708,56 @@ chProcessAddNetworkDevices(virCHDriver *driver, > > return 0; > > } > > +/** > > + * virCHRestoreCreateNetworkDevices: > > + * @driver: pointer to driver structure > > + * @vmdef: pointer to domain definition > > + * @vmtapfds: returned array of FDs of guest interfaces > > + * @nvmtapfds: returned number of network indexes > > + * @nicindexes: returned array of network indexes > > + * @nnicindexes: returned number of network indexes > > + * > > + * Create network devices for the domain. This function is called during > > + * domain restore. > > + * > > + * Returns 0 on success or -1 in case of error > > +*/ > > +static int > > +virCHRestoreCreateNetworkDevices(virCHDriver *driver, > > + virDomainDef *vmdef, > > + int **vmtapfds, > > + size_t *nvmtapfds, > > + int **nicindexes, > > + size_t *nnicindexes) > > +{ > > + size_t i, j; > > + size_t tapfd_len; > > + size_t index_vmtapfds; > > + for (i = 0; i < vmdef->nnets; i++) { > > + g_autofree int *tapfds = NULL; > > + tapfd_len = vmdef->nets[i]->driver.virtio.queues; > > + if (virCHDomainValidateActualNetDef(vmdef->nets[i]) < 0) { > > + virReportError(VIR_ERR_INTERNAL_ERROR, "%s", > > + _("net definition failed validation")); > > + return -1; > > + } > > + tapfds = g_new0(int, tapfd_len); > > + memset(tapfds, -1, (tapfd_len) * sizeof(int)); > > + > > + /* Connect Guest interfaces */ > > + if (virCHConnetNetworkInterfaces(driver, vmdef, vmdef->nets[i], tapfds, > > + nicindexes, nnicindexes) < 0) > > + return -1; > > + > > + index_vmtapfds = *nvmtapfds; > > + VIR_EXPAND_N(*vmtapfds, *nvmtapfds, tapfd_len); > > + for (j = 0; j < tapfd_len; j++) { > > + VIR_APPEND_ELEMENT_INPLACE(*vmtapfds, index_vmtapfds, tapfds[j]); > > + } > + } > > + return 0; > > +} > > + > > /** > > * virCHProcessStartValidate: > > * @driver: pointer to driver structure > > @@ -908,14 +959,19 @@ virCHProcessStartRestore(virCHDriver *driver, virDomainObj *vm, const char *from > > g_autofree char *payload = NULL; > > g_autofree char *response = NULL; > > VIR_AUTOCLOSE mon_sockfd = -1; > > + g_autofree int *tapfds = NULL; > > + g_autofree int *nicindexes = NULL; > > size_t payload_len; > > + size_t ntapfds = 0; > > + size_t nnicindexes = 0; > > + int ret = -1; > > if (!priv->monitor) { > > /* Get the first monitor connection if not already */ > > if (!(priv->monitor = virCHProcessConnectMonitor(driver, vm))) { > > virReportError(VIR_ERR_INTERNAL_ERROR, "%s", > > _("failed to create connection to CH socket")); > > - return -1; > > + goto cleanup; > > you can continue returning -1 here, as tapfds have not yet been created. The next commit "ch: kill CH process if restore fails" kills the process incase of any errors during restore. Hence, all failures after creating priv->monitor needs to do 'goto cleanup'. But I guess this is not correct commit to make this change. I'll move this change to the above mentioned commit. > > > } > > } > > @@ -923,20 +979,10 @@ virCHProcessStartRestore(virCHDriver *driver, virDomainObj *vm, const char *from > > vm->def->id = vm->pid; > > priv->machineName = virCHDomainGetMachineName(vm); > > - /* Pass 0, NULL as restore only works without networking support */ > > - if (virDomainCgroupSetupCgroup("ch", vm, > > - 0, NULL, /* nnicindexes, nicindexes */ > > - &priv->cgroup, > > - cfg->cgroupControllers, > > - 0, /*maxThreadsPerProc*/ > > - priv->driver->privileged, > > - priv->machineName) < 0) > > - return -1; > > - > > - if (virCHMonitorBuildRestoreJson(from, &payload) < 0) { > > + if (virCHMonitorBuildRestoreJson(vm->def, from, &payload) < 0) { > > virReportError(VIR_ERR_INTERNAL_ERROR, "%s", > > _("failed to restore domain")); > > - return -1; > > + goto cleanup; > > Same as above comment. > > > } > > virBufferAddLit(&http_headers, "PUT /api/v1/vm.restore HTTP/1.1\r\n"); > > @@ -949,21 +995,41 @@ virCHProcessStartRestore(virCHDriver *driver, virDomainObj *vm, const char *from > > payload = virBufferContentAndReset(&buf); > > if ((mon_sockfd = chMonitorSocketConnect(priv->monitor)) < 0) > > - return -1; > > + goto cleanup; > > Ditto > > > - if (virSocketSendMsgWithFDs(mon_sockfd, payload, payload_len, NULL, 0) < 0) { > > + if (virCHRestoreCreateNetworkDevices(driver, vm->def, &tapfds, &ntapfds, &nicindexes, &nnicindexes) < 0) > > + goto cleanup; > > + > > + if (virDomainCgroupSetupCgroup("ch", vm, > > + nnicindexes, nicindexes, > > + &priv->cgroup, > > + cfg->cgroupControllers, > > + 0, /*maxThreadsPerProc*/ > > + priv->driver->privileged, > > + priv->machineName) < 0) > > + goto cleanup; > > + > > + /* Bring up netdevs before restoring vm */ > > + if (virDomainInterfaceStartDevices(vm->def) < 0) > > + goto cleanup; > > + > > + if (virSocketSendMsgWithFDs(mon_sockfd, payload, payload_len, tapfds, ntapfds) < 0) { > > virReportSystemError(errno, "%s", > > _("Failed to send restore request to CH")); > > - return -1; > > + goto cleanup; > > } > > if (chSocketProcessHttpResponse(mon_sockfd) < 0) > > - return -1; > > + goto cleanup; > > if (virCHProcessSetup(vm) < 0) > > - return -1; > > + goto cleanup; > > virDomainObjSetState(vm, VIR_DOMAIN_PAUSED, VIR_DOMAIN_PAUSED_FROM_SNAPSHOT); > > + ret = 0; > > - return 0; > > + cleanup: > > + if (tapfds) > > + chCloseFDs(tapfds, ntapfds); > > + return ret; > > } > > -- > Regards, > Praveen