On Tue, 24 Mar 2015, Jim Fehlig wrote: > Recent testing on large memory systems revealed a bug in the Xen xl > tool's freemem() function. When autoballooning is enabled, freemem() > is used to ensure enough memory is available to start a domain, > ballooning dom0 if necessary. When ballooning large amounts of memory > from dom0, freemem() would exceed its self-imposed wait time and > return an error. Meanwhile, dom0 continued to balloon. Starting the > domain later, after sufficient memory was ballooned from dom0, would > succeed. The libvirt implementation in libxlDomainFreeMem() suffers > the same bug since it is modeled after freemem(). > > In the end, the best place to fix the bug on the Xen side was to > slightly change the behavior of libxl_wait_for_memory_target(). > Instead of failing after caller-provided wait_sec, the function now > blocks as long as dom0 memory ballooning is progressing. It will return > failure only when more memory is needed to reach the target and wait_sec > have expired with no progress being made. See xen.git commit fd3aa246. > There was a dicussion on how this would affect other libxl apps like > libvirt > > http://lists.xen.org/archives/html/xen-devel/2015-03/msg00739.html > > If libvirt containing this patch was build against a Xen containing > the old libxl_wait_for_memory_target() behavior, libxlDomainFreeMem() > will fail after 30 sec and domain creation will be terminated. > Without this patch and with old libxl_wait_for_memory_target() behavior, > libxlDomainFreeMem() does not succeed after 30 sec, but returns success > anyway. Domain creation continues resulting in all sorts of fun stuff > like cpu soft lockups in the guest OS. It was decided to properly fix > libxl_wait_for_memory_target(), and if anything improve the default > behavior of apps using the freemem reference impl in xl. > > xl was patched to accommodate the change in libxl_wait_for_memory_target() > with xen.git commit 883b30a0. This patch does the same in the libxl > driver. While at it, I changed the logic to essentially match > freemem() in $xensrc/tools/libxl/xl_cmdimpl.c. It was a bit cleaner > IMO and will make it easier to spot future, potentially interesting > divergences. > > Signed-off-by: Jim Fehlig <jfehlig@xxxxxxxx> Reviewed-by: Stefano Stabellini <stefano.stabellini@xxxxxxxxxxxxx> > V2: Actually use libxl_wait_for_memory_target(), instead of > libxl_wait_for_free_memory() > > src/libxl/libxl_domain.c | 55 +++++++++++++++++++++++------------------------- > 1 file changed, 26 insertions(+), 29 deletions(-) > > diff --git a/src/libxl/libxl_domain.c b/src/libxl/libxl_domain.c > index 407a9bd..a1739aa 100644 > --- a/src/libxl/libxl_domain.c > +++ b/src/libxl/libxl_domain.c > @@ -1121,38 +1121,39 @@ libxlDomainFreeMem(libxlDomainObjPrivatePtr priv, libxl_domain_config *d_config) > { > uint32_t needed_mem; > uint32_t free_mem; > - size_t i; > - int ret = -1; > + int ret; > int tries = 3; > int wait_secs = 10; > > - if ((ret = libxl_domain_need_memory(priv->ctx, &d_config->b_info, > - &needed_mem)) >= 0) { > - for (i = 0; i < tries; ++i) { > - if ((ret = libxl_get_free_memory(priv->ctx, &free_mem)) < 0) > - break; > + ret = libxl_domain_need_memory(priv->ctx, &d_config->b_info, &needed_mem); > + if (ret < 0) > + goto error; > > - if (free_mem >= needed_mem) { > - ret = 0; > - break; > - } > + do { > + ret = libxl_get_free_memory(priv->ctx, &free_mem); > + if (ret < 0) > + goto error; > > - if ((ret = libxl_set_memory_target(priv->ctx, 0, > - free_mem - needed_mem, > - /* relative */ 1, 0)) < 0) > - break; > + if (free_mem >= needed_mem) > + return 0; > > - ret = libxl_wait_for_free_memory(priv->ctx, 0, needed_mem, > - wait_secs); > - if (ret == 0 || ret != ERROR_NOMEM) > - break; > + ret = libxl_set_memory_target(priv->ctx, 0, > + free_mem - needed_mem, > + /* relative */ 1, 0); > + if (ret < 0) > + goto error; > > - if ((ret = libxl_wait_for_memory_target(priv->ctx, 0, 1)) < 0) > - break; > - } > - } > + ret = libxl_wait_for_memory_target(priv->ctx, 0, wait_secs); > + if (ret < 0) > + goto error; > > - return ret; > + tries--; > + } while (tries > 0); > + > + error: > + virReportSystemError(ret, "%s", > + _("Failed to balloon domain0 memory")); > + return -1; > } > > static void > @@ -1271,12 +1272,8 @@ libxlDomainStart(libxlDriverPrivatePtr driver, virDomainObjPtr vm, > priv->ctx, &d_config) < 0) > goto endjob; > > - if (cfg->autoballoon && libxlDomainFreeMem(priv, &d_config) < 0) { > - virReportError(VIR_ERR_INTERNAL_ERROR, > - _("libxenlight failed to get free memory for domain '%s'"), > - d_config.c_info.name); > + if (cfg->autoballoon && libxlDomainFreeMem(priv, &d_config) < 0) > goto endjob; > - } > > if (virHostdevPrepareDomainDevices(hostdev_mgr, LIBXL_DRIVER_NAME, > vm->def, VIR_HOSTDEV_SP_PCI) < 0) > -- > 1.8.4.5 > -- libvir-list mailing list libvir-list@xxxxxxxxxx https://www.redhat.com/mailman/listinfo/libvir-list