On Fri, Jun 28, 2013 at 17:24:42 -0400, Laine Stump wrote: > On 06/28/2013 11:04 AM, Jiri Denemark wrote: > > --- > > src/qemu/qemu_cgroup.c | 21 ++------------------- > > src/qemu/qemu_domain.c | 29 +++++++++++++++++++++++++++++ > > src/qemu/qemu_domain.h | 2 ++ > > 3 files changed, 33 insertions(+), 19 deletions(-) > > > > diff --git a/src/qemu/qemu_cgroup.c b/src/qemu/qemu_cgroup.c > > index 5f54ca6..22bf78e 100644 > > --- a/src/qemu/qemu_cgroup.c > > +++ b/src/qemu/qemu_cgroup.c > > @@ -461,9 +461,7 @@ static int > > qemuSetupMemoryCgroup(virDomainObjPtr vm) > > { > > qemuDomainObjPrivatePtr priv = vm->privateData; > > - unsigned long long hard_limit; > > int rc; > > - int i; > > > > if (!virCgroupHasController(priv->cgroup,VIR_CGROUP_CONTROLLER_MEMORY)) { > > if (vm->def->mem.hard_limit != 0 || > > @@ -477,23 +475,8 @@ qemuSetupMemoryCgroup(virDomainObjPtr vm) > > } > > } > > > > - hard_limit = vm->def->mem.hard_limit; > > - if (!hard_limit) { > > - /* If there is no hard_limit set, set a reasonable one to avoid > > - * system thrashing caused by exploited qemu. A 'reasonable > > - * limit' has been chosen: > > - * (1 + k) * (domain memory + total video memory) + (32MB for > > - * cache per each disk) + F > > - * where k = 0.5 and F = 200MB. The cache for disks is important as > > - * kernel cache on the host side counts into the RSS limit. */ > > - hard_limit = vm->def->mem.max_balloon; > > - for (i = 0; i < vm->def->nvideos; i++) > > - hard_limit += vm->def->videos[i]->vram; > > - hard_limit = hard_limit * 1.5 + 204800; > > - hard_limit += vm->def->ndisks * 32768; > > - } > > - > > - rc = virCgroupSetMemoryHardLimit(priv->cgroup, hard_limit); > > + rc = virCgroupSetMemoryHardLimit(priv->cgroup, > > + qemuDomainMemoryLimit(vm->def)); > > if (rc != 0) { > > virReportSystemError(-rc, > > _("Unable to set memory hard limit for domain %s"), > > diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c > > index 8d79066..77b94ec 100644 > > --- a/src/qemu/qemu_domain.c > > +++ b/src/qemu/qemu_domain.c > > @@ -2181,3 +2181,32 @@ cleanup: > > virObjectUnref(cfg); > > return ret; > > } > > + > > + > > +unsigned long long > > +qemuDomainMemoryLimit(virDomainDefPtr def) > > +{ > > + unsigned long long mem; > > + int i; > > + > > + if (def->mem.hard_limit) { > > + mem = def->mem.hard_limit; > > + } else { > > + /* If there is no hard_limit set, compute a reasonable one to avoid > > + * system thrashing caused by exploited qemu. A 'reasonable > > + * limit' has been chosen: > > + * (1 + k) * (domain memory + total video memory) + (32MB for > > + * cache per each disk) + F > > + * where k = 0.5 and F = 200MB. The cache for disks is important as > > + * kernel cache on the host side counts into the RSS limit. > > + */ > > + mem = def->mem.max_balloon; > > + for (i = 0; i < def->nvideos; i++) > > + mem += def->videos[i]->vram; > > + mem *= 1.5; > > + mem += def->ndisks * 32768; > > + mem += 204800; > > + } > > > I know you're just doing a cut-paste here, but I'm curious how we > arrived at this formula. On systems using vfio, it ends up locking *way* > more memory than previously. For my test guest with 4GB of ram assigned, > the old computation would lock 5GB of ram for the qemu process. With the > new method in this patch it ends up locking just about 7GB. That's because previously this computation was made only for hard_limit and the limit for locking was just completely ignoring it and used max_balloon + 1GB. However, this computation does not count with VFIO in any way so I just combined the two computations by adding the extra 1GB for VFIO into this shared code (in patch 2/3). I'm not sure if that's the right think to do or not but it's certainly better than before when memory locking limit completely ignore the need for VRAM and per-disk cache. Unfortunately, the original formula was suggested by Avi, who moved to new challenges. Perhaps others could jump in and share their opinions (Paolo? :-P). Be careful, though, that we're actually discussing qemuDomainMemoryLimit code after patch 2/3 is applied, that is with the VFIO stuff counted in. The complete code of this function is: unsigned long long qemuDomainMemoryLimit(virDomainDefPtr def) { unsigned long long mem; int i; if (def->mem.hard_limit) { mem = def->mem.hard_limit; } else { /* If there is no hard_limit set, compute a reasonable one to avoid * system thrashing caused by exploited qemu. A 'reasonable * limit' has been chosen: * (1 + k) * (domain memory + total video memory) + (32MB for * cache per each disk) + F * where k = 0.5 and F = 200MB. The cache for disks is important as * kernel cache on the host side counts into the RSS limit. * * Moreover, VFIO requires some amount for IO space. Alex Williamson * suggested adding 1GiB for IO space just to be safe (some finer * tuning might be nice, though). */ mem = def->mem.max_balloon; for (i = 0; i < def->nvideos; i++) mem += def->videos[i]->vram; mem *= 1.5; mem += def->ndisks * 32768; mem += 204800; for (i = 0; i < def->nhostdevs; i++) { virDomainHostdevDefPtr hostdev = def->hostdevs[i]; if (hostdev->mode == VIR_DOMAIN_HOSTDEV_MODE_SUBSYS && hostdev->source.subsys.type == VIR_DOMAIN_HOSTDEV_SUBSYS_TYPE_PCI && hostdev->source.subsys.u.pci.backend == VIR_DOMAIN_HOSTDEV_PCI_BACKEND_VFIO) { mem += 1024 * 1024; break; } } } return mem; } Jirka -- libvir-list mailing list libvir-list@xxxxxxxxxx https://www.redhat.com/mailman/listinfo/libvir-list