On Thu, Nov 17, 2011 at 05:44:20PM +0800, Hu Tao wrote: > --- > src/qemu/qemu_driver.c | 399 ++++++++++++++++++++++++++++++++++++++++++++++++ > 1 files changed, 399 insertions(+), 0 deletions(-) > > diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c > index 5f4a18d..5b6398c 100644 > --- a/src/qemu/qemu_driver.c > +++ b/src/qemu/qemu_driver.c > @@ -94,6 +94,8 @@ > > #define QEMU_NB_MEM_PARAM 3 > > +#define QEMU_NB_NUMA_PARAM 2 > + > #if HAVE_LINUX_KVM_H > # include <linux/kvm.h> > #endif > @@ -6524,6 +6526,401 @@ cleanup: > return ret; > } > > +static int qemuDomainSetNumaParameters(virDomainPtr dom, > + virTypedParameterPtr params, > + int nparams, > + unsigned int flags) > +{ > + struct qemud_driver *driver = dom->conn->privateData; > + int i; > + virDomainDefPtr persistentDef = NULL; > + virCgroupPtr group = NULL; > + virDomainObjPtr vm = NULL; > + int ret = -1; > + bool isActive; > + > + virCheckFlags(VIR_DOMAIN_AFFECT_LIVE | > + VIR_DOMAIN_AFFECT_CONFIG, -1); > + > + qemuDriverLock(driver); > + > + vm = virDomainFindByUUID(&driver->domains, dom->uuid); > + > + if (vm == NULL) { > + qemuReportError(VIR_ERR_INTERNAL_ERROR, > + _("No such domain %s"), dom->uuid); > + goto cleanup; > + } > + > + isActive = virDomainObjIsActive(vm); > + > + if (flags == VIR_DOMAIN_AFFECT_CURRENT) { > + if (isActive) > + flags = VIR_DOMAIN_AFFECT_LIVE; > + else > + flags = VIR_DOMAIN_AFFECT_CONFIG; > + } > + > + if (flags & VIR_DOMAIN_AFFECT_LIVE) { > + if (!isActive) { > + qemuReportError(VIR_ERR_OPERATION_INVALID, > + "%s", _("domain is not running")); > + goto cleanup; > + } > + > + if (!qemuCgroupControllerActive(driver, VIR_CGROUP_CONTROLLER_CPUSET)) { > + qemuReportError(VIR_ERR_OPERATION_INVALID, > + "%s", _("cgroup cpuset controller is not mounted")); > + goto cleanup; > + } > + > + if (virCgroupForDomain(driver->cgroup, vm->def->name, &group, 0) != 0) { > + qemuReportError(VIR_ERR_INTERNAL_ERROR, > + _("cannot find cgroup for domain %s"), vm->def->name); > + goto 
cleanup; > + } > + } > + > + if (flags & VIR_DOMAIN_AFFECT_CONFIG) { > + if (!vm->persistent) { > + qemuReportError(VIR_ERR_OPERATION_INVALID, "%s", > + _("cannot change persistent config of a transient domain")); > + goto cleanup; > + } > + if (!(persistentDef = virDomainObjGetPersistentDef(driver->caps, vm))) > + goto cleanup; > + } > + > + ret = 0; > + for (i = 0; i < nparams; i++) { > + virTypedParameterPtr param = &params[i]; > + > + if (STREQ(param->field, VIR_DOMAIN_NUMA_NODESET)) { > + int rc; > + if (param->type != VIR_TYPED_PARAM_STRING) { > + qemuReportError(VIR_ERR_INVALID_ARG, "%s", > + _("invalid type for numa nodeset tunable, expected a 'string'")); > + ret = -1; > + continue; > + } > + > + if (flags & VIR_DOMAIN_AFFECT_LIVE) { > + rc = virCgroupSetCpusetMems(group, params[i].value.s); > + if (rc != 0) { > + virReportSystemError(-rc, "%s", > + _("unable to set memory hard_limit tunable")); > + ret = -1; > + continue; > + } > + } > + > + if (flags & VIR_DOMAIN_AFFECT_CONFIG) { > + char *oldnodemask = strdup(persistentDef->numatune.memory.nodemask); > + if (!oldnodemask) { > + virReportOOMError(); > + ret = -1; > + continue; > + } > + if (virDomainCpuSetParse((const char **)&params[i].value.s, > + 0, > + persistentDef->numatune.memory.nodemask, > + VIR_DOMAIN_CPUMASK_LEN) < 0) { > + VIR_FREE(persistentDef->numatune.memory.nodemask); > + persistentDef->numatune.memory.nodemask = oldnodemask; > + ret = -1; > + continue; > + } > + } > + } else if (STREQ(param->field, VIR_DOMAIN_NUMA_MODE)) { > + int rc; > + if (param->type != VIR_TYPED_PARAM_ULLONG) { > + qemuReportError(VIR_ERR_INVALID_ARG, "%s", > + _("invalid type for numa strict tunable, expected a 'ullong'")); > + ret = -1; > + continue; > + } > + > + if (flags & VIR_DOMAIN_AFFECT_LIVE) { > + switch(params[i].value.i) { > + case VIR_DOMAIN_NUMATUNE_MEM_STRICT: > + rc = virCgroupSetCpusetHardwall(group, params[i].value.i); > + if (rc != 0) { > + virReportSystemError(-rc, "%s", > + _("unable to set memory 
hard_limit tunable")); > + ret = -1; > + } > + break; > + case VIR_DOMAIN_NUMATUNE_MEM_PREFERRED: > + rc = virCgroupSetCpusetMemorySpreadPage(group, 0); > + if (rc != 0) { > + virReportSystemError(-rc, > + _("Unable to set cpuset.memory_spread_page for domain %s"), > + vm->def->name); > + goto cleanup; > + } > + rc = virCgroupSetCpusetMemorySpreadSlab(group, 0); > + if (rc != 0) { > + virReportSystemError(-rc, > + _("Unable to set cpuset.memory_spread_slab for domain %s"), > + vm->def->name); > + goto cleanup; > + } > + break; > + case VIR_DOMAIN_NUMATUNE_MEM_INTERLEAVE: > + rc = virCgroupSetCpusetMemorySpreadPage(group, 1); > + if (rc != 0) { > + virReportSystemError(-rc, > + _("Unable to set cpuset.memory_spread_page for domain %s"), > + vm->def->name); > + goto cleanup; > + } > + rc = virCgroupSetCpusetMemorySpreadSlab(group, 1); > + if (rc != 0) { > + virReportSystemError(-rc, > + _("Unable to set cpuset.memory_spread_slab for domain %s"), > + vm->def->name); > + goto cleanup; > + } > + > + break; > + default: > + qemuReportError(VIR_ERR_INVALID_ARG, _("Unsupported mode")); > + goto cleanup; > + } > + } > + > + if (flags & VIR_DOMAIN_AFFECT_CONFIG) { > + persistentDef->numatune.memory.mode = params[i].value.i; > + } > + } else { > + qemuReportError(VIR_ERR_INVALID_ARG, > + _("Parameter `%s' not supported"), param->field); > + ret = -1; > + } > + } > + > + if (flags & VIR_DOMAIN_AFFECT_CONFIG) { > + if (virDomainSaveConfig(driver->configDir, persistentDef) < 0) > + ret = -1; > + } > + > +cleanup: > + virCgroupFree(&group); > + if (vm) > + virDomainObjUnlock(vm); > + qemuDriverUnlock(driver); > + return ret; > +} > + > +static int qemuDomainGetNumaParameters(virDomainPtr dom, > + virTypedParameterPtr params, > + int *nparams, > + unsigned int flags) > +{ > + struct qemud_driver *driver = dom->conn->privateData; > + int i; > + virCgroupPtr group = NULL; > + virDomainObjPtr vm = NULL; > + virDomainDefPtr persistentDef = NULL; > + char *nodeset = NULL; > + unsigned 
long long val; > + int ret = -1; > + int rc; > + bool isActive; > + > + virCheckFlags(VIR_DOMAIN_AFFECT_LIVE | > + VIR_DOMAIN_AFFECT_CONFIG | > + VIR_TYPED_PARAM_STRING_OKAY, -1); > + > + qemuDriverLock(driver); > + > + flags &= ~VIR_TYPED_PARAM_STRING_OKAY; > + > + vm = virDomainFindByUUID(&driver->domains, dom->uuid); > + > + if (vm == NULL) { > + qemuReportError(VIR_ERR_INTERNAL_ERROR, > + _("No such domain %s"), dom->uuid); > + goto cleanup; > + } > + > + isActive = virDomainObjIsActive(vm); > + > + if (flags == VIR_DOMAIN_AFFECT_CURRENT) { > + if (isActive) > + flags = VIR_DOMAIN_AFFECT_LIVE; > + else > + flags = VIR_DOMAIN_AFFECT_CONFIG; > + } > + > + if (flags & VIR_DOMAIN_AFFECT_LIVE) { > + if (!isActive) { > + qemuReportError(VIR_ERR_OPERATION_INVALID, > + "%s", _("domain is not running")); > + goto cleanup; > + } > + > + if (!qemuCgroupControllerActive(driver, VIR_CGROUP_CONTROLLER_MEMORY)) { > + qemuReportError(VIR_ERR_OPERATION_INVALID, > + "%s", _("cgroup memory controller is not mounted")); > + goto cleanup; > + } > + > + if (virCgroupForDomain(driver->cgroup, vm->def->name, &group, 0) != 0) { > + qemuReportError(VIR_ERR_INTERNAL_ERROR, > + _("cannot find cgroup for domain %s"), vm->def->name); > + goto cleanup; > + } > + } > + > + if (flags & VIR_DOMAIN_AFFECT_CONFIG) { > + if (!vm->persistent) { > + qemuReportError(VIR_ERR_OPERATION_INVALID, "%s", > + _("cannot change persistent config of a transient domain")); > + goto cleanup; > + } > + if (!(persistentDef = virDomainObjGetPersistentDef(driver->caps, vm))) > + goto cleanup; > + } > + > + if ((*nparams) == 0) { > + *nparams = QEMU_NB_NUMA_PARAM; > + ret = 0; > + goto cleanup; > + } > + > + if ((*nparams) < QEMU_NB_NUMA_PARAM) { > + qemuReportError(VIR_ERR_INVALID_ARG, > + "%s", _("Invalid parameter count")); > + goto cleanup; > + } > + > + if (flags & VIR_DOMAIN_AFFECT_CONFIG) { > + for (i = 0; i < *nparams; i++) { > + virMemoryParameterPtr param = ¶ms[i]; > + val = 0; > + param->value.ul = 0; > + 
param->type = VIR_TYPED_PARAM_ULLONG; > + > + switch (i) { > + case 0: /* fill numa nodeset here */ > + if (virStrcpyStatic(param->field, VIR_DOMAIN_NUMA_NODESET) == NULL) { > + qemuReportError(VIR_ERR_INTERNAL_ERROR, > + "%s", _("Field numa nodeset too long for destination")); > + goto cleanup; > + } > + if (persistentDef->numatune.memory.nodemask) { > + nodeset = virDomainCpuSetFormat(persistentDef->numatune.memory.nodemask, > + VIR_DOMAIN_CPUMASK_LEN); > + if (!nodeset) { > + qemuReportError(VIR_ERR_INTERNAL_ERROR, > + "%s", _("failed to format nodeset for NUMA memory tuning")); > + goto cleanup; > + } > + param->value.s = nodeset; > + nodeset = NULL; > + } else { > + param->value.s = strdup(""); > + } > + param->type = VIR_TYPED_PARAM_STRING; > + break; > + > + case 1: /* fill numa mode here */ > + if (virStrcpyStatic(param->field, VIR_DOMAIN_NUMA_MODE) == NULL) { > + qemuReportError(VIR_ERR_INTERNAL_ERROR, > + "%s", _("Field numa mode too long for destination")); > + goto cleanup; > + } > + param->value.i = persistentDef->numatune.memory.mode; > + break; > + > + default: > + break; > + /* should not hit here */ > + } > + } > + goto out; > + } > + > + for (i = 0; i < QEMU_NB_NUMA_PARAM; i++) { > + virTypedParameterPtr param = &params[i]; > + val = 0; > + param->value.ul = 0; > + param->type = VIR_TYPED_PARAM_ULLONG; > + > + /* Coverity does not realize that if we get here, group is set. 
*/ > + sa_assert(group); > + > + switch (i) { > + case 0: /* fill numa nodeset here */ > + rc = virCgroupGetCpusetHardwall(group, &val); > + rc = virCgroupGetCpusetMems(group, &nodeset); > + if (rc != 0) { > + virReportSystemError(-rc, "%s", > + _("unable to get numa nodeset")); > + goto cleanup; > + } > + if (virStrcpyStatic(param->field, VIR_DOMAIN_NUMA_NODESET) == NULL) { > + qemuReportError(VIR_ERR_INTERNAL_ERROR, > + "%s", _("Field numa nodeset too long for destination")); > + VIR_FREE(nodeset); > + goto cleanup; > + } > + param->value.s = nodeset; > + param->type = VIR_TYPED_PARAM_STRING; > + break; > + > + case 1: /* file numa mode here */ > + if (virStrcpyStatic(param->field, VIR_DOMAIN_NUMA_MODE) == NULL) { > + qemuReportError(VIR_ERR_INTERNAL_ERROR, > + "%s", _("Field numa exclusive too long for destination")); > + goto cleanup; > + } > + rc = virCgroupGetCpusetHardwall(group, &val); > + if (rc != 0) { > + virReportSystemError(-rc, "%s", > + _("unable to get numa mode")); > + goto cleanup; > + } > + if (val) { > + param->value.ul = VIR_DOMAIN_NUMATUNE_MEM_STRICT; > + break; > + } > + rc = virCgroupGetCpusetMemorySpreadPage(group, &val); > + if (rc != 0) { > + virReportSystemError(-rc, "%s", > + _("unable to get numa mode")); > + goto cleanup; > + } > + if (val) { > + param->value.ul = VIR_DOMAIN_NUMATUNE_MEM_INTERLEAVE; > + break; > + } else { > + param->value.ul = VIR_DOMAIN_NUMATUNE_MEM_PREFERRED; > + break; > + } > + param->value.ul = VIR_DOMAIN_NUMATUNE_MEM_NONE; > + break; > + > + default: > + break; > + /* should not hit here */ > + } > + } > + > +out: > + *nparams = QEMU_NB_NUMA_PARAM; > + ret = 0; > + > +cleanup: > + if (group) > + virCgroupFree(&group); > + if (vm) > + virDomainObjUnlock(vm); > + qemuDriverUnlock(driver); > + return ret; > +} Same API comments as with patch 7 of course. In terms of logic, we can only allow the nodeset mask to be changed on the fly given current cpuset functionality. 
We can't support switching between policy modes. Daniel -- |: http://berrange.com -o- http://www.flickr.com/photos/dberrange/ :| |: http://libvirt.org -o- http://virt-manager.org :| |: http://autobuild.org -o- http://search.cpan.org/~danberr/ :| |: http://entangle-photo.org -o- http://live.gnome.org/gtk-vnc :| -- libvir-list mailing list libvir-list@xxxxxxxxxx https://www.redhat.com/mailman/listinfo/libvir-list