On Thu, Jul 21, 2011 at 10:11:32AM +0800, Wen Congyang wrote:
> ---
>  src/qemu/qemu_driver.c |  312 ++++++++++++++++++++++++++++++++++++++++++++----
>  1 files changed, 287 insertions(+), 25 deletions(-)
>
> diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c
> index cd65bce..fd80537 100644
> --- a/src/qemu/qemu_driver.c
> +++ b/src/qemu/qemu_driver.c
> @@ -5139,11 +5139,46 @@ cleanup:
>  }
>
>
> +/*
> + * check whether the host supports CFS bandwidth
> + *
> + * Return 1 when CFS bandwidth is supported, 0 when CFS bandwidth is not
> + * supported, -1 on error.
> + */
> +static int qemuGetCpuBWStatus(virCgroupPtr cgroup)
> +{
> +    char *cfs_period_path = NULL;
> +    int ret = -1;
> +
> +    if (!cgroup)
> +        return 0;
> +
> +    if (virCgroupPathOfController(cgroup, VIR_CGROUP_CONTROLLER_CPU,
> +                                  "cpu.cfs_period_us", &cfs_period_path) < 0) {
> +        qemuReportError(VIR_ERR_INTERNAL_ERROR,
> +                        "%s",
> +                        _("cannot get the path of cgroup CPU controller"));

Hum, I'm not sure we should really flag this as an error here. It should be
made an INFO, I think. What should get an error is if we try to start using
cpu control on a guest while the host doesn't support it. In practice we need
to check proper handling in 3 cases:
  - at guest startup
  - on migration, when checking the target host
  - when activated at runtime
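
Something along these lines is what I have in mind -- just a rough, untested
sketch (assuming the VIR_INFO logging macro can be used directly here), where
a missing cpu.cfs_period_us knob is logged and reported as "not supported"
rather than raised as an error:

static int qemuGetCpuBWStatus(virCgroupPtr cgroup)
{
    char *cfs_period_path = NULL;
    int ret = -1;

    if (!cgroup)
        return 0;

    if (virCgroupPathOfController(cgroup, VIR_CGROUP_CONTROLLER_CPU,
                                  "cpu.cfs_period_us", &cfs_period_path) < 0) {
        /* Not fatal here: note it and report CFS bandwidth as unsupported. */
        VIR_INFO("cannot get the path of cgroup CPU controller");
        ret = 0;
        goto cleanup;
    }

    /* The knob only exists when the kernel supports CFS bandwidth. */
    ret = (access(cfs_period_path, F_OK) == 0) ? 1 : 0;

cleanup:
    VIR_FREE(cfs_period_path);
    return ret;
}

The real error would then be raised from the code paths that actually try to
set a period/quota when this helper returns 0.
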
> +        goto cleanup;
> +    }
> +
> +    if (access(cfs_period_path, F_OK) < 0) {
> +        ret = 0;
> +    } else {
> +        ret = 1;
> +    }
> +
> +cleanup:
> +    VIR_FREE(cfs_period_path);
> +    return ret;
> +}
> +
> +
>  static char *qemuGetSchedulerType(virDomainPtr dom,
>                                    int *nparams)
>  {
>      struct qemud_driver *driver = dom->conn->privateData;
>      char *ret = NULL;
> +    int rc;
>
>      qemuDriverLock(driver);
>      if (!qemuCgroupControllerActive(driver, VIR_CGROUP_CONTROLLER_CPU)) {
> @@ -5152,8 +5187,15 @@ static char *qemuGetSchedulerType(virDomainPtr dom,
>          goto cleanup;
>      }
>
> -    if (nparams)
> -        *nparams = 1;
> +    if (nparams) {
> +        rc = qemuGetCpuBWStatus(driver->cgroup);
> +        if (rc < 0)
> +            goto cleanup;
> +        else if (rc == 0)
> +            *nparams = 1;
> +        else
> +            *nparams = 3;
> +    }
>
>      ret = strdup("posix");
>      if (!ret)
> @@ -5786,6 +5828,58 @@ cleanup:
>      return ret;
>  }
>
> +static int
> +qemuSetVcpusBWLive(virDomainObjPtr vm, virCgroupPtr cgroup,
> +                   unsigned long long period, long long quota)
> +{
> +    int i;
> +    qemuDomainObjPrivatePtr priv = vm->privateData;
> +    virCgroupPtr cgroup_vcpu = NULL;
> +    int rc;
> +
> +    if (period == 0 && quota == 0)
> +        return 0;
> +
> +    if (priv->nvcpupids == 0 || priv->vcpupids[0] == vm->pid) {
> +        /* If we does not know VCPU<->PID mapping or all vcpu runs in the same
> +         * thread, we can not control each vcpu.
> +         */
> +        /* Ensure that we can multiply by vcpus without overflowing. */
> +        if (quota > LLONG_MAX / vm->def->vcpus) {
> +            virReportSystemError(EINVAL,
> +                                 _("%s"),
> +                                 "Unable to set cpu bandwidth quota");

Should probably give a hint of why in the error message.
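
For instance something like this (untested, the exact wording is up to you):

        if (quota > LLONG_MAX / vm->def->vcpus) {
            virReportSystemError(EINVAL,
                                 _("cpu bandwidth quota %lld is too large: it "
                                   "would overflow when multiplied by %d vcpus"),
                                 quota, vm->def->vcpus);
            goto cleanup;
        }
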
> +            goto cleanup;
> +        }
> +
> +        if (quota > 0)
> +            quota *= vm->def->vcpus;
> +        return qemuSetupCgroupVcpuBW(cgroup, period, quota);
> +    }
> +
> +    for (i = 0; i < priv->nvcpupids; i++) {
> +        rc = virCgroupForVcpu(cgroup, i, &cgroup_vcpu, 0);
> +        if (rc < 0) {
> +            virReportSystemError(-rc,
> +                                 _("Unable to find vcpu cgroup for %s(vcpu:"
> +                                   " %d)"),
> +                                 vm->def->name, i);
> +            goto cleanup;
> +        }
> +
> +        if (qemuSetupCgroupVcpuBW(cgroup_vcpu, period, quota) < 0)
> +            goto cleanup;
> +
> +        virCgroupFree(&cgroup_vcpu);
> +    }
> +
> +    return 0;
> +
> +cleanup:
> +    virCgroupFree(&cgroup_vcpu);
> +    return -1;
> +}
> +
>  static int qemuSetSchedulerParametersFlags(virDomainPtr dom,
>                                             virTypedParameterPtr params,
>                                             int nparams,
> @@ -5795,9 +5889,10 @@ static int qemuSetSchedulerParametersFlags(virDomainPtr dom,
>      int i;
>      virCgroupPtr group = NULL;
>      virDomainObjPtr vm = NULL;
> -    virDomainDefPtr persistentDef = NULL;
> +    virDomainDefPtr vmdef = NULL;
>      int ret = -1;
>      bool isActive;
> +    int rc;
>
>      virCheckFlags(VIR_DOMAIN_AFFECT_LIVE |
>                    VIR_DOMAIN_AFFECT_CONFIG, -1);
> @@ -5821,10 +5916,17 @@ static int qemuSetSchedulerParametersFlags(virDomainPtr dom,
>          flags = VIR_DOMAIN_AFFECT_CONFIG;
>      }
>
> -    if ((flags & VIR_DOMAIN_AFFECT_CONFIG) && !vm->persistent) {
> -        qemuReportError(VIR_ERR_OPERATION_INVALID, "%s",
> -                        _("cannot change persistent config of a transient domain"));
> -        goto cleanup;
> +    if (flags & VIR_DOMAIN_AFFECT_CONFIG) {
> +        if (!vm->persistent) {
> +            qemuReportError(VIR_ERR_OPERATION_INVALID, "%s",
> +                            _("cannot change persistent config of a transient domain"));
> +            goto cleanup;
> +        }
> +
> +        /* Make a copy for updated domain. */
> +        vmdef = virDomainObjCopyPersistentDef(driver->caps, vm);
> +        if (!vmdef)
> +            goto cleanup;
>      }
>
>      if (flags & VIR_DOMAIN_AFFECT_LIVE) {
> @@ -5851,7 +5953,6 @@ static int qemuSetSchedulerParametersFlags(virDomainPtr dom,
>          virTypedParameterPtr param = &params[i];
>
>          if (STREQ(param->field, "cpu_shares")) {
> -            int rc;
>              if (param->type != VIR_TYPED_PARAM_ULLONG) {
>                  qemuReportError(VIR_ERR_INVALID_ARG, "%s",
>                                  _("invalid type for cpu_shares tunable, expected a 'ullong'"));
> @@ -5870,19 +5971,47 @@ static int qemuSetSchedulerParametersFlags(virDomainPtr dom,
>              }
>
>              if (flags & VIR_DOMAIN_AFFECT_CONFIG) {
> -                persistentDef = virDomainObjGetPersistentDef(driver->caps, vm);
> -                if (!persistentDef) {
> -                    qemuReportError(VIR_ERR_INTERNAL_ERROR, "%s",
> -                                    _("can't get persistentDef"));
> +                vmdef->cputune.shares = params[i].value.ul;
> +            }
> +        } else if (STREQ(param->field, "cfs_period")) {
> +            if (param->type != VIR_TYPED_PARAM_ULLONG) {
> +                qemuReportError(VIR_ERR_INVALID_ARG, "%s",
> +                                _("invalid type for cfs_period tunable,"
> +                                  " expected a 'ullong'"));
> +                goto cleanup;
> +            }
> +
> +            if (flags & VIR_DOMAIN_AFFECT_LIVE) {
> +                rc = qemuSetVcpusBWLive(vm, group, params[i].value.ul, 0);
> +                if (rc != 0)
>                      goto cleanup;
> -                }
> -                persistentDef->cputune.shares = params[i].value.ul;
> -                rc = virDomainSaveConfig(driver->configDir, persistentDef);
> -                if (rc) {
> -                    qemuReportError(VIR_ERR_INTERNAL_ERROR, "%s",
> -                                    _("can't save config"));
> +
> +                if (params[i].value.ul)
> +                    vm->def->cputune.period = params[i].value.ul;
> +            }
> +
> +            if (flags & VIR_DOMAIN_AFFECT_CONFIG) {
> +                vmdef->cputune.period = params[i].value.ul;
> +            }
> +        } else if (STREQ(param->field, "cfs_quota")) {
> +            if (param->type != VIR_TYPED_PARAM_LLONG) {
> +                qemuReportError(VIR_ERR_INVALID_ARG, "%s",
> +                                _("invalid type for cfs_quota tunable,"
> +                                  " expected a 'llong'"));
> +                goto cleanup;
> +            }
> +
> +            if (flags & VIR_DOMAIN_AFFECT_LIVE) {
> +                rc = qemuSetVcpusBWLive(vm, group, 0, params[i].value.l);
> +                if (rc != 0)
>                      goto cleanup;
> -                }
> +
> +                if (params[i].value.l)
> +                    vm->def->cputune.quota = params[i].value.l;
> +            }
> +
> +            if (flags & VIR_DOMAIN_AFFECT_CONFIG) {
> +                vmdef->cputune.quota = params[i].value.l;
>              }
>          } else {
>              qemuReportError(VIR_ERR_INVALID_ARG,
> @@ -5891,9 +6020,23 @@ static int qemuSetSchedulerParametersFlags(virDomainPtr dom,
>          }
>      }
>
> +    if (virDomainSaveStatus(driver->caps, driver->stateDir, vm) < 0)
> +        goto cleanup;
> +
> +
> +    if (flags & VIR_DOMAIN_AFFECT_CONFIG) {
> +        rc = virDomainSaveConfig(driver->configDir, vmdef);
> +        if (rc < 0)
> +            goto cleanup;
> +
> +        virDomainObjAssignDef(vm, vmdef, false);
> +        vmdef = NULL;
> +    }
> +
>      ret = 0;
>
>  cleanup:
> +    virDomainDefFree(vmdef);
>      virCgroupFree(&group);
>      if (vm)
>          virDomainObjUnlock(vm);
> @@ -5912,6 +6055,71 @@ static int qemuSetSchedulerParameters(virDomainPtr dom,
>  }
>
>  static int
> +qemuGetVcpuBWLive(virCgroupPtr cgroup, unsigned long long *period,
> +                  long long *quota)
> +{
> +    int rc;
> +
> +    rc = virCgroupGetCpuCfsPeriod(cgroup, period);
> +    if (rc < 0) {
> +        virReportSystemError(-rc, "%s",
> +                             _("unable to get cpu bandwidth period tunable"));
> +        return -1;
> +    }
> +
> +    rc = virCgroupGetCpuCfsQuota(cgroup, quota);
> +    if (rc < 0) {
> +        virReportSystemError(-rc, "%s",
> +                             _("unable to get cpu bandwidth tunable"));
> +        return -1;
> +    }
> +
> +    return 0;
> +}
> +
> +static int
> +qemuGetVcpusBWLive(virDomainObjPtr vm, virCgroupPtr cgroup,
> +                   unsigned long long *period, long long *quota)
> +{
> +    virCgroupPtr cgroup_vcpu = NULL;
> +    qemuDomainObjPrivatePtr priv = NULL;
> +    int rc;
> +    int ret = -1;
> +
> +    priv = vm->privateData;
> +    if (priv->nvcpupids == 0 || priv->vcpupids[0] == vm->pid) {
> +        /* We do not create sub dir for each vcpu */
> +        rc = qemuGetVcpuBWLive(cgroup, period, quota);
> +        if (rc < 0)
> +            goto cleanup;
> +
> +        if (*quota > 0)
> +            *quota /= vm->def->vcpus;
> +        goto out;
> +    }
> +
> +    /* get period and quota for vcpu0 */
> +    rc = virCgroupForVcpu(cgroup, 0, &cgroup_vcpu, 0);
> +    if (!cgroup_vcpu) {
> +        virReportSystemError(-rc,
> +                             _("Unable to find vcpu cgroup for %s(vcpu: 0)"),
> +                             vm->def->name);
> +        goto cleanup;
> +    }
> +
> +    rc = qemuGetVcpuBWLive(cgroup_vcpu, period, quota);
> +    if (rc < 0)
> +        goto cleanup;
> +
> +out:
> +    ret = 0;
> +
> +cleanup:
> +    virCgroupFree(&cgroup_vcpu);
> +    return ret;
> +}
> +
> +static int
>  qemuGetSchedulerParametersFlags(virDomainPtr dom,
>                                  virTypedParameterPtr params,
>                                  int *nparams,
> @@ -5920,10 +6128,14 @@ qemuGetSchedulerParametersFlags(virDomainPtr dom,
>      struct qemud_driver *driver = dom->conn->privateData;
>      virCgroupPtr group = NULL;
>      virDomainObjPtr vm = NULL;
> -    unsigned long long val;
> +    unsigned long long shares;
> +    unsigned long long period;
> +    long long quota;
>      int ret = -1;
>      int rc;
>      bool isActive;
> +    bool cpu_bw_status;
> +    int saved_nparams = 0;
>
>      virCheckFlags(VIR_DOMAIN_AFFECT_LIVE |
>                    VIR_DOMAIN_AFFECT_CONFIG, -1);
> @@ -5943,6 +6155,13 @@ qemuGetSchedulerParametersFlags(virDomainPtr dom,
>          goto cleanup;
>      }
>
> +    if (*nparams > 1) {
> +        rc = qemuGetCpuBWStatus(driver->cgroup);
> +        if (rc < 0)
> +            goto cleanup;
> +        cpu_bw_status = !!rc;
> +    }
> +
>      vm = virDomainFindByUUID(&driver->domains, dom->uuid);
>
>      if (vm == NULL) {
> @@ -5976,9 +6195,17 @@ qemuGetSchedulerParametersFlags(virDomainPtr dom,
>                              _("can't get persistentDef"));
>              goto cleanup;
>          }
> -        val = persistentDef->cputune.shares;
> +        shares = persistentDef->cputune.shares;
> +        if (*nparams > 1 && cpu_bw_status) {
> +            period = persistentDef->cputune.period;
> +            quota = persistentDef->cputune.quota;
> +        }
>      } else {
> -        val = vm->def->cputune.shares;
> +        shares = vm->def->cputune.shares;
> +        if (*nparams > 1 && cpu_bw_status) {
> +            period = vm->def->cputune.period;
> +            quota = vm->def->cputune.quota;
> +        }
>      }
>      goto out;
>  }
> @@ -6001,14 +6228,20 @@
>          goto cleanup;
>      }
>
> -    rc = virCgroupGetCpuShares(group, &val);
> +    rc = virCgroupGetCpuShares(group, &shares);
>      if (rc != 0) {
>          virReportSystemError(-rc, "%s",
>                               _("unable to get cpu shares tunable"));
>          goto cleanup;
>      }
> +
> +    if (*nparams > 1 && cpu_bw_status) {
> +        rc = qemuGetVcpusBWLive(vm, group, &period, &quota);
> +        if (rc != 0)
> +            goto cleanup;
> +    }
>  out:
> -    params[0].value.ul = val;
> +    params[0].value.ul = shares;
>      params[0].type = VIR_TYPED_PARAM_ULLONG;
>      if (virStrcpyStatic(params[0].field, "cpu_shares") == NULL) {
>          qemuReportError(VIR_ERR_INTERNAL_ERROR,
> @@ -6016,7 +6249,36 @@ out:
>          goto cleanup;
>      }
>
> -    *nparams = 1;
> +    saved_nparams++;
> +
> +    if (cpu_bw_status) {
> +        if (*nparams > saved_nparams) {
> +            params[1].value.ul = period;
> +            params[1].type = VIR_TYPED_PARAM_ULLONG;
> +            if (virStrcpyStatic(params[1].field, "cfs_period") == NULL) {
> +                qemuReportError(VIR_ERR_INTERNAL_ERROR,
> +                                "%s",
> +                                _("Field cfs_period too long for destination"));
> +                goto cleanup;
> +            }
> +            saved_nparams++;
> +        }
> +
> +        if (*nparams > saved_nparams) {
> +            params[2].value.ul = quota;
> +            params[2].type = VIR_TYPED_PARAM_LLONG;
> +            if (virStrcpyStatic(params[2].field, "cfs_quota") == NULL) {
> +                qemuReportError(VIR_ERR_INTERNAL_ERROR,
> +                                "%s",
> +                                _("Field cfs_quota too long for destination"));
> +                goto cleanup;
> +            }
> +            saved_nparams++;
> +        }
> +    }
> +
> +    *nparams = saved_nparams;
> +
>      ret = 0;
>
>  cleanup:

ACK, but please add a commit message.

Daniel

-- 
Daniel Veillard      | libxml Gnome XML XSLT toolkit  http://xmlsoft.org/
daniel@xxxxxxxxxxxx  | Rpmfind RPM search engine http://rpmfind.net/
http://veillard.com/ | virtualization library  http://libvirt.org/

--
libvir-list mailing list
libvir-list@xxxxxxxxxx
https://www.redhat.com/mailman/listinfo/libvir-list