On Wed, Jun 12, 2024 at 03:02:17 -0700, wucf@xxxxxxxxxxxxx wrote: > From: Chun Feng Wu <wucf@xxxxxxxxxxxxx> > > When attaching disk along with specified throttle groups, those groups will be chained up by parent node name, this change includes service side codes: > * Each filter references one throttle group by group name > * Update "qemuDomainDiskGetTopNodename" to take top throttle node name if disk has throttles > * Each filter has a nodename, and those filters are chained up in sequence > * Filter nodename index is generated by reusing qemuDomainStorageIDNew and current global sequence number is persistented in virDomainObj->privateData(qemuDomainObjPrivate)->nodenameindex > * During hotplug, filter is created through QMP request("blockdev-add" with "driver":"throttle") to QEMU > * Delete filters by "qemuBlockThrottleFiltersDetach"("blockdev-del") when detaching device > * Use "iotune" and "throttlefilters" exclusively for specific disk > > Signed-off-by: Chun Feng Wu <wucf@xxxxxxxxxxxxx> > --- > src/conf/domain_validate.c | 8 +++ > src/qemu/qemu_block.c | 131 +++++++++++++++++++++++++++++++++++++ > src/qemu/qemu_block.h | 53 +++++++++++++++ > src/qemu/qemu_command.c | 84 ++++++++++++++++++++++++ > src/qemu/qemu_command.h | 9 +++ > src/qemu/qemu_domain.c | 39 ++++++++++- > src/qemu/qemu_domain.h | 8 +++ > src/qemu/qemu_driver.c | 6 ++ > src/qemu/qemu_hotplug.c | 33 ++++++++++ > 9 files changed, 370 insertions(+), 1 deletion(-) > > diff --git a/src/conf/domain_validate.c b/src/conf/domain_validate.c > index 395e036e8f..4cc5ed7577 100644 > --- a/src/conf/domain_validate.c > +++ b/src/conf/domain_validate.c > @@ -942,6 +942,14 @@ virDomainDiskDefValidate(const virDomainDef *def, > return -1; > } > > + if (disk->throttlefilters && (disk->blkdeviotune.group_name || > + virDomainBlockIoTuneInfoHasAny(&disk->blkdeviotune))) { This is hard to read as you've broken up the line within a nested brace. It may confuse readers. Rewrite as: if (disk->throttlefilters && (disk->blkdeviotune.group_name || virDomainBlockIoTuneInfoHasAny(&disk->blkdeviotune))) { > + virReportError(VIR_ERR_XML_ERROR, Operation unsupported. > + _("block 'throttlefilters' can't be used together with 'iotune' for disk '%1$s'"), > + disk->dst); > + return -1; > + } > + > return 0; > } > > diff --git a/src/qemu/qemu_block.c b/src/qemu/qemu_block.c > index 738b72d7ea..9b8ff65887 100644 > --- a/src/qemu/qemu_block.c > +++ b/src/qemu/qemu_block.c > @@ -2739,6 +2739,137 @@ qemuBlockStorageSourceCreateDetectSize(GHashTable *blockNamedNodeData, > } > > > +void > +qemuBlockThrottleFilterSetNodename(virDomainThrottleFilterDef *filter, > + char *nodename) > +{ > + g_free(filter->nodename); > + filter->nodename = nodename; > +} > + > + > +const char * > +qemuBlockThrottleFilterGetNodename(virDomainThrottleFilterDef *filter) > +{ > + return filter->nodename; > +} All of this helper infrastructure doesn't belong to a patch that is declaring that it's dealing with the setup of qemu. (I also wrote that below as I've noticed it there). It makes this patch too complex and thus hard to review. It's also the reason it takes me so long. I'm demotivated on such commits as it takes way too long to go through. Any setup of nodenames and other libvirt-internal stuff, such as validatio of config etc should be separated. > +/** > + * qemuBlockThrottleFilterGetProps: > + * @filter: throttle filter > + * @parentNodeName: parent nodename of @filter > + * > + * Build "arguments" part of "blockdev-add" QMP cmd. > + * e.g. {"execute":"blockdev-add", "arguments":{"driver":"throttle", > + * "node-name":"libvirt-2-filter", "throttle-group":"limits0", > + * "file": "libvirt-1-format"}} Avoid this JSON in comments, as it's obvious from the code. Rather describe any special handling if needed. > + */ > +virJSONValue * > +qemuBlockThrottleFilterGetProps(virDomainThrottleFilterDef *filter, > + const char *parentNodeName) > +{ > + g_autoptr(virJSONValue) props = NULL; > + > + if (virJSONValueObjectAdd(&props, > + "s:driver", "throttle", > + "s:node-name", qemuBlockThrottleFilterGetNodename(filter), > + "s:throttle-group", filter->group_name, > + "s:file", parentNodeName, > + NULL) < 0) > + return 0; return NULL; It looks confusing because 0 is success in our code. > + > + return g_steal_pointer(&props); > +} [...] > +qemuBlockThrottleFilterAttachData * > +qemuBlockThrottleFilterAttachPrepareBlockdev(virDomainThrottleFilterDef *filter, > + const char *parentNodeName) > +{ > + g_autoptr(qemuBlockThrottleFilterAttachData) data = NULL; > + > + data = g_new0(qemuBlockThrottleFilterAttachData, 1); > + > + if (!(data->filterProps = qemuBlockThrottleFilterGetProps(filter, parentNodeName))) > + return NULL; > + > + data->filterNodeName = qemuBlockThrottleFilterGetNodename(filter); > + data->filterAttached = true; Well, if you set this to true right here, it's pointless to even have the variable. The code you've copied this from uses the 'Attached' variable so that it's set only after the object is created in qemu. This allows safe rollback. If you set it to true for everything you will attempt to potentially roll back stuff that was not yet set in qemu, making the variable itself pointless. > + > + return g_steal_pointer(&data); > +} > + > + > +void > +qemuBlockThrottleFilterAttachRollback(qemuMonitor *mon, > + qemuBlockThrottleFilterAttachData *data) > +{ > + virErrorPtr orig_err; > + > + virErrorPreserveLast(&orig_err); > + > + if (data->filterAttached) > + ignore_value(qemuMonitorBlockdevDel(mon, data->filterNodeName)); ... here ... > + > + virErrorRestore(&orig_err); > +} > + > + > +void > +qemuBlockThrottleFiltersDataFree(qemuBlockThrottleFiltersData *data) > +{ > + size_t i; > + > + if (!data) > + return; > + > + for (i = 0; i < data->nfilterdata; i++) > + qemuBlockThrottleFilterAttachDataFree(data->filterdata[i]); > + > + g_free(data->filterdata); > + g_free(data); > +} > + > + > +int > +qemuBlockThrottleFiltersAttach(qemuMonitor *mon, > + qemuBlockThrottleFiltersData *data) > +{ > + size_t i; > + > + for (i = 0; i < data->nfilterdata; i++) { > + if (qemuMonitorBlockdevAdd(mon, &data->filterdata[i]->filterProps) < 0) This function requires the monitor to be locked. If you want to have this as a separate function you must document it and state this fact. > + return -1; > + data->filterdata[i]->filterAttached = true; > + } > + > + return 0; > +} > + > + > +void > +qemuBlockThrottleFiltersDetach(qemuMonitor *mon, > + qemuBlockThrottleFiltersData *data) > +{ > + size_t i; > + > + for (i = data->nfilterdata; i > 0; i--) > + qemuBlockThrottleFilterAttachRollback(mon, data->filterdata[i-1]); > +} > + > + > int > qemuBlockRemoveImageMetadata(virQEMUDriver *driver, > virDomainObj *vm, > diff --git a/src/qemu/qemu_block.h b/src/qemu/qemu_block.h > index f9e961d85d..9888954ce4 100644 > --- a/src/qemu/qemu_block.h > +++ b/src/qemu/qemu_block.h > @@ -214,6 +214,59 @@ qemuBlockStorageSourceCreateDetectSize(GHashTable *blockNamedNodeData, > virStorageSource *src, > virStorageSource *templ); > > +void > +qemuBlockThrottleFilterSetNodename(virDomainThrottleFilterDef *filter, > + char *nodename); > + > +const char * > +qemuBlockThrottleFilterGetNodename(virDomainThrottleFilterDef *filter); > + > +virJSONValue * > +qemuBlockThrottleFilterGetProps(virDomainThrottleFilterDef *filter, > + const char *parentNodeName); This function isn't used outside of qemu_block.c so it doesn't need to be exported. > + > +typedef struct qemuBlockThrottleFilterAttachData qemuBlockThrottleFilterAttachData; > +struct qemuBlockThrottleFilterAttachData { > + virJSONValue *filterProps; > + const char *filterNodeName; > + bool filterAttached; > +}; > + > +qemuBlockThrottleFilterAttachData * > +qemuBlockThrottleFilterAttachPrepareBlockdev(virDomainThrottleFilterDef *throttlefilter, > + const char *parentNodeName); > + > +void > +qemuBlockThrottleFilterAttachDataFree(qemuBlockThrottleFilterAttachData *data); > + > +G_DEFINE_AUTOPTR_CLEANUP_FUNC(qemuBlockThrottleFilterAttachData, > + qemuBlockThrottleFilterAttachDataFree); > + > +void > +qemuBlockThrottleFilterAttachRollback(qemuMonitor *mon, > + qemuBlockThrottleFilterAttachData *data); > + > +struct _qemuBlockThrottleFiltersData { > + qemuBlockThrottleFilterAttachData **filterdata; > + size_t nfilterdata; > +}; > + > +typedef struct _qemuBlockThrottleFiltersData qemuBlockThrottleFiltersData; > + > +void > +qemuBlockThrottleFiltersDataFree(qemuBlockThrottleFiltersData *data); > + > +G_DEFINE_AUTOPTR_CLEANUP_FUNC(qemuBlockThrottleFiltersData, > + qemuBlockThrottleFiltersDataFree); > + > +int > +qemuBlockThrottleFiltersAttach(qemuMonitor *mon, > + qemuBlockThrottleFiltersData *data); > + > +void > +qemuBlockThrottleFiltersDetach(qemuMonitor *mon, > + qemuBlockThrottleFiltersData *data); > + > int > qemuBlockRemoveImageMetadata(virQEMUDriver *driver, > virDomainObj *vm, > diff --git a/src/qemu/qemu_command.c b/src/qemu/qemu_command.c > index 2d0eddc79e..5ccae956d3 100644 > --- a/src/qemu/qemu_command.c > +++ b/src/qemu/qemu_command.c > @@ -1582,6 +1582,13 @@ qemuDiskConfigBlkdeviotuneEnabled(const virDomainDiskDef *disk) > } > > > +bool > +qemuDiskConfigThrottleFiltersEnabled(const virDomainDiskDef *disk) This function doesn't need to be exported. > +{ > + return disk->nthrottlefilters > 0; > +} > + > + > /** > * qemuDiskBusIsSD: > * @bus: disk bus > @@ -11055,6 +11062,83 @@ qemuBuildStorageSourceChainAttachPrepareBlockdevOne(qemuBlockStorageSourceChainD > } > > > +/** > + * qemuBuildThrottleFiltersAttachPrepareBlockdevOne: > + * @data: filter chain data, which consists of array of filters and size of such array > + * @throttlefilter: new filter to be added into filter array > + * @parentNodeName: parent nodename for this new throttlefilter, which is used to build "blockdev-add" QMP request the stuff after the comma can be dropped. > + * > + * Build filter node chain to provide more flexibility for block disk I/O limits > + */ > +static int > +qemuBuildThrottleFiltersAttachPrepareBlockdevOne(qemuBlockThrottleFiltersData *data, > + virDomainThrottleFilterDef *throttlefilter, > + const char *parentNodeName) > +{ > + g_autoptr(qemuBlockThrottleFilterAttachData) elem = NULL; > + > + if (!(elem = qemuBlockThrottleFilterAttachPrepareBlockdev(throttlefilter, parentNodeName))) > + return -1; > + > + VIR_APPEND_ELEMENT(data->filterdata, data->nfilterdata, elem); > + return 0; > +} > + > + > +/** > + * qemuBuildThrottleFiltersAttachPrepareBlockdev: > + * @disk: domain disk > + * > + * Build filter node chain to provide more flexibility for block disk I/O limits > + */ > +qemuBlockThrottleFiltersData * > +qemuBuildThrottleFiltersAttachPrepareBlockdev(virDomainDiskDef *disk) > +{ > + g_autoptr(qemuBlockThrottleFiltersData) data = NULL; > + size_t i; > + const char * parentNodeName = NULL; > + g_autofree char *tmp_nodename = NULL; > + > + data = g_new0(qemuBlockThrottleFiltersData, 1); > + /* get starting parentNodename, e.g. libvirt-1-format */ > + parentNodeName = qemuBlockStorageSourceGetEffectiveNodename(disk->src); > + /* build filterdata, which contains all filters info and sequence info through parentNodeName */ > + for (i = 0; i < disk->nthrottlefilters; i++) { > + tmp_nodename = g_strdup(parentNodeName); > + if (qemuBuildThrottleFiltersAttachPrepareBlockdevOne(data, disk->throttlefilters[i], tmp_nodename) < 0) > + return NULL; The nodename copied into 'tmp_nodename' is leaked on every iteration. Also qemuBuildThrottleFiltersAttachPrepareBlockdevOne doesn't modify it so there's no point in copying it. > + parentNodeName = disk->throttlefilters[i]->nodename; > + } > + > + return g_steal_pointer(&data); > +} > + > + > +/** > + * qemuBuildThrottleFiltersDetachPrepareBlockdev: > + * @disk: domain disk > + * > + * Build filters data for later "blockdev-del" > + */ > +qemuBlockThrottleFiltersData * > +qemuBuildThrottleFiltersDetachPrepareBlockdev(virDomainDiskDef *disk) > +{ > + g_autoptr(qemuBlockThrottleFiltersData) data = g_new0(qemuBlockThrottleFiltersData, 1); > + size_t i; > + > + /* build filterdata, which contains filters info and sequence info */ > + for (i = 0; i < disk->nthrottlefilters; i++) { > + g_autoptr(qemuBlockThrottleFilterAttachData) elem = g_new0(qemuBlockThrottleFilterAttachData, 1); > + /* ignore other fields since the following info are enough for "blockdev-del" */ > + elem->filterNodeName = qemuBlockThrottleFilterGetNodename(disk->throttlefilters[i]); So I didn't yet see any code that serializes any of this in the status XML, thus it seems that this will not work after you restart libvirtd/virtqemud if a VM with filters is running, and then try to detach disks. You'll need to add that, or base the filter nodename on something else. Note that presence of the node name is authoritative and we must not try to regenerate it. That would hinder changing the node names in the future. See how the copyOnRead layer node name is stored. > + elem->filterAttached = true; > + > + VIR_APPEND_ELEMENT(data->filterdata, data->nfilterdata, elem); > + } > + return g_steal_pointer(&data); > +} > + > + > /** > * qemuBuildStorageSourceChainAttachPrepareBlockdev: > * @top: storage source chain [...] > diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c > index 7ba2ea4a5e..2831036e23 100644 > --- a/src/qemu/qemu_domain.c > +++ b/src/qemu/qemu_domain.c > @@ -7989,7 +7989,8 @@ qemuDomainDetermineDiskChain(virQEMUDriver *driver, > * @disk: disk definition object > * > * Returns the pointer to the node-name of the topmost layer used by @disk as > - * backend. Currently returns the nodename of the copy-on-read filter if enabled > + * backend. Currently returns the nodename of top throttle filter if enabled > + * or the nodename of the copy-on-read filter if enabled > * or the nodename of the top image's format driver. Empty disks return NULL. > * This must be used only with disks instantiated via -blockdev (thus not > * for SD cards). > @@ -8005,6 +8006,10 @@ qemuDomainDiskGetTopNodename(virDomainDiskDef *disk) > if (disk->copy_on_read == VIR_TRISTATE_SWITCH_ON) > return priv->nodeCopyOnRead; > > + /* If disk has throttles, take top throttle node name */ > + if (disk->nthrottlefilters > 0) > + return disk->throttlefilters[disk->nthrottlefilters-1]->nodename; return disk->throttlefilters[disk->nthrottlefilters - 1]->nodename; (extra spaces around subtraction operator) > return qemuBlockStorageSourceGetEffectiveNodename(disk->src); > } > > @@ -11336,6 +11341,32 @@ qemuDomainPrepareStorageSourceBlockdevNodename(virDomainDiskDef *disk, > } > > > +int > +qemuDomainPrepareThrottleFilterBlockdevNodename(virDomainThrottleFilterDef *filter, This function is used only in this file so it doesn't need to be exported. Also it's used only in one place so it can be inlined as it makes the code harder to follow than necessary. > + const char *nodenameprefix) > +{ > + char *nodename = g_strdup_printf("%s-filter", nodenameprefix); > + > + qemuBlockThrottleFilterSetNodename(filter, nodename); > + > + return 0; > +} > + > + > +int > +qemuDomainPrepareThrottleFilterBlockdev(virDomainThrottleFilterDef *filter, > + qemuDomainObjPrivate *priv) This function is used only in this module thus it doesn't need to be exported. Also it can't fail so there's no point in having a return value. > +{ > + g_autofree char *nodenameprefix = NULL; > + > + filter->id = qemuDomainStorageIDNew(priv); > + > + nodenameprefix = g_strdup_printf("libvirt-%u", filter->id); nodename = g_strdup_printf("libvirt-%u-filter" ... Instead of the convoluted mess and the below call. > + > + return qemuDomainPrepareThrottleFilterBlockdevNodename(filter, nodenameprefix); [...] > diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c > index f7e435d6d5..60989ae693 100644 > --- a/src/qemu/qemu_driver.c > +++ b/src/qemu/qemu_driver.c > @@ -14828,6 +14828,12 @@ qemuDomainDiskBlockIoTuneIsSupported(virDomainDiskDef *disk) > return false; > } > > + if (disk->throttlefilters) { > + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, > + _("block 'iotune' can't be used together with 'throttlefilters' for disk '%1$s'"), disk->dst); > + return false; > + } Preferrably add all this infrastructure which doesn't deal with the setup of qemu in a separate patch. This patch is overly complex as it intermixes all of these validation bits with the actual qemu interaction. > + > return true; > } > > diff --git a/src/qemu/qemu_hotplug.c b/src/qemu/qemu_hotplug.c > index 4a3f4f657e..103b9e9f00 100644 > --- a/src/qemu/qemu_hotplug.c > +++ b/src/qemu/qemu_hotplug.c > @@ -657,6 +657,7 @@ qemuDomainAttachDiskGeneric(virDomainObj *vm, > virDomainAsyncJob asyncJob) > { > g_autoptr(qemuBlockStorageSourceChainData) data = NULL; > + g_autoptr(qemuBlockThrottleFiltersData) filterData = NULL; > qemuDomainObjPrivate *priv = vm->privateData; > g_autoptr(virJSONValue) devprops = NULL; > bool extensionDeviceAttached = false; > @@ -695,6 +696,19 @@ qemuDomainAttachDiskGeneric(virDomainObj *vm, > if (rc < 0) > goto rollback; > > + /* Setup throttling filters > + * add additional "blockdev-add"(throttle filter) between "blockdev-add" (qemuBlockStorageSourceChainAttach) and "device_add" (qemuDomainAttachExtensionDevice) Drop above line. > + */ > + if ((filterData = qemuBuildThrottleFiltersAttachPrepareBlockdev(disk))) { > + if (qemuDomainObjEnterMonitorAsync(vm, asyncJob) < 0) > + return -1; > + /* QMP requests("blockdev-add" with "driver":"throttle") to QEMU */ > + rc = qemuBlockThrottleFiltersAttach(priv->mon, filterData); > + qemuDomainObjExitMonitor(vm); > + if (rc < 0) > + goto rollback; > + } The ordering is wrong. In the prepare step you are ordering the node name dependencies such as device -> copyOnRead Layer -> throttle -> image chain OBviously to ensure hierarchy at hotplug they need to be plugged from the end. This block here is placed _AFTER_ copyOnRead layer instantiation, so that will not work with disks with copyOnRead. > + > if (disk->transient) { > g_autoptr(qemuBlockStorageSourceAttachData) backend = NULL; > g_autoptr(GHashTable) blockNamedNodeData = NULL; > @@ -766,6 +780,8 @@ qemuDomainAttachDiskGeneric(virDomainObj *vm, > if (extensionDeviceAttached) > ignore_value(qemuDomainDetachExtensionDevice(priv->mon, &disk->info)); > > + qemuBlockThrottleFiltersDetach(priv->mon, filterData); > + > qemuBlockStorageSourceChainDetach(priv->mon, data); > > qemuDomainObjExitMonitor(vm); > @@ -921,6 +937,14 @@ qemuDomainAttachDeviceDiskLiveInternal(virQEMUDriver *driver, > bool releaseSeclabel = false; > int ret = -1; > > + if (disk->device == VIR_DOMAIN_DISK_DEVICE_CDROM) { > + if (disk->nthrottlefilters > 0) { > + virReportError(VIR_ERR_OPERATION_UNSUPPORTED, "%s", > + _("cdrom device with throttle filters isn't supported")); So and now this is why I don't like the fact that you are doing this on a per-disk level. On a per-image level (if you'd instantiate 'throttle' layers when adding the image) this would not be a problem. I'd strongly prefer if you modify this such that the trottle layers will be instantiated at per-storage-source level both in XML and in the code. > + return -1; > + } > + } > + > if (disk->device == VIR_DOMAIN_DISK_DEVICE_FLOPPY) { > virReportError(VIR_ERR_OPERATION_UNSUPPORTED, "%s", > _("floppy device hotplug isn't supported")); > @@ -4499,6 +4523,7 @@ qemuDomainRemoveDiskDevice(virQEMUDriver *driver, > { > qemuDomainDiskPrivate *diskPriv = QEMU_DOMAIN_DISK_PRIVATE(disk); > g_autoptr(qemuBlockStorageSourceChainData) diskBackend = NULL; > + g_autoptr(qemuBlockThrottleFiltersData) filterData = NULL; > size_t i; > qemuDomainObjPrivate *priv = vm->privateData; > int ret = -1; > @@ -4537,6 +4562,14 @@ qemuDomainRemoveDiskDevice(virQEMUDriver *driver, > } > } > > + qemuDomainObjEnterMonitor(vm); > + /* QMP request("blockdev-del") to QEMU to delete throttle filter*/ > + if ((filterData = qemuBuildThrottleFiltersDetachPrepareBlockdev(disk))) { > + /* "qemuBlockThrottleFiltersDetach" is used in rollback within "qemuDomainAttachDiskGeneric" as well */ > + qemuBlockThrottleFiltersDetach(priv->mon, filterData); In which case this'd be automatic. > + } > + qemuDomainObjExitMonitor(vm); > + > qemuDomainObjEnterMonitor(vm); > > if (diskBackend) > -- > 2.34.1 > This patch should also contain the corresponding commandline infrastructure for the filter itself. The disk backend code is specifically unified by using the intermediate struct so that it's guaranteed that both hotplug and commandline work the same. Thus they need to be added simultaenously.