On Wed, Jun 12, 2024 at 03:02:17 -0700,wucf@xxxxxxxxxxxxx wrote:
From: Chun Feng Wu<wucf@xxxxxxxxxxxxx>
When attaching disk along with specified throttle groups, those
groups will be chained up by parent node name, this change includes
service side codes:
* Each filter references one throttle group by group name
* Update "qemuDomainDiskGetTopNodename" to take top throttle node
name if disk has throttles
* Each filter has a nodename, and those filters are chained up in
sequence
* Filter nodename index is generated by reusing
qemuDomainStorageIDNew and current global sequence number is
persistented in
virDomainObj->privateData(qemuDomainObjPrivate)->nodenameindex
* During hotplug, filter is created through QMP
request("blockdev-add" with "driver":"throttle") to QEMU
* Delete filters by "qemuBlockThrottleFiltersDetach"("blockdev-del")
when detaching device
* Use "iotune" and "throttlefilters" exclusively for specific disk
Signed-off-by: Chun Feng Wu<wucf@xxxxxxxxxxxxx>
---
src/conf/domain_validate.c | 8 +++
src/qemu/qemu_block.c | 131
+++++++++++++++++++++++++++++++++++++
src/qemu/qemu_block.h | 53 +++++++++++++++
src/qemu/qemu_command.c | 84 ++++++++++++++++++++++++
src/qemu/qemu_command.h | 9 +++
src/qemu/qemu_domain.c | 39 ++++++++++-
src/qemu/qemu_domain.h | 8 +++
src/qemu/qemu_driver.c | 6 ++
src/qemu/qemu_hotplug.c | 33 ++++++++++
9 files changed, 370 insertions(+), 1 deletion(-)
diff --git a/src/conf/domain_validate.c b/src/conf/domain_validate.c
index 395e036e8f..4cc5ed7577 100644
--- a/src/conf/domain_validate.c
+++ b/src/conf/domain_validate.c
@@ -942,6 +942,14 @@ virDomainDiskDefValidate(const virDomainDef *def,
return -1;
}
+ if (disk->throttlefilters && (disk->blkdeviotune.group_name ||
+ virDomainBlockIoTuneInfoHasAny(&disk->blkdeviotune))) {
This is hard to read as you've broken up the line within a nested brace.
It may confuse readers.
Rewrite as:
if (disk->throttlefilters &&
(disk->blkdeviotune.group_name ||
virDomainBlockIoTuneInfoHasAny(&disk->blkdeviotune))) {
+ virReportError(VIR_ERR_XML_ERROR,
Operation unsupported.
+ _("block 'throttlefilters' can't be used
together with 'iotune' for disk '%1$s'"),
+ disk->dst);
+ return -1;
+ }
+
return 0;
}
diff --git a/src/qemu/qemu_block.c b/src/qemu/qemu_block.c
index 738b72d7ea..9b8ff65887 100644
--- a/src/qemu/qemu_block.c
+++ b/src/qemu/qemu_block.c
@@ -2739,6 +2739,137 @@
qemuBlockStorageSourceCreateDetectSize(GHashTable *blockNamedNodeData,
}
+void
+qemuBlockThrottleFilterSetNodename(virDomainThrottleFilterDef *filter,
+ char *nodename)
+{
+ g_free(filter->nodename);
+ filter->nodename = nodename;
+}
+
+
+const char *
+qemuBlockThrottleFilterGetNodename(virDomainThrottleFilterDef *filter)
+{
+ return filter->nodename;
+}
All of this helper infrastructure doesn't belong to a patch that is
declaring that it's dealing with the setup of qemu. (I also wrote that
below as I've noticed it there).
It makes this patch too complex and thus hard to review. It's also the
reason it takes me so long. I'm demotivated on such commits as it takes
way too long to go through.
Any setup of nodenames and other libvirt-internal stuff, such as
validatio of config etc should be separated.
+/**
+ * qemuBlockThrottleFilterGetProps:
+ * @filter: throttle filter
+ * @parentNodeName: parent nodename of @filter
+ *
+ * Build "arguments" part of "blockdev-add" QMP cmd.
+ * e.g. {"execute":"blockdev-add", "arguments":{"driver":"throttle",
+ * "node-name":"libvirt-2-filter", "throttle-group":"limits0",
+ * "file": "libvirt-1-format"}}
Avoid this JSON in comments, as it's obvious from the code. Rather
describe any special handling if needed.
+ */
+virJSONValue *
+qemuBlockThrottleFilterGetProps(virDomainThrottleFilterDef *filter,
+ const char *parentNodeName)
+{
+ g_autoptr(virJSONValue) props = NULL;
+
+ if (virJSONValueObjectAdd(&props,
+ "s:driver", "throttle",
+ "s:node-name",
qemuBlockThrottleFilterGetNodename(filter),
+ "s:throttle-group", filter->group_name,
+ "s:file", parentNodeName,
+ NULL) < 0)
+ return 0;
return NULL; It looks confusing because 0 is success in our code.
+
+ return g_steal_pointer(&props);
+}
[...]
+qemuBlockThrottleFilterAttachData *
+qemuBlockThrottleFilterAttachPrepareBlockdev(virDomainThrottleFilterDef
*filter,
+ const char
*parentNodeName)
+{
+ g_autoptr(qemuBlockThrottleFilterAttachData) data = NULL;
+
+ data = g_new0(qemuBlockThrottleFilterAttachData, 1);
+
+ if (!(data->filterProps =
qemuBlockThrottleFilterGetProps(filter, parentNodeName)))
+ return NULL;
+
+ data->filterNodeName = qemuBlockThrottleFilterGetNodename(filter);
+ data->filterAttached = true;
Well, if you set this to true right here, it's pointless to even have
the variable.
The code you've copied this from uses the 'Attached' variable so that
it's set only after the object is created in qemu. This allows safe
rollback.
If you set it to true for everything you will attempt to potentially
roll back stuff that was not yet set in qemu, making the variable itself
pointless.
+
+ return g_steal_pointer(&data);
+}
+
+
+void
+qemuBlockThrottleFilterAttachRollback(qemuMonitor *mon,
+ qemuBlockThrottleFilterAttachData *data)
+{
+ virErrorPtr orig_err;
+
+ virErrorPreserveLast(&orig_err);
+
+ if (data->filterAttached)
+ ignore_value(qemuMonitorBlockdevDel(mon,
data->filterNodeName));
... here ...
+
+ virErrorRestore(&orig_err);
+}
+
+
+void
+qemuBlockThrottleFiltersDataFree(qemuBlockThrottleFiltersData *data)
+{
+ size_t i;
+
+ if (!data)
+ return;
+
+ for (i = 0; i < data->nfilterdata; i++)
+ qemuBlockThrottleFilterAttachDataFree(data->filterdata[i]);
+
+ g_free(data->filterdata);
+ g_free(data);
+}
+
+
+int
+qemuBlockThrottleFiltersAttach(qemuMonitor *mon,
+ qemuBlockThrottleFiltersData *data)
+{
+ size_t i;
+
+ for (i = 0; i < data->nfilterdata; i++) {
+ if (qemuMonitorBlockdevAdd(mon,
&data->filterdata[i]->filterProps) < 0)
This function requires the monitor to be locked. If you want to have
this as a separate function you must document it and state this fact.
+ return -1;
+ data->filterdata[i]->filterAttached = true;
+ }
+
+ return 0;
+}
+
+
+void
+qemuBlockThrottleFiltersDetach(qemuMonitor *mon,
+ qemuBlockThrottleFiltersData *data)
+{
+ size_t i;
+
+ for (i = data->nfilterdata; i > 0; i--)
+ qemuBlockThrottleFilterAttachRollback(mon,
data->filterdata[i-1]);
+}
+
+
int
qemuBlockRemoveImageMetadata(virQEMUDriver *driver,
virDomainObj *vm,
diff --git a/src/qemu/qemu_block.h b/src/qemu/qemu_block.h
index f9e961d85d..9888954ce4 100644
--- a/src/qemu/qemu_block.h
+++ b/src/qemu/qemu_block.h
@@ -214,6 +214,59 @@
qemuBlockStorageSourceCreateDetectSize(GHashTable *blockNamedNodeData,
virStorageSource *src,
virStorageSource *templ);
+void
+qemuBlockThrottleFilterSetNodename(virDomainThrottleFilterDef *filter,
+ char *nodename);
+
+const char *
+qemuBlockThrottleFilterGetNodename(virDomainThrottleFilterDef
*filter);
+
+virJSONValue *
+qemuBlockThrottleFilterGetProps(virDomainThrottleFilterDef *filter,
+ const char *parentNodeName);
This function isn't used outside of qemu_block.c so it doesn't need to
be exported.
+
+typedef struct qemuBlockThrottleFilterAttachData
qemuBlockThrottleFilterAttachData;
+struct qemuBlockThrottleFilterAttachData {
+ virJSONValue *filterProps;
+ const char *filterNodeName;
+ bool filterAttached;
+};
+
+qemuBlockThrottleFilterAttachData *
+qemuBlockThrottleFilterAttachPrepareBlockdev(virDomainThrottleFilterDef
*throttlefilter,
+ const char
*parentNodeName);
+
+void
+qemuBlockThrottleFilterAttachDataFree(qemuBlockThrottleFilterAttachData
*data);
+
+G_DEFINE_AUTOPTR_CLEANUP_FUNC(qemuBlockThrottleFilterAttachData,
+ qemuBlockThrottleFilterAttachDataFree);
+
+void
+qemuBlockThrottleFilterAttachRollback(qemuMonitor *mon,
+ qemuBlockThrottleFilterAttachData *data);
+
+struct _qemuBlockThrottleFiltersData {
+ qemuBlockThrottleFilterAttachData **filterdata;
+ size_t nfilterdata;
+};
+
+typedef struct _qemuBlockThrottleFiltersData
qemuBlockThrottleFiltersData;
+
+void
+qemuBlockThrottleFiltersDataFree(qemuBlockThrottleFiltersData *data);
+
+G_DEFINE_AUTOPTR_CLEANUP_FUNC(qemuBlockThrottleFiltersData,
+ qemuBlockThrottleFiltersDataFree);
+
+int
+qemuBlockThrottleFiltersAttach(qemuMonitor *mon,
+ qemuBlockThrottleFiltersData *data);
+
+void
+qemuBlockThrottleFiltersDetach(qemuMonitor *mon,
+ qemuBlockThrottleFiltersData *data);
+
int
qemuBlockRemoveImageMetadata(virQEMUDriver *driver,
virDomainObj *vm,
diff --git a/src/qemu/qemu_command.c b/src/qemu/qemu_command.c
index 2d0eddc79e..5ccae956d3 100644
--- a/src/qemu/qemu_command.c
+++ b/src/qemu/qemu_command.c
@@ -1582,6 +1582,13 @@ qemuDiskConfigBlkdeviotuneEnabled(const
virDomainDiskDef *disk)
}
+bool
+qemuDiskConfigThrottleFiltersEnabled(const virDomainDiskDef *disk)
This function doesn't need to be exported.
+{
+ return disk->nthrottlefilters > 0;
+}
+
+
/**
* qemuDiskBusIsSD:
* @bus: disk bus
@@ -11055,6 +11062,83 @@
qemuBuildStorageSourceChainAttachPrepareBlockdevOne(qemuBlockStorageSourceChainD
}
+/**
+ * qemuBuildThrottleFiltersAttachPrepareBlockdevOne:
+ * @data: filter chain data, which consists of array of filters and
size of such array
+ * @throttlefilter: new filter to be added into filter array
+ * @parentNodeName: parent nodename for this new throttlefilter,
which is used to build "blockdev-add" QMP request
the stuff after the comma can be dropped.
+ *
+ * Build filter node chain to provide more flexibility for block
disk I/O limits
+ */
+static int
+qemuBuildThrottleFiltersAttachPrepareBlockdevOne(qemuBlockThrottleFiltersData
*data,
+ virDomainThrottleFilterDef *throttlefilter,
+ const char
*parentNodeName)
+{
+ g_autoptr(qemuBlockThrottleFilterAttachData) elem = NULL;
+
+ if (!(elem =
qemuBlockThrottleFilterAttachPrepareBlockdev(throttlefilter,
parentNodeName)))
+ return -1;
+
+ VIR_APPEND_ELEMENT(data->filterdata, data->nfilterdata, elem);
+ return 0;
+}
+
+
+/**
+ * qemuBuildThrottleFiltersAttachPrepareBlockdev:
+ * @disk: domain disk
+ *
+ * Build filter node chain to provide more flexibility for block
disk I/O limits
+ */
+qemuBlockThrottleFiltersData *
+qemuBuildThrottleFiltersAttachPrepareBlockdev(virDomainDiskDef *disk)
+{
+ g_autoptr(qemuBlockThrottleFiltersData) data = NULL;
+ size_t i;
+ const char * parentNodeName = NULL;
+ g_autofree char *tmp_nodename = NULL;
+
+ data = g_new0(qemuBlockThrottleFiltersData, 1);
+ /* get starting parentNodename, e.g. libvirt-1-format */
+ parentNodeName =
qemuBlockStorageSourceGetEffectiveNodename(disk->src);
+ /* build filterdata, which contains all filters info and
sequence info through parentNodeName */
+ for (i = 0; i < disk->nthrottlefilters; i++) {
+ tmp_nodename = g_strdup(parentNodeName);
+ if (qemuBuildThrottleFiltersAttachPrepareBlockdevOne(data,
disk->throttlefilters[i], tmp_nodename) < 0)
+ return NULL;
The nodename copied into 'tmp_nodename' is leaked on every iteration.
Also qemuBuildThrottleFiltersAttachPrepareBlockdevOne doesn't modify it
so there's no point in copying it.
+ parentNodeName = disk->throttlefilters[i]->nodename;
+ }
+
+ return g_steal_pointer(&data);
+}
+
+
+/**
+ * qemuBuildThrottleFiltersDetachPrepareBlockdev:
+ * @disk: domain disk
+ *
+ * Build filters data for later "blockdev-del"
+ */
+qemuBlockThrottleFiltersData *
+qemuBuildThrottleFiltersDetachPrepareBlockdev(virDomainDiskDef *disk)
+{
+ g_autoptr(qemuBlockThrottleFiltersData) data =
g_new0(qemuBlockThrottleFiltersData, 1);
+ size_t i;
+
+ /* build filterdata, which contains filters info and sequence
info */
+ for (i = 0; i < disk->nthrottlefilters; i++) {
+ g_autoptr(qemuBlockThrottleFilterAttachData) elem =
g_new0(qemuBlockThrottleFilterAttachData, 1);
+ /* ignore other fields since the following info are enough
for "blockdev-del" */
+ elem->filterNodeName =
qemuBlockThrottleFilterGetNodename(disk->throttlefilters[i]);
So I didn't yet see any code that serializes any of this in the status
XML, thus it seems that this will not work after you restart
libvirtd/virtqemud if a VM with filters is running, and then try to
detach disks. You'll need to add that, or base the filter nodename on
something else.
Note that presence of the node name is authoritative and we must not try
to regenerate it. That would hinder changing the node names in the
future.
See how the copyOnRead layer node name is stored.
+ elem->filterAttached = true;
+
+ VIR_APPEND_ELEMENT(data->filterdata, data->nfilterdata, elem);
+ }
+ return g_steal_pointer(&data);
+}
+
+
/**
* qemuBuildStorageSourceChainAttachPrepareBlockdev:
* @top: storage source chain
[...]
diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c
index 7ba2ea4a5e..2831036e23 100644
--- a/src/qemu/qemu_domain.c
+++ b/src/qemu/qemu_domain.c
@@ -7989,7 +7989,8 @@ qemuDomainDetermineDiskChain(virQEMUDriver
*driver,
* @disk: disk definition object
*
* Returns the pointer to the node-name of the topmost layer used
by @disk as
- * backend. Currently returns the nodename of the copy-on-read
filter if enabled
+ * backend. Currently returns the nodename of top throttle filter
if enabled
+ * or the nodename of the copy-on-read filter if enabled
* or the nodename of the top image's format driver. Empty disks
return NULL.
* This must be used only with disks instantiated via -blockdev
(thus not
* for SD cards).
@@ -8005,6 +8006,10 @@ qemuDomainDiskGetTopNodename(virDomainDiskDef
*disk)
if (disk->copy_on_read == VIR_TRISTATE_SWITCH_ON)
return priv->nodeCopyOnRead;
+ /* If disk has throttles, take top throttle node name */
+ if (disk->nthrottlefilters > 0)
+ return
disk->throttlefilters[disk->nthrottlefilters-1]->nodename;
return disk->throttlefilters[disk->nthrottlefilters -
1]->nodename;
(extra spaces around subtraction operator)
return qemuBlockStorageSourceGetEffectiveNodename(disk->src);
}
@@ -11336,6 +11341,32 @@
qemuDomainPrepareStorageSourceBlockdevNodename(virDomainDiskDef *disk,
}
+int
+qemuDomainPrepareThrottleFilterBlockdevNodename(virDomainThrottleFilterDef
*filter,
This function is used only in this file so it doesn't need to be
exported. Also it's used only in one place so it can be inlined as it
makes the code harder to follow than necessary.
+ const char *nodenameprefix)
+{
+ char *nodename = g_strdup_printf("%s-filter", nodenameprefix);
+
+ qemuBlockThrottleFilterSetNodename(filter, nodename);
+
+ return 0;
+}
+
+
+int
+qemuDomainPrepareThrottleFilterBlockdev(virDomainThrottleFilterDef
*filter,
+ qemuDomainObjPrivate *priv)
This function is used only in this module thus it doesn't need to be
exported. Also it can't fail so there's no point in having a return
value.
+{
+ g_autofree char *nodenameprefix = NULL;
+
+ filter->id = qemuDomainStorageIDNew(priv);
+
+ nodenameprefix = g_strdup_printf("libvirt-%u", filter->id);
nodename = g_strdup_printf("libvirt-%u-filter" ...
Instead of the convoluted mess and the below call.
+
+ return qemuDomainPrepareThrottleFilterBlockdevNodename(filter,
nodenameprefix);
[...]
diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c
index f7e435d6d5..60989ae693 100644
--- a/src/qemu/qemu_driver.c
+++ b/src/qemu/qemu_driver.c
@@ -14828,6 +14828,12 @@
qemuDomainDiskBlockIoTuneIsSupported(virDomainDiskDef *disk)
return false;
}
+ if (disk->throttlefilters) {
+ virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
+ _("block 'iotune' can't be used together
with 'throttlefilters' for disk '%1$s'"), disk->dst);
+ return false;
+ }
Preferrably add all this infrastructure which doesn't deal with the
setup of qemu in a separate patch.
This patch is overly complex as it intermixes all of these validation
bits with the actual qemu interaction.
+
return true;
}
diff --git a/src/qemu/qemu_hotplug.c b/src/qemu/qemu_hotplug.c
index 4a3f4f657e..103b9e9f00 100644
--- a/src/qemu/qemu_hotplug.c
+++ b/src/qemu/qemu_hotplug.c
@@ -657,6 +657,7 @@ qemuDomainAttachDiskGeneric(virDomainObj *vm,
virDomainAsyncJob asyncJob)
{
g_autoptr(qemuBlockStorageSourceChainData) data = NULL;
+ g_autoptr(qemuBlockThrottleFiltersData) filterData = NULL;
qemuDomainObjPrivate *priv = vm->privateData;
g_autoptr(virJSONValue) devprops = NULL;
bool extensionDeviceAttached = false;
@@ -695,6 +696,19 @@ qemuDomainAttachDiskGeneric(virDomainObj *vm,
if (rc < 0)
goto rollback;
+ /* Setup throttling filters
+ * add additional "blockdev-add"(throttle filter) between
"blockdev-add" (qemuBlockStorageSourceChainAttach) and "device_add"
(qemuDomainAttachExtensionDevice)
Drop above line.
+ */
+ if ((filterData =
qemuBuildThrottleFiltersAttachPrepareBlockdev(disk))) {
+ if (qemuDomainObjEnterMonitorAsync(vm, asyncJob) < 0)
+ return -1;
+ /* QMP requests("blockdev-add" with
"driver":"throttle") to QEMU */
+ rc = qemuBlockThrottleFiltersAttach(priv->mon,
filterData);
+ qemuDomainObjExitMonitor(vm);
+ if (rc < 0)
+ goto rollback;
+ }
The ordering is wrong. In the prepare step you are ordering the node
name dependencies such as
device -> copyOnRead Layer -> throttle -> image chain
OBviously to ensure hierarchy at hotplug they need to be plugged from
the end.
This block here is placed _AFTER_ copyOnRead layer instantiation, so
that will not work with disks with copyOnRead.