qemu 2.3 added a new QMP command block-set-write-threshold, which allows callers to get an interrupt when a file hits a write threshold, rather than the current approach of repeatedly polling for file allocation. This patch prepares the API for callers to register to receive the event, as well as a way to query the threshold via virDomainListGetStats(). The event is one-shot in qemu - a guest must re-register a new threshold after each time it triggers. However, the virConnectDomainEventRegisterAny() call does not allow parameterization, so callers must use a pair of APIs - one to register the callback (one-time call) that will be used each time a threshold triggers for any guest disk, and another to repeatedly set the desired threshold (must be called each time a threshold should be changed). Note that the threshold can either be registered by a byte offset, or by a parts-per-million proportion (a value between 0 and 1000000, scaled to the disk size). But the value is always reported as a byte offset, even when registered as a proportion. I also considered having the setup parameter be a double, to allow a finer resolution rather than fixed-point proportion; but that much resolution is probably not necessary (for a 100G disk, the resulting 100k granularity is pretty much in the noise). To make the patch series more digestible, this patch intentionally omits remote support, by using a couple of placeholders at a point where the compiler forces the addition of a case label within a switch statement. * include/libvirt/libvirt-domain.h (virDomainBlockSetWriteThreshold): New API. (virConnectDomainEventWriteThresholdCallback): New event. * src/libvirt_public.syms (LIBVIRT_1.3.0): Export it. * src/libvirt-domain.c (virDomainBlockSetWriteThreshold): New API. (virConnectGetAllDomainStats): New stat. * src/driver-hypervisor.h (virDrvDomainBlockSetWriteThreshold): New hypervisor entry point. * tools/virsh-domain.c (vshEventWriteThresholdPrint): Print new event. 
* tools/virsh.pod (domstats): Document new stat. * daemon/remote.c (domainEventCallbacks): Add stub. * src/conf/domain_event.c (virDomainEventDispatchDefaultFunc): Likewise. Signed-off-by: Eric Blake <eblake@xxxxxxxxxx> --- daemon/remote.c | 2 + include/libvirt/libvirt-domain.h | 53 ++++++++++++++++++++++ src/conf/domain_event.c | 4 +- src/driver-hypervisor.h | 7 +++ src/libvirt-domain.c | 98 ++++++++++++++++++++++++++++++++++++++++ src/libvirt_public.syms | 1 + tools/virsh-domain.c | 24 ++++++++++ tools/virsh.pod | 1 + 8 files changed, 189 insertions(+), 1 deletion(-) diff --git a/daemon/remote.c b/daemon/remote.c index e9e2dca..283ece2 100644 --- a/daemon/remote.c +++ b/daemon/remote.c @@ -1102,6 +1102,8 @@ static virConnectDomainEventGenericCallback domainEventCallbacks[] = { VIR_DOMAIN_EVENT_CALLBACK(remoteRelayDomainEventTunable), VIR_DOMAIN_EVENT_CALLBACK(remoteRelayDomainEventAgentLifecycle), VIR_DOMAIN_EVENT_CALLBACK(remoteRelayDomainEventDeviceAdded), + /* TODO: Implement RPC support for this */ + VIR_DOMAIN_EVENT_CALLBACK(NULL), }; verify(ARRAY_CARDINALITY(domainEventCallbacks) == VIR_DOMAIN_EVENT_ID_LAST); diff --git a/include/libvirt/libvirt-domain.h b/include/libvirt/libvirt-domain.h index 7564c20..ca2f929 100644 --- a/include/libvirt/libvirt-domain.h +++ b/include/libvirt/libvirt-domain.h @@ -1306,6 +1306,18 @@ int virDomainBlockStatsFlags (virDomainPtr dom, virTypedParameterPtr params, int *nparams, unsigned int flags); + +typedef enum { + /* threshold is a parts-per-million proportion of the image size + * rather than byte limit */ + VIR_DOMAIN_BLOCK_SET_WRITE_THRESHOLD_PROPORTION = (1 << 0), +} virDomainBlockSetWriteThresholdFlags; + +int virDomainBlockSetWriteThreshold(virDomainPtr dom, + const char *disk, + unsigned long long threshold, + unsigned int flags); + int virDomainInterfaceStats (virDomainPtr dom, const char *path, virDomainInterfaceStatsPtr stats, @@ -3255,6 +3267,46 @@ typedef void 
(*virConnectDomainEventDeviceAddedCallback)(virConnectPtr conn, void *opaque); /** + * virConnectDomainEventWriteThresholdCallback: + * @conn: connection object + * @dom: domain on which the event occurred + * @devAlias: device alias + * @path: a local path name of the host resource, or NULL if not available + * @threshold: threshold that was exceeded, in bytes + * @length: length beyond @threshold that was involved in the triggering + * write, or 0 if not known + * @opaque: application specified data + * + * The callback signature to use when registering for an event of type + * VIR_DOMAIN_EVENT_ID_WRITE_THRESHOLD with virConnectDomainEventRegisterAny() + * + * This callback occurs when a block device detects a write event that + * exceeds a non-zero threshold set by + * virDomainBlockSetWriteThreshold(). When this event occurs, the + * threshold is reset to 0, and a new limit must be installed to see + * the event again on the same device. The intent of this event is to + * allow time for the underlying storage to be resized dynamically + * prior to the point where the guest would be paused due to running + * out of space, without having to poll for allocation values. + * + * The contents of @devAlias will be "vda" when the threshold is + * triggered on the active layer of guest disk vda. Some hypervisors + * also support threshold reporting on backing images, such as during + * a block commit; when that happens, @devAlias will be "vda[1]" for + * the backingStore at index 1 within the chain of host resources for + * guest disk vda. For convenience, if the host resource has a local + * file name, that will be listed in @path (note that @path will be + * NULL for network resources). 
+ */ +typedef void (*virConnectDomainEventWriteThresholdCallback)(virConnectPtr conn, + virDomainPtr dom, + const char *devAlias, + const char *path, + unsigned long long threshold, + unsigned long long length, + void *opaque); + +/** + * VIR_DOMAIN_TUNABLE_CPU_VCPUPIN: * * Macro represents formatted pinning for one vcpu specified by id which is @@ -3537,6 +3589,7 @@ typedef enum { VIR_DOMAIN_EVENT_ID_TUNABLE = 17, /* virConnectDomainEventTunableCallback */ VIR_DOMAIN_EVENT_ID_AGENT_LIFECYCLE = 18,/* virConnectDomainEventAgentLifecycleCallback */ VIR_DOMAIN_EVENT_ID_DEVICE_ADDED = 19, /* virConnectDomainEventDeviceAddedCallback */ + VIR_DOMAIN_EVENT_ID_WRITE_THRESHOLD = 20, /* virConnectDomainEventWriteThresholdCallback */ # ifdef VIR_ENUM_SENTINELS VIR_DOMAIN_EVENT_ID_LAST diff --git a/src/conf/domain_event.c b/src/conf/domain_event.c index 20d66e1..c43799f 100644 --- a/src/conf/domain_event.c +++ b/src/conf/domain_event.c @@ -1,7 +1,7 @@ /* * domain_event.c: domain event queue processing helpers * - * Copyright (C) 2010-2014 Red Hat, Inc. + * Copyright (C) 2010-2015 Red Hat, Inc. * Copyright (C) 2008 VirtualIron * Copyright (C) 2013 SUSE LINUX Products GmbH, Nuernberg, Germany. 
* @@ -1614,6 +1614,8 @@ virDomainEventDispatchDefaultFunc(virConnectPtr conn, goto cleanup; } + case VIR_DOMAIN_EVENT_ID_WRITE_THRESHOLD: + /* TODO: Implement RPC support for this */ case VIR_DOMAIN_EVENT_ID_LAST: break; } diff --git a/src/driver-hypervisor.h b/src/driver-hypervisor.h index 3275343..b5b51f1 100644 --- a/src/driver-hypervisor.h +++ b/src/driver-hypervisor.h @@ -484,6 +484,12 @@ typedef int unsigned int flags); typedef int +(*virDrvDomainBlockSetWriteThreshold)(virDomainPtr domain, + const char *disk, + unsigned long long threshold, + unsigned int flags); + +typedef int (*virDrvDomainInterfaceStats)(virDomainPtr domain, const char *path, virDomainInterfaceStatsPtr stats); @@ -1324,6 +1330,7 @@ struct _virHypervisorDriver { virDrvDomainBlockResize domainBlockResize; virDrvDomainBlockStats domainBlockStats; virDrvDomainBlockStatsFlags domainBlockStatsFlags; + virDrvDomainBlockSetWriteThreshold domainBlockSetWriteThreshold; virDrvDomainInterfaceStats domainInterfaceStats; virDrvDomainSetInterfaceParameters domainSetInterfaceParameters; virDrvDomainGetInterfaceParameters domainGetInterfaceParameters; diff --git a/src/libvirt-domain.c b/src/libvirt-domain.c index 4d7b88a..4ac9325 100644 --- a/src/libvirt-domain.c +++ b/src/libvirt-domain.c @@ -5743,6 +5743,102 @@ virDomainBlockStatsFlags(virDomainPtr dom, /** + * virDomainBlockSetWriteThreshold: + * @dom: pointer to domain object + * @disk: path to the block device, or device shorthand + * @threshold: limit at which a write threshold event can trigger + * @flags: bitwise-OR of virDomainBlockSetWriteThresholdFlags + * + * This function is used to set a one-shot write threshold. It should + * be used in tandem with virConnectDomainEventRegisterAny() + * installing a handler for VIR_DOMAIN_EVENT_ID_WRITE_THRESHOLD. 
If + * the hypervisor detects that a write request (whether guest data, or + * host metadata) would exceed the host byte offset specified in + * @threshold, then an event is raised, and the threshold is reset to + * 0 at that time. The event registration is only needed once, but + * this function must be called each time a new threshold is desired; + * the event will only fire if a non-zero threshold is + * exceeded. + * + * By default, @threshold is specified in bytes, and must not exceed + * the size of the block device. However, when @flags includes + * VIR_DOMAIN_BLOCK_SET_WRITE_THRESHOLD_PROPORTION, @threshold is + * instead a value between 0 and 1,000,000, as a parts-per-million + * proportion to the current size of the disk, and the driver will + * compute the corresponding byte value. For example, 500000 + * represents a threshold when half the disk has been allocated. A + * driver may round the requested threshold to a granularity that can + * actually be supported. + * + * Setting a threshold allows event-based resizing of host resources + * that back a guest disk without having to poll the current disk + * allocation, while still having enough time to complete the resize + * before the guest would end up halting due to insufficient space. + * Calling this function to set the threshold back to zero will stop + * further firing of the event. virConnectGetAllDomainStats() can be + * used to track the current threshold value, always in the form + * normalized to bytes. + * + * The @disk parameter is either the device target shorthand (the + * <target dev='...'/> sub-element, such as "vda"), or an unambiguous + * source name of the block device (the <source file='...'/> + * sub-element, such as "/path/to/image"). Valid names can be found + * by calling virDomainGetXMLDesc() and inspecting elements within + * //domain/devices/disk. 
Some drivers might also accept strings such + * as "vda[1]" for setting the threshold of a backing image, useful + * when doing a block commit into the backing image. Hypervisors may + * restrict threshold reporting to certain types of host resources, + * such as a qcow2 format on top of a block device (as allocation + * tracking differs according to the type of host resource). + * + * Domains may have more than one block device. To set thresholds for + * each you should make multiple calls to this function. If write + * thresholds are not supported, an application will have to instead + * poll virDomainGetBlockInfo() or similar to track allocation. + * + * Returns -1 in case of error, 0 in case of success. + */ +int +virDomainBlockSetWriteThreshold(virDomainPtr dom, + const char *disk, + unsigned long long threshold, + unsigned int flags) +{ + virConnectPtr conn; + + VIR_DOMAIN_DEBUG(dom, "disk=%s, threshold=%llu, flags=%x", + disk, threshold, flags); + + virResetLastError(); + + virCheckDomainReturn(dom, -1); + virCheckNonNullArgGoto(disk, error); + if (flags & VIR_DOMAIN_BLOCK_SET_WRITE_THRESHOLD_PROPORTION && + threshold > 1000000) { + virReportError(VIR_ERR_INVALID_ARG, + _("threshold in %s is larger than 100%%"), + __FUNCTION__); + goto error; + } + conn = dom->conn; + + if (conn->driver->domainBlockSetWriteThreshold) { + int ret; + ret = conn->driver->domainBlockSetWriteThreshold(dom, disk, threshold, + flags); + if (ret < 0) + goto error; + return ret; + } + virReportUnsupportedError(); + + error: + virDispatchError(dom->conn); + return -1; +} + + +/** * virDomainInterfaceStats: * @dom: pointer to the domain object * @path: path to the interface @@ -11176,6 +11272,8 @@ virConnectGetDomainCapabilities(virConnectPtr conn, * unsigned long long. * "block.<num>.errors" - Xen only: the 'oo_req' value as * unsigned long long. + * "block.<num>.write-threshold" - byte at which a write threshold event + * will fire, as unsigned long long. 
* "block.<num>.allocation" - offset of the highest written sector * as unsigned long long. * "block.<num>.capacity" - logical size in bytes of the block device backing diff --git a/src/libvirt_public.syms b/src/libvirt_public.syms index 59d8c12..14b2373 100644 --- a/src/libvirt_public.syms +++ b/src/libvirt_public.syms @@ -717,6 +717,7 @@ LIBVIRT_1.2.16 { LIBVIRT_1.3.0 { global: + virDomainBlockSetWriteThreshold; virTypedParamsAddStringList; } LIBVIRT_1.2.16; diff --git a/tools/virsh-domain.c b/tools/virsh-domain.c index baf4fa3..141be3a 100644 --- a/tools/virsh-domain.c +++ b/tools/virsh-domain.c @@ -12082,6 +12082,28 @@ vshEventDeviceAddedPrint(virConnectPtr conn ATTRIBUTE_UNUSED, } static void +vshEventWriteThresholdPrint(virConnectPtr conn ATTRIBUTE_UNUSED, + virDomainPtr dom, + const char *alias, + const char *path, + unsigned long long threshold, + unsigned long long length, + void *opaque) +{ + vshDomEventData *data = opaque; + + if (!data->loop && *data->count) + return; + vshPrint(data->ctl, + _("event 'write-threshold' for domain %s disk %s (%s): " + "threshold %llu exceeded by %llu bytes\n"), + virDomainGetName(dom), alias, NULLSTR(path), threshold, length); + (*data->count)++; + if (!data->loop) + vshEventDone(data->ctl); +} + +static void vshEventTunablePrint(virConnectPtr conn ATTRIBUTE_UNUSED, virDomainPtr dom, virTypedParameterPtr params, @@ -12188,6 +12210,8 @@ static vshEventCallback vshEventCallbacks[] = { VIR_DOMAIN_EVENT_CALLBACK(vshEventAgentLifecyclePrint), }, { "device-added", VIR_DOMAIN_EVENT_CALLBACK(vshEventDeviceAddedPrint), }, + { "write-threshold", + VIR_DOMAIN_EVENT_CALLBACK(vshEventWriteThresholdPrint), }, }; verify(VIR_DOMAIN_EVENT_ID_LAST == ARRAY_CARDINALITY(vshEventCallbacks)); diff --git a/tools/virsh.pod b/tools/virsh.pod index bcfa165..600ea42 100644 --- a/tools/virsh.pod +++ b/tools/virsh.pod @@ -919,6 +919,7 @@ local file or block device, "block.<num>.fl.reqs" - total flush requests, "block.<num>.fl.times" - total time (ns) 
spent on cache flushing, "block.<num>.errors" - Xen only: the 'oo_req' value, +"block.<num>.write-threshold" - write threshold event trigger, in bytes, "block.<num>.allocation" - offset of highest written sector in bytes, "block.<num>.capacity" - logical size of source file in bytes, "block.<num>.physical" - physical size of source file in bytes -- 2.4.3 -- libvir-list mailing list libvir-list@xxxxxxxxxx https://www.redhat.com/mailman/listinfo/libvir-list