On 2021/7/2 15:45, Peter Krempa wrote: > On Fri, Jul 02, 2021 at 12:53:29 +0800, Yang Fei wrote: >> This patch add the ability to statistic the halt polling time when >> VM execute HLT(arm is WFI). >> >> In actual services, the execution of the HLT instruction by the >> guest is an important cause of virtualization overhead. The halt >> polling feature is introduced to solve this problem. When a guest >> idle VM exit occurs, the host continues polling for a period of >> time to reduce the guest service delay. This mechanism may cause >> the CPU usage to be 100% when the physical CPU is idle. If the >> guest service model is woken up at an interval to process a small >> amount of traffic, and the interval is shorter than kvm halt_poll_ns. >> The host polls the block time of the entire VM and the CPU usage >> increases to 100%. >> >> The kernel provides the capability of collecting statistics on the >> halt polling time after v5.8, Introduced by commit >> <cb953129bfe5c0f2da835a0469930873fb7e71df>. It is rendered in debugfs. >> Therefore, we can use this kernel feature to provide the halt poll >> time to the user to obtain a more accurate CPU usage as required. > > Note that I'm reviewing just the code side, not the justification or > usefullnes of the data reported. > >> >> Signed-off-by: Yang Fei <yangfei85@xxxxxxxxxx> >> --- >> src/libvirt_private.syms | 2 + >> src/qemu/qemu_driver.c | 29 +++++++++++++ >> src/util/virutil.c | 89 ++++++++++++++++++++++++++++++++++++++++ >> src/util/virutil.h | 9 ++++ >> 4 files changed, 129 insertions(+) >> >> diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms >> index 68e4b6aab8..f92213b8c2 100644 >> --- a/src/libvirt_private.syms >> +++ b/src/libvirt_private.syms >> @@ -3479,6 +3479,8 @@ virDoesUserExist; >> virDoubleToStr; >> virFormatIntDecimal; >> virFormatIntPretty; >> +virGetCpuHaltPollTime; >> +virGetDebugFsKvmValue; >> virGetDeviceID; >> virGetDeviceUnprivSGIO; >> virGetGroupID; >> diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c >> index 235f575901..3a2b530ecf 100644 >> --- a/src/qemu/qemu_driver.c >> +++ b/src/qemu/qemu_driver.c >> @@ -17812,6 +17812,32 @@ qemuDomainGetStatsCpuCache(virQEMUDriver *driver, >> return ret; >> } >> >> +#ifdef __linux__ >> +static int >> +qemuDomainGetStatsCpuHaltPollTime(virDomainObj *dom, >> + virTypedParamList *params) >> +{ >> + unsigned long long haltPollSuccess = 0; >> + unsigned long long haltPollFail = 0; >> + pid_t pid = dom->pid; >> + >> + if (virGetCpuHaltPollTime(pid, &haltPollSuccess, &haltPollFail) != 0) >> + return -1; >> + if (virTypedParamListAddULLong(params, haltPollSuccess, "haltpollsuccess.time") < 0) >> + return -1; >> + if (virTypedParamListAddULLong(params, haltPollFail, "haltpollfail.time") < 0) >> + return -1; >> + >> + return 0; >> +} >> +#else >> +static int >> +qemuDomainGetStatsCpuHaltPollTime(virDomainObj *dom, >> + virTypedParamList *params) >> +{ >> + return -1; > > This breaks domstats on non-linux ... > >> +} >> +#endif >> >> static int >> qemuDomainGetStatsCpuCgroup(virDomainObj *dom, >> @@ -17852,6 +17878,9 @@ qemuDomainGetStatsCpu(virQEMUDriver *driver, >> if (qemuDomainGetStatsCpuCache(driver, dom, params) < 0) >> return -1; >> >> + if (qemuDomainGetStatsCpuHaltPollTime(dom, params) < 0) >> + return -1; > > since you'd return -1 here. > > Additionally the qemuDomainGetStatsCpuHaltPollTime function IMO should > not be conditionally compiled on linux. The helpers below should and the > qemu code should just conditionally fill in the data if it's available. > My consideration is not to compile this feature on non-Linux platforms, since the function of get halt polling time is based on the capabilities of Linux kernel. How about return 0 in non-linux platform? >> + >> return 0; >> } >> >> diff --git a/src/util/virutil.c b/src/util/virutil.c >> index 311cbbf93a..8715deaca5 100644 >> --- a/src/util/virutil.c >> +++ b/src/util/virutil.c >> @@ -1936,3 +1936,92 @@ virPipeNonBlock(int fds[2]) >> { >> return virPipeImpl(fds, true, true); >> } >> + >> +int >> +virGetDebugFsKvmValue(struct dirent *ent, >> + const char *path, >> + const char *filename, >> + unsigned long long *value) > > This helper should be added in a separate patch, to separate it from the > other changes, especially the qemu driver change. > I will remove it to a separate patch in next version. >> +{ >> + g_autofree char *valToStr = NULL; >> + g_autofree char *valPath = NULL; >> + int rc = -1; >> + int ret = -1; >> + >> + valPath = g_strdup_printf("%s/%s/%s", path, ent->d_name, filename); >> + >> + if ((rc = virFileReadAll(valPath, 16, &valToStr)) < 0) { > > Why just 16 bytes? > Cause the data type of halt_poll_success_ns and halt_poll_fail_ns is u64, it only requires 8 bytes of memory. >> + virReportError(VIR_ERR_INTERNAL_ERROR, >> + _("Unable to read from '%s'"), valPath); >> + goto cleanup; >> + } >> + >> + /* Terminated with '\n' has sometimes harmful effects to the caller */ > > To the caller? I don't quite understand what you mean here. > The code under this comment is copied from virCgroupGetValueRaw which can only be used to obtain Cgroup data. I plan extract this function to virutil.c in an independent patch. So that we can call this function directly. >> + if (rc > 0 && (valToStr)[rc - 1] == '\n') >> + (valToStr)[rc - 1] = '\0'; >> + >> + /* 10 is a Cardinality, must be between 2 and 36 inclusive, >> + * or special value 0. Used in fuction strtoull() >> + */ > > What's the point of this comment? > Since 10 here is a magic number, I've given an comment to explain it. >> + if (virStrToLong_ull(valToStr, NULL, 10, value) < 0) { >> + virReportError(VIR_ERR_INTERNAL_ERROR, >> + _("Unable to parse '%s' as an integer"), valToStr); >> + goto cleanup; >> + } >> + >> + ret = 0; >> +cleanup: > > No need for cleanup label since it's not cleaning up anything. > OK, I will remove it in the next version. >> + return ret; >> +} >> + >> +int >> +virGetCpuHaltPollTime(pid_t pid, >> + unsigned long long *haltPollSuccess, >> + unsigned long long *haltPollFail) >> +{ >> + g_autofree char *pidToStr = NULL; >> + g_autofree char *debugFsPath = NULL; >> + g_autofree char *completePath = NULL; >> + struct dirent *ent = NULL; >> + DIR *dir = NULL; >> + int ret = -1; >> + int flag = 0; >> + >> + if (!(debugFsPath = virFileFindMountPoint("debugfs"))) { >> + virReportSystemError(errno, "%s", >> + _("unable to find debugfs mountpoint")); >> + goto cleanup; >> + } >> + >> + completePath = g_strdup_printf("%s/%s", debugFsPath, "kvm"); >> + if (virDirOpen(&dir, completePath) < 0) { >> + virReportError(VIR_ERR_INTERNAL_ERROR, >> + "%s %s", "Can not open directory", completePath); >> + return ret; >> + } >> + >> + pidToStr = g_strdup_printf("%d", pid); > > %d for pid_t is invalid. > Should I use %lld? >> + while (virDirRead(dir, &ent, NULL) > 0) { >> + if (strncmp(ent->d_name, pidToStr, strlen(pidToStr)) == 0 && > > We mandate use of our wrappers, in this case STRPREFIX > >> + ent->d_name[strlen(pidToStr)] == '-') { > > Formatting the pidToStr including the '-' will simplify the code since > you can match the full prefix. > OK, I'll use the STRPREFIX in next version. >> + flag = 1; >> + break; >> + } >> + } >> + >> + if (flag == 0) { >> + virReportError(VIR_ERR_INTERNAL_ERROR, >> + _("Could not find VM(Pid %s) in '%s'"), pidToStr, completePath); > > Reporting errors in optional stats code is not acceptable. The domstats > code is best-effort and should return what it can. > So, the function should not report any errors or return any error number in any case. If something happends, e.g. debugfs isn't mounted, the caller should not add halt_poll_success_ns and halt_poll_fail_ns to domstats. Have I got that right? >> + goto cleanup; >> + } >> + >> + if (virGetDebugFsKvmValue(ent, completePath, "halt_poll_success_ns", haltPollSuccess) < 0 || >> + virGetDebugFsKvmValue(ent, completePath, "halt_poll_fail_ns", haltPollFail) < 0) { >> + goto cleanup; >> + } >> + >> + ret = 0; >> +cleanup: >> + closedir(dir); >> + return ret; >> +} > > . > Thanks, Fei.