KVM added the ability to get the thread ID for each vCPU via the monitor:

  (qemu) info cpus
  * CPU #0: pc=0x00000000000ffff0 thread_id=11463
    CPU #1: pc=0x00000000fffffff0 thread_id=11464
    CPU #2: pc=0x00000000fffffff0 thread_id=11465

With this we have enough information to support vCPU pinning in the QEMU
driver for KVM. For QEMU/KQEMU it is trivial, since they have a single
thread.

The following patch implements CPU pinning and fetching of CPU affinity
information. In this example I pin one of the 2 vCPUs in a guest:

[berrange@t60wlan libvirt-numa]$ ./src/virsh --connect qemu:///system start VirtTest
Domain VirtTest started

[berrange@t60wlan libvirt-numa]$ ./src/virsh --connect qemu:///system vcpuinfo VirtTest
VCPU:           0
CPU:            0
State:          running
CPU Affinity:   yy

VCPU:           1
CPU:            0
State:          running
CPU Affinity:   yy

[berrange@t60wlan libvirt-numa]$ ./src/virsh --connect qemu:///system vcpupin VirtTest 1 0

[berrange@t60wlan libvirt-numa]$ ./src/virsh --connect qemu:///system vcpuinfo VirtTest
VCPU:           0
CPU:            0
State:          running
CPU Affinity:   yy

VCPU:           1
CPU:            0
State:          running
CPU Affinity:   y-

This is implemented using sched_setaffinity()/sched_getaffinity(), which are
Linux specific; there doesn't appear to be a portable process affinity API
in POSIX.
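As an aside (this is not part of the patch), here is a minimal standalone
sketch of the affinity call the patch relies on: a libvirt-style CPU map
(one bit per host CPU, eight CPUs per byte) is converted into a cpu_set_t
and applied to a vCPU's thread ID with sched_setaffinity(). The thread ID
and map contents below are made up purely for illustration:

/* Minimal sketch, not part of the patch: pin a vCPU thread to the host
 * CPUs selected in a libvirt-style bitmap. */
#define _GNU_SOURCE
#include <sched.h>
#include <stdio.h>
#include <sys/types.h>

static int pin_thread(pid_t tid, const unsigned char *cpumap, int maplen)
{
    cpu_set_t mask;
    int i;

    CPU_ZERO(&mask);
    for (i = 0; i < maplen * 8; i++) {
        /* Bit i of the map corresponds to host CPU i */
        if (cpumap[i / 8] & (1 << (i % 8)))
            CPU_SET(i, &mask);
    }

    /* On Linux, sched_setaffinity() accepts a thread ID, which is
       exactly what 'info cpus' reports for each vCPU */
    if (sched_setaffinity(tid, sizeof(mask), &mask) < 0) {
        perror("sched_setaffinity");
        return -1;
    }
    return 0;
}

int main(void)
{
    /* Hypothetical values: the vCPU 1 thread from the monitor output
       above, pinned to host CPU 0 only */
    unsigned char cpumap[] = { 0x01 };
    return pin_thread(11464, cpumap, (int)sizeof(cpumap)) < 0 ? 1 : 0;
}

sched_getaffinity() is used the same way in the opposite direction to read
the current mask back into a CPU map.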
If the KVM instance does not support the 'thread_id' data in 'info cpus',
pinning is simply unavailable and a suitable error message is reported when
it is attempted. We detect the mapping at startup and cache it thereafter.

Dan.

diff -r 0f537442ce97 src/qemu_conf.h
--- a/src/qemu_conf.h   Fri May 16 16:09:57 2008 -0400
+++ b/src/qemu_conf.h   Fri May 16 17:39:29 2008 -0400
@@ -328,6 +328,9 @@
     int *tapfds;
     int ntapfds;
 
+    int nvcpupids;
+    int *vcpupids;
+
     int qemuVersion;
     int qemuCmdFlags; /* values from enum qemud_cmd_flags */
diff -r 0f537442ce97 src/qemu_driver.c
--- a/src/qemu_driver.c Fri May 16 16:09:57 2008 -0400
+++ b/src/qemu_driver.c Fri May 16 17:39:29 2008 -0400
@@ -61,6 +61,7 @@
 #include "nodeinfo.h"
 #include "stats_linux.h"
 #include "capabilities.h"
+#include "memory.h"
 
 static int qemudShutdown(void);
 
@@ -118,6 +119,10 @@
                                  struct qemud_network *network);
 
 static int qemudDomainGetMaxVcpus(virDomainPtr dom);
+static int qemudMonitorCommand (const struct qemud_driver *driver,
+                                const struct qemud_vm *vm,
+                                const char *cmd,
+                                char **reply);
 
 static struct qemud_driver *qemu_driver = NULL;
 
@@ -608,6 +613,106 @@
     return ret;
 }
 
+static int
+qemudDetectVcpuPIDs(virConnectPtr conn,
+                    struct qemud_driver *driver,
+                    struct qemud_vm *vm) {
+    char *qemucpus = NULL;
+    char *line;
+    int lastVcpu = -1;
+
+    /* Only KVM has separate threads for CPUs,
+       others just use the main QEMU process for the CPU */
+    if (vm->def->virtType != QEMUD_VIRT_KVM)
+        vm->nvcpupids = 1;
+    else
+        vm->nvcpupids = vm->def->vcpus;
+
+    if (VIR_ALLOC_N(vm->vcpupids, vm->nvcpupids) < 0) {
+        qemudReportError(conn, NULL, NULL, VIR_ERR_NO_MEMORY,
+                         "%s", _("allocate cpumap"));
+        return -1;
+    }
+
+    if (vm->def->virtType != QEMUD_VIRT_KVM) {
+        vm->vcpupids[0] = vm->pid;
+        return 0;
+    }
+
+    if (qemudMonitorCommand(driver, vm, "info cpus", &qemucpus) < 0) {
+        qemudReportError(conn, NULL, NULL, VIR_ERR_INTERNAL_ERROR,
+                         "%s", _("cannot run monitor command to fetch CPU thread info"));
+        VIR_FREE(vm->vcpupids);
+        vm->nvcpupids = 0;
+        return -1;
+    }
+
+    /*
+     * This is the gross format we're about to parse :-{
+     *
+     * (qemu) info cpus
+     * * CPU #0: pc=0x00000000000f0c4a thread_id=30019
+     *   CPU #1: pc=0x00000000fffffff0 thread_id=30020
+     *   CPU #2: pc=0x00000000fffffff0 thread_id=30021
+     *
+     */
+    line = qemucpus;
+    do {
+        char *offset = strchr(line, '#');
+        char *end = NULL;
+        int vcpu = 0, tid = 0;
+
+        /* See if we're all done */
+        if (offset == NULL)
+            break;
+
+        /* Extract VCPU number */
+        if (virStrToLong_i(offset + 1, &end, 10, &vcpu) < 0)
+            goto error;
+        if (end == NULL || *end != ':')
+            goto error;
+
+        /* Extract host Thread ID */
+        if ((offset = strstr(line, "thread_id=")) == NULL)
+            goto error;
+        if (virStrToLong_i(offset + strlen("thread_id="), &end, 10, &tid) < 0)
+            goto error;
+        if (end == NULL || !c_isspace(*end))
+            goto error;
+
+        /* Validate the VCPU is in expected range & order */
+        if (vcpu > vm->nvcpupids ||
+            vcpu != (lastVcpu + 1))
+            goto error;
+
+        lastVcpu = vcpu;
+        vm->vcpupids[vcpu] = tid;
+
+        /* Skip to next data line */
+        line = strchr(offset, '\r');
+        if (line == NULL)
+            line = strchr(offset, '\n');
+    } while (line != NULL);
+
+    /* Validate we got data for all VCPUs we expected */
+    if (lastVcpu != (vm->def->vcpus - 1))
+        goto error;
+
+    free(qemucpus);
+    return 0;
+
+error:
+    VIR_FREE(vm->vcpupids);
+    vm->nvcpupids = 0;
+    free(qemucpus);
+
+    /* Explicitly return success, not error. Older KVM does
+       not have vCPU -> Thread mapping info and we don't
+       want to break its use. This merely disables ability
+       to pin vCPUs with libvirt */
+    return 0;
+}
+
 static int qemudNextFreeVNCPort(struct qemud_driver *driver ATTRIBUTE_UNUSED) {
     int i;
@@ -785,6 +890,11 @@
             qemudShutdownVMDaemon(conn, driver, vm);
             return -1;
         }
+
+        if (qemudDetectVcpuPIDs(conn, driver, vm) < 0) {
+            qemudShutdownVMDaemon(conn, driver, vm);
+            return -1;
+        }
     }
 
     return ret;
@@ -857,6 +967,9 @@
     vm->pid = -1;
     vm->id = -1;
     vm->state = VIR_DOMAIN_SHUTOFF;
+    free(vm->vcpupids);
+    vm->vcpupids = NULL;
+    vm->nvcpupids = 0;
 
     if (vm->newDef) {
         qemudFreeVMDef(vm->def);
@@ -2271,6 +2384,127 @@
 
     vm->def->vcpus = nvcpus;
     return 0;
+}
+
+
+static int
+qemudDomainPinVcpu(virDomainPtr dom,
+                   unsigned int vcpu,
+                   unsigned char *cpumap,
+                   int maplen) {
+    struct qemud_driver *driver = (struct qemud_driver *)dom->conn->privateData;
+    struct qemud_vm *vm = qemudFindVMByUUID(driver, dom->uuid);
+    cpu_set_t mask;
+    int i, maxcpu;
+    virNodeInfo nodeinfo;
+
+    if (!qemudIsActiveVM(vm)) {
+        qemudReportError(dom->conn, dom, NULL, VIR_ERR_INVALID_ARG,
+                         "%s", _("cannot pin vcpus on an inactive domain"));
+        return -1;
+    }
+
+    if (vcpu > (vm->nvcpupids-1)) {
+        qemudReportError(dom->conn, dom, NULL, VIR_ERR_INVALID_ARG,
+                         _("vcpu number out of range %d > %d"),
+                         vcpu, vm->nvcpupids);
+        return -1;
+    }
+
+    if (virNodeInfoPopulate(dom->conn, &nodeinfo) < 0)
+        return -1;
+
+    maxcpu = maplen * 8;
+    if (maxcpu > nodeinfo.cpus)
+        maxcpu = nodeinfo.cpus;
+
+    CPU_ZERO(&mask);
+    for (i = 0 ; i < maxcpu ; i++) {
+        if ((cpumap[i/8] >> (i % 8)) & 1)
+            CPU_SET(i, &mask);
+    }
+
+    if (vm->vcpupids != NULL) {
+        if (sched_setaffinity(vm->vcpupids[vcpu], sizeof(mask), &mask) < 0) {
+            qemudReportError(dom->conn, dom, NULL, VIR_ERR_INVALID_ARG,
+                             _("cannot set affinity: %s"), strerror(errno));
+            return -1;
+        }
+    } else {
+        qemudReportError(dom->conn, dom, NULL, VIR_ERR_NO_SUPPORT,
+                         "%s", _("cpu affinity is not supported"));
+        return -1;
+    }
+
+    return 0;
+}
+
+static int
+qemudDomainGetVcpus(virDomainPtr dom,
+                    virVcpuInfoPtr info,
+                    int maxinfo,
+                    unsigned char *cpumaps,
+                    int maplen) {
+    struct qemud_driver *driver = (struct qemud_driver *)dom->conn->privateData;
+    struct qemud_vm *vm = qemudFindVMByUUID(driver, dom->uuid);
+    virNodeInfo nodeinfo;
+    int i, v, maxcpu;
+
+    if (!qemudIsActiveVM(vm)) {
+        qemudReportError(dom->conn, dom, NULL, VIR_ERR_INVALID_ARG,
+                         "%s", _("cannot list vcpus on an inactive domain"));
+        return -1;
+    }
+
+    if (virNodeInfoPopulate(dom->conn, &nodeinfo) < 0)
+        return -1;
+
+    maxcpu = maplen * 8;
+    if (maxcpu > nodeinfo.cpus)
+        maxcpu = nodeinfo.cpus;
+
+    /* Clamp to actual number of vcpus */
+    if (maxinfo > vm->nvcpupids)
+        maxinfo = vm->nvcpupids;
+
+    if (maxinfo < 1)
+        return 0;
+
+    if (info != NULL) {
+        memset(info, 0, sizeof(*info) * maxinfo);
+        for (i = 0 ; i < maxinfo ; i++) {
+            info[i].number = i;
+            info[i].state = VIR_VCPU_RUNNING;
+            /* XXX cpu time, current pCPU mapping */
+        }
+    }
+
+    if (cpumaps != NULL) {
+        memset(cpumaps, 0, maplen * maxinfo);
+        if (vm->vcpupids != NULL) {
+            for (v = 0 ; v < maxinfo ; v++) {
+                cpu_set_t mask;
+                unsigned char *cpumap = VIR_GET_CPUMAP(cpumaps, maplen, v);
+                CPU_ZERO(&mask);
+
+                if (sched_getaffinity(vm->vcpupids[v], sizeof(mask), &mask) < 0) {
+                    qemudReportError(dom->conn, dom, NULL, VIR_ERR_INVALID_ARG,
+                                     _("cannot get affinity: %s"), strerror(errno));
+                    return -1;
+                }
+
+                for (i = 0 ; i < maxcpu ; i++)
+                    if (CPU_ISSET(i, &mask))
+                        VIR_USE_CPU(cpumap, i);
+            }
+        } else {
+            qemudReportError(dom->conn, dom, NULL, VIR_ERR_NO_SUPPORT,
+                             "%s", _("cpu affinity is not available"));
+            return -1;
+        }
+    }
+
+    return maxinfo;
 }
 
 static int qemudDomainGetMaxVcpus(virDomainPtr dom) {
@@ -3221,8 +3455,8 @@
     qemudDomainRestore, /* domainRestore */
     NULL, /* domainCoreDump */
     qemudDomainSetVcpus, /* domainSetVcpus */
-    NULL, /* domainPinVcpu */
-    NULL, /* domainGetVcpus */
+    qemudDomainPinVcpu, /* domainPinVcpu */
+    qemudDomainGetVcpus, /* domainGetVcpus */
    qemudDomainGetMaxVcpus, /* domainGetMaxVcpus */
     qemudDomainDumpXML, /* domainDumpXML */
     qemudListDefinedDomains, /* listDomains */
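For completeness, the new driver entry points are reached through the
existing public API. A hypothetical client doing the same thing as the
virsh commands above (pin vCPU 1 of the 2-vCPU guest to host CPU 0, then
read the affinity back) might look roughly like this; the domain name,
vCPU count and CPU numbers are just for illustration:

/* Hypothetical client-side sketch, not part of the patch */
#include <stdio.h>
#include <stdlib.h>
#include <libvirt/libvirt.h>

#define NVCPUS 2   /* the example guest has 2 vCPUs */

int main(void)
{
    virConnectPtr conn;
    virDomainPtr dom = NULL;
    virNodeInfo nodeinfo;
    virVcpuInfo info[NVCPUS];
    unsigned char *cpumap = NULL, *cpumaps = NULL;
    int maplen, ncpus, i, ret = 1;

    conn = virConnectOpen("qemu:///system");
    if (conn == NULL)
        return 1;

    dom = virDomainLookupByName(conn, "VirtTest");
    if (dom == NULL)
        goto cleanup;

    /* One bit per host CPU, 8 CPUs per byte of the map */
    if (virNodeGetInfo(conn, &nodeinfo) < 0)
        goto cleanup;
    maplen = (nodeinfo.cpus + 7) / 8;
    cpumap = calloc(maplen, 1);
    cpumaps = calloc(NVCPUS, maplen);
    if (cpumap == NULL || cpumaps == NULL)
        goto cleanup;

    /* Pin vCPU 1 to host CPU 0 only (bit N == host CPU N) */
    cpumap[0] = 0x01;
    if (virDomainPinVcpu(dom, 1, cpumap, maplen) < 0)
        goto cleanup;

    /* Read the affinity back, much as 'virsh vcpuinfo' does */
    ncpus = virDomainGetVcpus(dom, info, NVCPUS, cpumaps, maplen);
    for (i = 0; i < ncpus; i++)
        printf("vCPU %u: state=%d, affinity byte 0=0x%02x\n",
               info[i].number, info[i].state, cpumaps[i * maplen]);

    ret = 0;

cleanup:
    free(cpumaps);
    free(cpumap);
    if (dom)
        virDomainFree(dom);
    virConnectClose(conn);
    return ret;
}

It would be built against libvirt in the usual way, e.g. with the flags
reported by "pkg-config --cflags --libs libvirt".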
-- 
|: Red Hat, Engineering, Boston   -o-   http://people.redhat.com/berrange/ :|
|: http://libvirt.org  -o-  http://virt-manager.org  -o-  http://ovirt.org :|
|: http://autobuild.org  -o-  http://search.cpan.org/~danberr/ :|
|: GnuPG: 7D3B9505  -o-  F3C9 553F A1DA 4AC2 5648 23C1 B3DF F742 7D3B 9505 :|