This patch addresses on PPC the same aspects that bug 1103314 addressed on x86. The PCI expander bus creates multiple primary PCI busses, each of which can be assigned a specific NUMA affinity, which on x86 is advertised through ACPI on a per-bus basis. For SPAPR, a PHB's NUMA affinities are assigned on a per-PHB basis, and there is no mechanism for advertising NUMA affinities to a guest on a per-bus basis. So, even if qemu-ppc manages to get some sort of multi-bus topology working using PXB, there is no way to expose the affinities of these busses to the guest. They can only be exposed on a per-PHB/per-domain basis. So, enable the NUMA node tag in the pci-root controller on PPC. Signed-off-by: Shivaprasad G Bhat <sbhat@xxxxxxxxxxxxxxxxxx> --- docs/formatdomain.html.in | 5 ++ src/qemu/qemu_command.c | 25 +++++++++++- src/qemu/qemu_domain.c | 15 ++++--- ...emuxml2argv-spapr-pci-hos-bridge-numa-node.args | 26 ++++++++++++ ...qemuxml2argv-spapr-pci-hos-bridge-numa-node.xml | 43 ++++++++++++++++++++ tests/qemuxml2argvtest.c | 2 + 6 files changed, 109 insertions(+), 7 deletions(-) create mode 100644 tests/qemuxml2argvdata/qemuxml2argv-spapr-pci-hos-bridge-numa-node.args create mode 100644 tests/qemuxml2argvdata/qemuxml2argv-spapr-pci-hos-bridge-numa-node.xml diff --git a/docs/formatdomain.html.in b/docs/formatdomain.html.in index 11b3330..ea45146 100644 --- a/docs/formatdomain.html.in +++ b/docs/formatdomain.html.in @@ -3492,6 +3492,11 @@ part of the specified NUMA node (it is up to the user of the libvirt API to attach host devices to the correct pci-expander-bus when assigning them to the domain). + On PPC64, the PCI devices can be specified to be part of a NUMA + node using only the pci-root controller with an optional + <code>&lt;node&gt;</code> subelement within the + <code>&lt;target&gt;</code> subelement. All the PCI devices of + the guest will be part of the specified NUMA node. 
</dd> </dl> <p> diff --git a/src/qemu/qemu_command.c b/src/qemu/qemu_command.c index 9adf0fe..ec794f0 100644 --- a/src/qemu/qemu_command.c +++ b/src/qemu/qemu_command.c @@ -3055,6 +3055,25 @@ qemuBuildControllerDevStr(const virDomainDef *domainDef, return NULL; } +static int qemuBuildSPAPRGlobalPCIRootNodeCommandLine(virCommandPtr cmd, + virDomainControllerDefPtr def, + virQEMUCapsPtr qemuCaps) +{ + if (def->opts.pciopts.numaNode != -1) { + if (!virQEMUCapsGet(qemuCaps, QEMU_CAPS_SPAPR_PCI_HOST_BRIDGE_NUMA_NODE)) { + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", + _("the numa_node option with spapr-pci-host-bridge controller " + "is not supported in this QEMU binary")); + return -1; + } + virCommandAddArg(cmd, "-global"); + virCommandAddArgFormat(cmd, "spapr-pci-host-bridge.numa_node=%d", + def->opts.pciopts.numaNode); + } + + return 0; +} + static int qemuBuildControllerDevCommandLine(virCommandPtr cmd, @@ -3107,8 +3126,12 @@ qemuBuildControllerDevCommandLine(virCommandPtr cmd, /* skip pci-root/pcie-root */ if (cont->type == VIR_DOMAIN_CONTROLLER_TYPE_PCI && (cont->model == VIR_DOMAIN_CONTROLLER_MODEL_PCI_ROOT || - cont->model == VIR_DOMAIN_CONTROLLER_MODEL_PCIE_ROOT)) + cont->model == VIR_DOMAIN_CONTROLLER_MODEL_PCIE_ROOT)) { + if (ARCH_IS_PPC64(def->os.arch)) + if (qemuBuildSPAPRGlobalPCIRootNodeCommandLine(cmd, cont, qemuCaps) < 0) + return -1; continue; + } /* first SATA controller on Q35 machines is implicit */ if (cont->type == VIR_DOMAIN_CONTROLLER_TYPE_SATA && diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c index 8cba755..b5f89a6 100644 --- a/src/qemu/qemu_domain.c +++ b/src/qemu/qemu_domain.c @@ -3058,12 +3058,14 @@ qemuDomainDeviceDefPostParse(virDomainDeviceDefPtr dev, /* if a PCI expander bus has a NUMA node set, make sure * that NUMA node is configured in the guest <cpu><numa> * array. NUMA cell id's in this array are numbered - * from 0 .. size-1. + * from 0 .. size-1. 
Or, on PPC, if the pci-root has the + * NUMA node set, do the same. */ - if ((cont->model == VIR_DOMAIN_CONTROLLER_MODEL_PCI_EXPANDER_BUS || - cont->model == VIR_DOMAIN_CONTROLLER_MODEL_PCIE_EXPANDER_BUS) && - (int) virDomainNumaGetNodeCount(def->numa) - <= cont->opts.pciopts.numaNode) { + if (((cont->model == VIR_DOMAIN_CONTROLLER_MODEL_PCI_EXPANDER_BUS || + cont->model == VIR_DOMAIN_CONTROLLER_MODEL_PCIE_EXPANDER_BUS) || + (qemuDomainMachineIsPSeries(def) && + cont->model == VIR_DOMAIN_CONTROLLER_MODEL_PCI_ROOT)) && + (int) virDomainNumaGetNodeCount(def->numa) <= cont->opts.pciopts.numaNode) { virReportError(VIR_ERR_XML_ERROR, _("%s with index %d is " "configured for a NUMA node (%d) " @@ -3814,7 +3816,8 @@ qemuDomainDefFormatBuf(virQEMUDriverPtr driver, } if (pci && pci->idx == 0 && - pci->model == VIR_DOMAIN_CONTROLLER_MODEL_PCI_ROOT) { + pci->model == VIR_DOMAIN_CONTROLLER_MODEL_PCI_ROOT && + pci->opts.pciopts.numaNode == -1) { VIR_DEBUG("Removing default pci-root from domain '%s'" " for migration compatibility", def->name); toremove++; diff --git a/tests/qemuxml2argvdata/qemuxml2argv-spapr-pci-hos-bridge-numa-node.args b/tests/qemuxml2argvdata/qemuxml2argv-spapr-pci-hos-bridge-numa-node.args new file mode 100644 index 0000000..7b70cb6 --- /dev/null +++ b/tests/qemuxml2argvdata/qemuxml2argv-spapr-pci-hos-bridge-numa-node.args @@ -0,0 +1,26 @@ +LC_ALL=C \ +PATH=/bin \ +HOME=/home/test \ +USER=test \ +LOGNAME=test \ +QEMU_AUDIO_DRV=none \ +/usr/bin/qemu-system-ppc64 \ +-name QEMUGuest1 \ +-S \ +-M pseries \ +-m 2048 \ +-smp 8,sockets=3,cores=1,threads=8 \ +-numa node,nodeid=0,cpus=0-3,mem=1024 \ +-numa node,nodeid=1,cpus=4-7,mem=1024 \ +-uuid 87eedafe-eedc-4336-8130-ed9fe5dc90c8 \ +-nographic \ +-nodefaults \ +-monitor unix:/tmp/lib/domain--1-QEMUGuest1/monitor.sock,server,nowait \ +-no-acpi \ +-boot c \ +-global spapr-pci-host-bridge.numa_node=1 \ +-device spapr-vscsi,id=scsi0,reg=0x2000 \ +-usb \ +-drive 
file=/dev/HostVG/QEMUGuest1,format=raw,if=none,id=drive-scsi0-0-0-0 \ +-device scsi-disk,bus=scsi0.0,channel=0,scsi-id=0,lun=0,\ +drive=drive-scsi0-0-0-0,id=scsi0-0-0-0 diff --git a/tests/qemuxml2argvdata/qemuxml2argv-spapr-pci-hos-bridge-numa-node.xml b/tests/qemuxml2argvdata/qemuxml2argv-spapr-pci-hos-bridge-numa-node.xml new file mode 100644 index 0000000..4dcd68b --- /dev/null +++ b/tests/qemuxml2argvdata/qemuxml2argv-spapr-pci-hos-bridge-numa-node.xml @@ -0,0 +1,43 @@ +<domain type='qemu'> + <name>QEMUGuest1</name> + <uuid>87eedafe-eedc-4336-8130-ed9fe5dc90c8</uuid> + <memory unit='KiB'>2097152</memory> + <currentMemory unit='MiB'>2048</currentMemory> + <vcpu placement='static'>8</vcpu> + <numatune> + <memory mode='strict' nodeset='1'/> + </numatune> + <cpu> + <topology sockets='3' cores='1' threads='8'/> + <numa> + <cell id='0' cpus='0-3' memory='1048576' unit='KiB'/> + <cell id='1' cpus='4-7' memory='1048576' unit='KiB'/> + </numa> + </cpu> + <os> + <type arch='ppc64' machine='pseries'>hvm</type> + <boot dev='hd'/> + </os> + <clock offset='utc'/> + <on_poweroff>destroy</on_poweroff> + <on_reboot>restart</on_reboot> + <on_crash>destroy</on_crash> + <devices> + <emulator>/usr/bin/qemu-system-ppc64</emulator> + <disk type='block' device='disk'> + <driver name='qemu' type='raw'/> + <source dev='/dev/HostVG/QEMUGuest1'/> + <target dev='hda' bus='scsi'/> + <address type='drive' controller='0' bus='0' target='0' unit='0'/> + </disk> + <controller type='usb' index='0'/> + <controller type='scsi' index='0'/> + <controller type='pci' index='0' model='pci-root'> + <target> + <node>1</node> + </target> + </controller> + <memballoon model='none'/> + <panic model='pseries'/> + </devices> +</domain> diff --git a/tests/qemuxml2argvtest.c b/tests/qemuxml2argvtest.c index d025930..8a5b96a 100644 --- a/tests/qemuxml2argvtest.c +++ b/tests/qemuxml2argvtest.c @@ -2219,6 +2219,8 @@ mymain(void) QEMU_CAPS_DEVICE_DMI_TO_PCI_BRIDGE, QEMU_CAPS_MACHINE_IOMMU); 
DO_TEST("cpu-hotplug-startup", QEMU_CAPS_QUERY_HOTPLUGGABLE_CPUS); + DO_TEST("spapr-pci-hos-bridge-numa-node", QEMU_CAPS_NUMA, + QEMU_CAPS_SPAPR_PCI_HOST_BRIDGE_NUMA_NODE); qemuTestDriverFree(&driver); -- libvir-list mailing list libvir-list@xxxxxxxxxx https://www.redhat.com/mailman/listinfo/libvir-list