[PATCH 2/2] qemu: Enable NUMA node tag in pci-root for PPC64

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



This patch addresses, on PPC, the same aspects that bug 1103314 addressed
on x86.

The PCI expander bus creates multiple primary PCI busses, where each of these
busses can be assigned a specific NUMA affinity, which, on x86, is
advertised through ACPI on a per-bus basis.

For SPAPR, a PHB's NUMA affinities are assigned on a per-PHB basis, and
there is no mechanism for advertising NUMA affinities to a guest on a
per-bus basis. So, even if qemu-ppc manages to get some sort of multi-bus
topology working using PXB, there is no way to expose the affinities
of these busses to the guest. It can only be exposed on a per-PHB/per-domain
basis.

So, enable the NUMA node tag in the pci-root controller on PPC.

Signed-off-by: Shivaprasad G Bhat <sbhat@xxxxxxxxxxxxxxxxxx>
---
 docs/formatdomain.html.in                          |    5 ++
 src/qemu/qemu_command.c                            |   25 +++++++++++-
 src/qemu/qemu_domain.c                             |   15 ++++---
 ...emuxml2argv-spapr-pci-hos-bridge-numa-node.args |   26 ++++++++++++
 ...qemuxml2argv-spapr-pci-hos-bridge-numa-node.xml |   43 ++++++++++++++++++++
 tests/qemuxml2argvtest.c                           |    2 +
 6 files changed, 109 insertions(+), 7 deletions(-)
 create mode 100644 tests/qemuxml2argvdata/qemuxml2argv-spapr-pci-hos-bridge-numa-node.args
 create mode 100644 tests/qemuxml2argvdata/qemuxml2argv-spapr-pci-hos-bridge-numa-node.xml

diff --git a/docs/formatdomain.html.in b/docs/formatdomain.html.in
index 11b3330..ea45146 100644
--- a/docs/formatdomain.html.in
+++ b/docs/formatdomain.html.in
@@ -3492,6 +3492,11 @@
         part of the specified NUMA node (it is up to the user of the
         libvirt API to attach host devices to the correct
         pci-expander-bus when assigning them to the domain).
+        On PPC64, the PCI devices can be specified to be part of a NUMA
+        node using only the pci-root controller with an optional
+        <code>&lt;node&gt;</code> subelement within the
+        <code>&lt;target&gt;</code> subelement. All the PCI devices of
+        the guest will be part of the specified NUMA node.
       </dd>
     </dl>
     <p>
diff --git a/src/qemu/qemu_command.c b/src/qemu/qemu_command.c
index 9adf0fe..ec794f0 100644
--- a/src/qemu/qemu_command.c
+++ b/src/qemu/qemu_command.c
@@ -3055,6 +3055,25 @@ qemuBuildControllerDevStr(const virDomainDef *domainDef,
     return NULL;
 }
 
+static int qemuBuildSPAPRGlobalPCIRootNodeCommandLine(virCommandPtr cmd,
+                                                      virDomainControllerDefPtr def,
+                                                      virQEMUCapsPtr qemuCaps)
+{
+    if (def->opts.pciopts.numaNode != -1) {
+        if (!virQEMUCapsGet(qemuCaps, QEMU_CAPS_SPAPR_PCI_HOST_BRIDGE_NUMA_NODE)) {
+            virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
+                           _("the numa_node option with spapr-pci-host-bridge controller "
+                             "is not supported in this QEMU binary"));
+            return -1;
+        }
+        virCommandAddArg(cmd, "-global");
+        virCommandAddArgFormat(cmd, "spapr-pci-host-bridge.numa_node=%d",
+                               def->opts.pciopts.numaNode);
+    }
+
+    return 0;
+}
+
 
 static int
 qemuBuildControllerDevCommandLine(virCommandPtr cmd,
@@ -3107,8 +3126,12 @@ qemuBuildControllerDevCommandLine(virCommandPtr cmd,
             /* skip pci-root/pcie-root */
             if (cont->type == VIR_DOMAIN_CONTROLLER_TYPE_PCI &&
                 (cont->model == VIR_DOMAIN_CONTROLLER_MODEL_PCI_ROOT ||
-                 cont->model == VIR_DOMAIN_CONTROLLER_MODEL_PCIE_ROOT))
+                 cont->model == VIR_DOMAIN_CONTROLLER_MODEL_PCIE_ROOT)) {
+                if (ARCH_IS_PPC64(def->os.arch))
+                    if (qemuBuildSPAPRGlobalPCIRootNodeCommandLine(cmd, cont, qemuCaps) < 0)
+                        return -1;
                 continue;
+            }
 
             /* first SATA controller on Q35 machines is implicit */
             if (cont->type == VIR_DOMAIN_CONTROLLER_TYPE_SATA &&
diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c
index 8cba755..b5f89a6 100644
--- a/src/qemu/qemu_domain.c
+++ b/src/qemu/qemu_domain.c
@@ -3058,12 +3058,14 @@ qemuDomainDeviceDefPostParse(virDomainDeviceDefPtr dev,
             /* if a PCI expander bus has a NUMA node set, make sure
              * that NUMA node is configured in the guest <cpu><numa>
              * array. NUMA cell id's in this array are numbered
-             * from 0 .. size-1.
+             * from 0 .. size-1. Or On PPC, if the pci/pcie-root has the
+             * NUMA node set, do the same.
              */
-            if ((cont->model == VIR_DOMAIN_CONTROLLER_MODEL_PCI_EXPANDER_BUS ||
-                 cont->model == VIR_DOMAIN_CONTROLLER_MODEL_PCIE_EXPANDER_BUS) &&
-                (int) virDomainNumaGetNodeCount(def->numa)
-                <= cont->opts.pciopts.numaNode) {
+            if (((cont->model == VIR_DOMAIN_CONTROLLER_MODEL_PCI_EXPANDER_BUS ||
+                  cont->model == VIR_DOMAIN_CONTROLLER_MODEL_PCIE_EXPANDER_BUS) ||
+                 (qemuDomainMachineIsPSeries(def) &&
+                  cont->model == VIR_DOMAIN_CONTROLLER_MODEL_PCI_ROOT)) &&
+                (int) virDomainNumaGetNodeCount(def->numa) <= cont->opts.pciopts.numaNode) {
                 virReportError(VIR_ERR_XML_ERROR,
                                _("%s with index %d is "
                                  "configured for a NUMA node (%d) "
@@ -3814,7 +3816,8 @@ qemuDomainDefFormatBuf(virQEMUDriverPtr driver,
         }
 
         if (pci && pci->idx == 0 &&
-            pci->model == VIR_DOMAIN_CONTROLLER_MODEL_PCI_ROOT) {
+            pci->model == VIR_DOMAIN_CONTROLLER_MODEL_PCI_ROOT &&
+            pci->opts.pciopts.numaNode == -1) {
             VIR_DEBUG("Removing default pci-root from domain '%s'"
                       " for migration compatibility", def->name);
             toremove++;
diff --git a/tests/qemuxml2argvdata/qemuxml2argv-spapr-pci-hos-bridge-numa-node.args b/tests/qemuxml2argvdata/qemuxml2argv-spapr-pci-hos-bridge-numa-node.args
new file mode 100644
index 0000000..7b70cb6
--- /dev/null
+++ b/tests/qemuxml2argvdata/qemuxml2argv-spapr-pci-hos-bridge-numa-node.args
@@ -0,0 +1,26 @@
+LC_ALL=C \
+PATH=/bin \
+HOME=/home/test \
+USER=test \
+LOGNAME=test \
+QEMU_AUDIO_DRV=none \
+/usr/bin/qemu-system-ppc64 \
+-name QEMUGuest1 \
+-S \
+-M pseries \
+-m 2048 \
+-smp 8,sockets=3,cores=1,threads=8 \
+-numa node,nodeid=0,cpus=0-3,mem=1024 \
+-numa node,nodeid=1,cpus=4-7,mem=1024 \
+-uuid 87eedafe-eedc-4336-8130-ed9fe5dc90c8 \
+-nographic \
+-nodefaults \
+-monitor unix:/tmp/lib/domain--1-QEMUGuest1/monitor.sock,server,nowait \
+-no-acpi \
+-boot c \
+-global spapr-pci-host-bridge.numa_node=1 \
+-device spapr-vscsi,id=scsi0,reg=0x2000 \
+-usb \
+-drive file=/dev/HostVG/QEMUGuest1,format=raw,if=none,id=drive-scsi0-0-0-0 \
+-device scsi-disk,bus=scsi0.0,channel=0,scsi-id=0,lun=0,\
+drive=drive-scsi0-0-0-0,id=scsi0-0-0-0
diff --git a/tests/qemuxml2argvdata/qemuxml2argv-spapr-pci-hos-bridge-numa-node.xml b/tests/qemuxml2argvdata/qemuxml2argv-spapr-pci-hos-bridge-numa-node.xml
new file mode 100644
index 0000000..4dcd68b
--- /dev/null
+++ b/tests/qemuxml2argvdata/qemuxml2argv-spapr-pci-hos-bridge-numa-node.xml
@@ -0,0 +1,43 @@
+<domain type='qemu'>
+  <name>QEMUGuest1</name>
+  <uuid>87eedafe-eedc-4336-8130-ed9fe5dc90c8</uuid>
+  <memory unit='KiB'>2097152</memory>
+  <currentMemory unit='MiB'>2048</currentMemory>
+  <vcpu placement='static'>8</vcpu>
+  <numatune>
+    <memory mode='strict' nodeset='1'/>
+  </numatune>
+  <cpu>
+    <topology sockets='3' cores='1' threads='8'/>
+    <numa>
+      <cell id='0' cpus='0-3' memory='1048576' unit='KiB'/>
+      <cell id='1' cpus='4-7' memory='1048576' unit='KiB'/>
+    </numa>
+  </cpu>
+  <os>
+    <type arch='ppc64' machine='pseries'>hvm</type>
+    <boot dev='hd'/>
+  </os>
+  <clock offset='utc'/>
+  <on_poweroff>destroy</on_poweroff>
+  <on_reboot>restart</on_reboot>
+  <on_crash>destroy</on_crash>
+  <devices>
+    <emulator>/usr/bin/qemu-system-ppc64</emulator>
+    <disk type='block' device='disk'>
+      <driver name='qemu' type='raw'/>
+      <source dev='/dev/HostVG/QEMUGuest1'/>
+      <target dev='hda' bus='scsi'/>
+      <address type='drive' controller='0' bus='0' target='0' unit='0'/>
+    </disk>
+    <controller type='usb' index='0'/>
+    <controller type='scsi' index='0'/>
+    <controller type='pci' index='0' model='pci-root'>
+      <target>
+        <node>1</node>
+      </target>
+    </controller>
+    <memballoon model='none'/>
+    <panic model='pseries'/>
+  </devices>
+</domain>
diff --git a/tests/qemuxml2argvtest.c b/tests/qemuxml2argvtest.c
index d025930..8a5b96a 100644
--- a/tests/qemuxml2argvtest.c
+++ b/tests/qemuxml2argvtest.c
@@ -2219,6 +2219,8 @@ mymain(void)
             QEMU_CAPS_DEVICE_DMI_TO_PCI_BRIDGE, QEMU_CAPS_MACHINE_IOMMU);
 
     DO_TEST("cpu-hotplug-startup", QEMU_CAPS_QUERY_HOTPLUGGABLE_CPUS);
+    DO_TEST("spapr-pci-hos-bridge-numa-node", QEMU_CAPS_NUMA,
+            QEMU_CAPS_SPAPR_PCI_HOST_BRIDGE_NUMA_NODE);
 
     qemuTestDriverFree(&driver);
 

--
libvir-list mailing list
libvir-list@xxxxxxxxxx
https://www.redhat.com/mailman/listinfo/libvir-list



[Index of Archives]     [Virt Tools]     [Libvirt Users]     [Lib OS Info]     [Fedora Users]     [Fedora Desktop]     [Fedora SELinux]     [Big List of Linux Books]     [Yosemite News]     [KDE Users]     [Fedora Tools]