[PATCH 3/3] NUMA: realize NUMA memory pinning

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



According to the user-provided assignment, bind the respective part
of the guest's memory to the given host node. This uses Linux's
libnuma interface to perform the pinning right after the allocation.
Failures are not fatal, but produce a warning.

Signed-off-by: Andre Przywara <andre.przywara@xxxxxxx>
---
 hw/pc.c |   51 +++++++++++++++++++++++++++++++++++++++++++++++++++
 1 files changed, 51 insertions(+), 0 deletions(-)

diff --git a/hw/pc.c b/hw/pc.c
index 1f61609..b6d4d7a 100644
--- a/hw/pc.c
+++ b/hw/pc.c
@@ -41,6 +41,11 @@
 #include "device-assignment.h"
 #include "kvm.h"
 
+#ifdef CONFIG_NUMA
+#include <numa.h>
+#include <numaif.h>
+#endif
+
 /* output Bochs bios info messages */
 //#define DEBUG_BIOS
 
@@ -874,6 +879,49 @@ void pc_cpus_init(const char *cpu_model)
     }
 }
 
+/* Best-effort mbind() of the part of each pinned guest node that lies in the
+ * RAM block at ram_addr: guest [0, border_4g) if below_4g, else the rest.
+ * Failures only produce a warning via perror(). */
+static void bind_numa(ram_addr_t ram_addr, ram_addr_t border_4g,
+                      int below_4g)
+{
+#ifdef CONFIG_NUMA
+    int i;
+    char *ram_ptr = qemu_get_ram_ptr(ram_addr);
+    nodemask_t nodemask;
+    ram_addr_t node_start = 0, node_end, start, end;
+
+    /* node_start/node_end are guest-RAM offsets and are never rebased, so
+     * the border test stays valid on the above-4G pass; start/end are the
+     * node's range clipped to, and relative to, this block. */
+    for (i = 0; i < nb_numa_nodes; i++, node_start = node_end) {
+        node_end = node_start + node_mem[i];
+        if (below_4g) {
+            if (node_start >= border_4g) {
+                break;              /* all remaining nodes are above */
+            }
+            start = node_start;
+            end = node_end < border_4g ? node_end : border_4g;
+        } else {
+            if (node_end <= border_4g) {
+                continue;           /* node lies entirely below */
+            }
+            start = node_start > border_4g ? node_start - border_4g : 0;
+            end = node_end - border_4g;
+        }
+        if (node_pin[i] < 0 || start == end) {
+            continue;               /* unpinned, or nothing in this block */
+        }
+        nodemask_zero(&nodemask);
+        nodemask_set_compat(&nodemask, node_pin[i]);
+        if (mbind(ram_ptr + start, end - start, MPOL_BIND,
+                  nodemask.n, NUMA_NUM_NODES, 0)) {
+            perror("mbind");
+        }
+    }
+#endif
+}
+
 void pc_memory_init(ram_addr_t ram_size,
                     const char *kernel_filename,
                     const char *kernel_cmdline,
@@ -906,6 +954,8 @@ void pc_memory_init(ram_addr_t ram_size,
                  below_4g_mem_size - 0x100000,
                  ram_addr + 0x100000);
 
+    bind_numa(ram_addr, below_4g_mem_size, 1);
+
     /* above 4giga memory allocation */
     if (above_4g_mem_size > 0) {
 #if TARGET_PHYS_ADDR_BITS == 32
@@ -915,6 +965,7 @@ void pc_memory_init(ram_addr_t ram_size,
         cpu_register_physical_memory(0x100000000ULL,
                                      above_4g_mem_size,
                                      ram_addr);
+        bind_numa(ram_addr, below_4g_mem_size, 0);
 #endif
     }
 
-- 
1.6.4


--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[Index of Archives]     [KVM ARM]     [KVM ia64]     [KVM ppc]     [Virtualization Tools]     [Spice Development]     [Libvirt]     [Libvirt Users]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite Questions]     [Linux Kernel]     [Linux SCSI]     [XFree86]
  Powered by Linux