To separate the host and the guest NUMA parts, the host NUMA options can be specified separately from the guest ones. Mimicking numactl's syntax, the parser allows specifying the NUMA binding policy for each guest node. It supports membind, interleave and preferred together with negation (!) and CPUSET relative addressing (+). Since the comma is already used by the QEMU command line interpreter, it cannot be used here to enumerate host nodes (but '-' is supported). Example: $ qemu ... -numa node -numa host,nodeid=0,interleave=+0-1 (uses interleaving on the first two nodes belonging to the current CPUSET for the one guest node) $ qemu ... -numa node -numa node -numa host,nodeid=0,membind=3 \ -numa host,nodeid=1,preferred=!2-3 (binding the first guest node to host node 3 and the second guest node to any node except 2 and 3) Signed-off-by: Andre Przywara <andre.przywara@xxxxxxx> --- sysemu.h | 8 ++++++++ vl.c | 25 +++++++++++++++++++++++++ 2 files changed, 33 insertions(+), 0 deletions(-) diff --git a/sysemu.h b/sysemu.h index e5f88d1..52fedd4 100644 --- a/sysemu.h +++ b/sysemu.h @@ -140,10 +140,18 @@ extern long hpagesize; #ifndef MAX_NUMA_VCPUS #define MAX_NUMA_VCPUS 256 #endif +#define NODE_HOST_NONE 0x00 +#define NODE_HOST_BIND 0x01 +#define NODE_HOST_INTERLEAVE 0x02 +#define NODE_HOST_PREFERRED 0x03 +#define NODE_HOST_POLICY_MASK 0x03 +#define NODE_HOST_RELATIVE 0x04 extern int nb_numa_nodes; struct numa_info { uint64_t guest_mem; DECLARE_BITMAP(guest_cpu, MAX_NUMA_VCPUS); + DECLARE_BITMAP(host_mem, MAX_NUMA_VCPUS); + unsigned int flags; }; extern struct numa_info numa_info[MAX_NODES]; diff --git a/vl.c b/vl.c index 40fac59..6df9cc9 100644 --- a/vl.c +++ b/vl.c @@ -792,6 +792,29 @@ static void numa_add(const char *optarg) parse_bitmap(option, numa_info[nodenr].guest_cpu, MAX_NUMA_VCPUS); } nb_numa_nodes++; + } else if (!strcmp(option, "host")) { + if (get_param_value(option, 128, "nodeid", optarg) == 0) { + fprintf(stderr, "error: need nodeid for -numa host,...\n"); 
+ exit(1); + } + nodenr = strtoull(option, NULL, 10); + if (nodenr >= nb_numa_nodes) { + fprintf(stderr, "nodeid exceeds specified NUMA nodes\n"); + exit(1); + } + numa_info[nodenr].flags = NODE_HOST_NONE; + option[0] = 0; + if (get_param_value(option, 128, "interleave", optarg) != 0) + numa_info[nodenr].flags |= NODE_HOST_INTERLEAVE; + else if (get_param_value(option, 128, "preferred", optarg) != 0) + numa_info[nodenr].flags |= NODE_HOST_PREFERRED; + else if (get_param_value(option, 128, "membind", optarg) != 0) + numa_info[nodenr].flags |= NODE_HOST_BIND; + if (option[0] != 0) { + if (parse_bitmap(option, numa_info[nodenr].host_mem, + MAX_NUMA_VCPUS) & 1) + numa_info[nodenr].flags |= NODE_HOST_RELATIVE; + } } return; } @@ -1895,6 +1918,8 @@ int main(int argc, char **argv, char **envp) for (i = 0; i < MAX_NODES; i++) { numa_info[i].guest_mem = 0; bitmap_zero(numa_info[i].guest_cpu, MAX_NUMA_VCPUS); + bitmap_zero(numa_info[i].host_mem, MAX_NUMA_VCPUS); + numa_info[i].flags = NODE_HOST_NONE; } assigned_devices_index = 0; -- 1.6.4 -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html