Re: [PATCH v2] Fixes related to processing of qemu's -numa option

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



(removed root@xxxxxxxxxxx from CC)

On Tue, Jun 19, 2012 at 03:45:27PM -0700, Chegu Vinod wrote:
> From: root <root@xxxxxxxxxxx>
> 
> Changes since v1:
> ----------------
> 
>  - Use bitmap functions that are already in qemu (instead
>    of cpu_set_t macro's)
>  - Added a check for endvalue >= max_cpus.
>  - Fix to address the round-robbing assignment (for the case
>    when cpu's are not explicitly specified)

Tested-by: Eduardo Habkost <ehabkost@xxxxxxxxxx>

It works as expected, now.


> 
> Note: Continuing to use a new constant for
>       allocation of the cpumask (max_cpus was
>       not getting set early enough).

You could simply change the code so that the bitmaps are created only
after max_cpus is set.

Except for that, the rest of the patch looks OK, now.

> 
[...]
> diff --git a/cpus.c b/cpus.c
> index b182b3d..89ce04d 100644
> --- a/cpus.c
> +++ b/cpus.c
> @@ -36,6 +36,7 @@
>  #include "cpus.h"
>  #include "qtest.h"
>  #include "main-loop.h"
> +#include "bitmap.h"
>  
>  #ifndef _WIN32
>  #include "compatfd.h"
> @@ -1145,7 +1146,7 @@ void set_numa_modes(void)
>  
>      for (env = first_cpu; env != NULL; env = env->next_cpu) {
>          for (i = 0; i < nb_numa_nodes; i++) {
> -            if (node_cpumask[i] & (1 << env->cpu_index)) {
> +            if (test_bit(env->cpu_index,  node_cpumask[i])) {
>                  env->numa_node = i;
>              }
>          }
> diff --git a/hw/pc.c b/hw/pc.c
> index c7e9ab3..8800fc0 100644
> --- a/hw/pc.c
> +++ b/hw/pc.c
> @@ -48,6 +48,7 @@
>  #include "memory.h"
>  #include "exec-memory.h"
>  #include "arch_init.h"
> +#include "bitmap.h"
>  
>  /* output Bochs bios info messages */
>  //#define DEBUG_BIOS
> @@ -637,9 +638,10 @@ static void *bochs_bios_init(void)
>       */
>      numa_fw_cfg = g_malloc0((1 + max_cpus + nb_numa_nodes) * 8);
>      numa_fw_cfg[0] = cpu_to_le64(nb_numa_nodes);
> +
>      for (i = 0; i < max_cpus; i++) {
>          for (j = 0; j < nb_numa_nodes; j++) {
> -            if (node_cpumask[j] & (1 << i)) {
> +            if (test_bit(i, node_cpumask[j])) {
>                  numa_fw_cfg[i + 1] = cpu_to_le64(j);
>                  break;
>              }
> diff --git a/sysemu.h b/sysemu.h
> index bc2c788..6813115 100644
> --- a/sysemu.h
> +++ b/sysemu.h
> @@ -133,9 +133,10 @@ extern uint8_t qemu_extra_params_fw[2];
>  extern QEMUClock *rtc_clock;
>  
>  #define MAX_NODES 64
> +#define MAX_CPUMASK_BITS 254

Why 254, specifically?

>  extern int nb_numa_nodes;
>  extern uint64_t node_mem[MAX_NODES];
> -extern uint64_t node_cpumask[MAX_NODES];
> +extern uint64_t *node_cpumask[MAX_NODES];
>  
>  #define MAX_OPTION_ROMS 16
>  typedef struct QEMUOptionRom {
> diff --git a/vl.c b/vl.c
> index 1329c30..dd4ba75 100644
> --- a/vl.c
> +++ b/vl.c
> @@ -28,6 +28,7 @@
>  #include <errno.h>
>  #include <sys/time.h>
>  #include <zlib.h>
> +#include "bitmap.h"
>  
>  /* Needed early for CONFIG_BSD etc. */
>  #include "config-host.h"
> @@ -240,7 +241,7 @@ QTAILQ_HEAD(, FWBootEntry) fw_boot_order = QTAILQ_HEAD_INITIALIZER(fw_boot_order
>  
>  int nb_numa_nodes;
>  uint64_t node_mem[MAX_NODES];
> -uint64_t node_cpumask[MAX_NODES];
> +uint64_t *node_cpumask[MAX_NODES];
>  
>  uint8_t qemu_uuid[16];
>  
> @@ -950,6 +951,9 @@ static void numa_add(const char *optarg)
>      char *endptr;
>      unsigned long long value, endvalue;
>      int nodenr;
> +    int i;
> +
> +    value = endvalue = 0;
>  
>      optarg = get_opt_name(option, 128, optarg, ',') + 1;
>      if (!strcmp(option, "node")) {
> @@ -970,27 +974,24 @@ static void numa_add(const char *optarg)
>              }
>              node_mem[nodenr] = sval;
>          }
> -        if (get_param_value(option, 128, "cpus", optarg) == 0) {
> -            node_cpumask[nodenr] = 0;
> -        } else {
> +        if (get_param_value(option, 128, "cpus", optarg) != 0) {
>              value = strtoull(option, &endptr, 10);
> -            if (value >= 64) {
> -                value = 63;
> -                fprintf(stderr, "only 64 CPUs in NUMA mode supported.\n");
> +            if (*endptr == '-') {
> +                endvalue = strtoull(endptr+1, &endptr, 10);
>              } else {
> -                if (*endptr == '-') {
> -                    endvalue = strtoull(endptr+1, &endptr, 10);
> -                    if (endvalue >= 63) {
> -                        endvalue = 62;
> -                        fprintf(stderr,
> -                            "only 63 CPUs in NUMA mode supported.\n");
> -                    }
> -                    value = (2ULL << endvalue) - (1ULL << value);
> -                } else {
> -                    value = 1ULL << value;
> -                }
> +                endvalue = value;
> +            }
> +
> +            if (endvalue >= max_cpus) {
> +                endvalue = max_cpus - 1;
> +                fprintf(stderr,
> +                    "A max of %d CPUs are supported in a guest on this host\n",
> +                                           max_cpus);
> +            }
> +
> +            for (i = value; i <= endvalue; ++i) {
> +                set_bit(i, node_cpumask[nodenr]);
>              }
> -            node_cpumask[nodenr] = value;

You changed behavior, here. Now the following command-line:

"qemu -numa node,nodeid=0,cpus=1 -numa node,nodeid=0,cpus=2"

will get both cpus 1 and 2 on node 0. Before your patch, node 0 would
get only CPU 2.

I think it is a desirable change, and the above command-line was already
ambiguous, but this could be documented in the patch description.


>          }
>          nb_numa_nodes++;
>      }
> @@ -2331,7 +2332,8 @@ int main(int argc, char **argv, char **envp)
>  
>      for (i = 0; i < MAX_NODES; i++) {
>          node_mem[i] = 0;
> -        node_cpumask[i] = 0;
> +        node_cpumask[i] = (unsigned long *) g_malloc0(MAX_CPUMASK_BITS);

You are allocating 254 bytes here, not 254 bits.

> +        bitmap_zero(node_cpumask[i], MAX_CPUMASK_BITS);
>      }

If MAX_CPUMASK_BITS is really going to be a constant (instead of using
max_cpus), you don't even need to allocate the bitmaps dynamically. They
could be static arrays.

>  
>      nb_numa_nodes = 0;
> @@ -3469,16 +3471,18 @@ int main(int argc, char **argv, char **envp)
>          }
>  
>          for (i = 0; i < nb_numa_nodes; i++) {
> -            if (node_cpumask[i] != 0)
> +            if (!bitmap_empty(node_cpumask[i], MAX_CPUMASK_BITS)) {
>                  break;
> +            }
>          }
> +
>          /* assigning the VCPUs round-robin is easier to implement, guest OSes
>           * must cope with this anyway, because there are BIOSes out there in
>           * real machines which also use this scheme.
>           */
>          if (i == nb_numa_nodes) {
>              for (i = 0; i < max_cpus; i++) {
> -                node_cpumask[i % nb_numa_nodes] |= 1 << i;
> +                set_bit(i, node_cpumask[i % nb_numa_nodes]);
>              }
>          }
>      }
> -- 
> 1.7.1
> 

-- 
Eduardo
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[Index of Archives]     [KVM ARM]     [KVM ia64]     [KVM ppc]     [Virtualization Tools]     [Spice Development]     [Libvirt]     [Libvirt Users]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite Questions]     [Linux Kernel]     [Linux SCSI]     [XFree86]
  Powered by Linux