Re: [PATCH] cpu hotplug issue

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



When rebooting after a CPU hotplug in qemu-kvm, Seabios can get stuck in smp_probe().
Normally cmos_smp_count is read from cmos and the smp_ap_boot_code is run on all cpus 
except bootstrap. The expected result is CountCPUs == cmos_smp_count + 1.  After a 
cpu hotplug, more than cmos_smp_count cpus are active, so we get a situation where 
CountCPUs > cmos_smp_count + 1 and Seabios keeps looping forever in smp_probe. In some
cases, the while loop exit condition is tested before CountCPUs gets larger (i.e. 
before smp_ap_boot_code runs on all CPUs), so the hang is not always reproducible.

This patch introduces a new fw_cfg variable called hotplugged_cpus that gets updated from
qemu-kvm hoplug code. Seabios reads this variable on each call to smp_probe() and adjusts
the expected number of online CPUs.

The qemu-kvm part of this patch is against Jan Kiszka's cpu-hotplug tree:
git://git.kiszka.org/qemu-kvm.git queues/cpu-hotplug
tested with qemu-system-x86_64.

An alternative to this patch would be to update the smp_cpus variable in qemu-kvm and 
do a "cmos update" to 0x5f from the cpu-hotplug code. Access to the rtc_state (cmos device) 
would be required in hw/acpi_piix4.c.  This way no change to Seabios would be required.

---

 hw/acpi_piix4.c |    6 ++++++
 hw/fw_cfg.c     |   14 ++++++++++++++
 hw/fw_cfg.h     |    2 ++
 hw/loader.c     |    2 +-
 sysemu.h        |    1 +
 vl.c            |    1 +
 6 files changed, 25 insertions(+), 1 deletions(-)

diff --git a/hw/acpi_piix4.c b/hw/acpi_piix4.c
index bbb9edd..54d98ae 100644
--- a/hw/acpi_piix4.c
+++ b/hw/acpi_piix4.c
@@ -24,6 +24,7 @@
 #include "sysemu.h"
 #include "range.h"
 #include "ioport.h"
+#include "fw_cfg.h"
 
 //#define DEBUG
 
@@ -539,6 +540,7 @@ static void pcirmv_write(void *opaque, uint32_t addr, uint32_t val)
 }
 
 extern const char *global_cpu_model;
+extern FWCfgState *fw_cfg;
 
 static int piix4_device_hotplug(DeviceState *qdev, PCIDevice *dev,
                                 PCIHotplugState state);
@@ -580,6 +582,8 @@ static void enable_processor(PIIX4PMState *s, int cpu)
 
     *gpe->sts = *gpe->sts | PIIX4_CPU_HOTPLUG_STATUS;
     g->cpus_sts[cpu/8] |= (1 << (cpu%8));
+    hotplugged_cpus++;
+    fw_cfg_update_i16(fw_cfg, FW_CFG_HPLUG_CPUS, (uint16_t)hotplugged_cpus);
 }
 
 static void disable_processor(PIIX4PMState *s, int cpu)
@@ -589,6 +593,8 @@ static void disable_processor(PIIX4PMState *s, int cpu)
 
     *gpe->sts = *gpe->sts | PIIX4_CPU_HOTPLUG_STATUS;
     g->cpus_sts[cpu/8] &= ~(1 << (cpu%8));
+    hotplugged_cpus--;
+    fw_cfg_update_i16(fw_cfg, FW_CFG_HPLUG_CPUS, (uint16_t)hotplugged_cpus);
 }
 
 void qemu_system_cpu_hot_add(int cpu, int state)
diff --git a/hw/fw_cfg.c b/hw/fw_cfg.c
index a29db90..228f517 100644
--- a/hw/fw_cfg.c
+++ b/hw/fw_cfg.c
@@ -395,6 +395,19 @@ int fw_cfg_add_i64(FWCfgState *s, uint16_t key, uint64_t value)
     return fw_cfg_add_bytes(s, key, (uint8_t *)copy, sizeof(value));
 }
 
+int fw_cfg_update_i16(FWCfgState *s, uint16_t key, uint16_t data)
+{
+    int arch = !!(key & FW_CFG_ARCH_LOCAL);
+    uint16_t *p;
+
+    key &= FW_CFG_ENTRY_MASK;
+
+    p = (uint16_t*)s->entries[arch][key].data;
+    *p = cpu_to_le16(data);
+    
+    return 1;
+}
+
 int fw_cfg_add_callback(FWCfgState *s, uint16_t key, FWCfgCallback callback,
                         void *callback_opaque, uint8_t *data, size_t len)
 {
@@ -489,6 +502,7 @@ FWCfgState *fw_cfg_init(uint32_t ctl_port, uint32_t data_port,
     fw_cfg_add_i16(s, FW_CFG_NOGRAPHIC, (uint16_t)(display_type == DT_NOGRAPHIC));
     fw_cfg_add_i16(s, FW_CFG_NB_CPUS, (uint16_t)smp_cpus);
     fw_cfg_add_i16(s, FW_CFG_MAX_CPUS, (uint16_t)max_cpus);
+    fw_cfg_add_i16(s, FW_CFG_HPLUG_CPUS, (uint16_t)hotplugged_cpus);
     fw_cfg_add_i16(s, FW_CFG_BOOT_MENU, (uint16_t)boot_menu);
     fw_cfg_bootsplash(s);
 
diff --git a/hw/fw_cfg.h b/hw/fw_cfg.h
index 856bf91..95d74b4 100644
--- a/hw/fw_cfg.h
+++ b/hw/fw_cfg.h
@@ -27,6 +27,7 @@
 #define FW_CFG_SETUP_SIZE       0x17
 #define FW_CFG_SETUP_DATA       0x18
 #define FW_CFG_FILE_DIR         0x19
+#define FW_CFG_HPLUG_CPUS       0x1a
 
 #define FW_CFG_FILE_FIRST       0x20
 #define FW_CFG_FILE_SLOTS       0x10
@@ -56,6 +57,7 @@ typedef void (*FWCfgCallback)(void *opaque, uint8_t *data);
 typedef struct FWCfgState FWCfgState;
 int fw_cfg_add_bytes(FWCfgState *s, uint16_t key, uint8_t *data, uint32_t len);
 int fw_cfg_add_i16(FWCfgState *s, uint16_t key, uint16_t value);
+int fw_cfg_update_i16(FWCfgState *s, uint16_t key, uint16_t data);
 int fw_cfg_add_i32(FWCfgState *s, uint16_t key, uint32_t value);
 int fw_cfg_add_i64(FWCfgState *s, uint16_t key, uint64_t value);
 int fw_cfg_add_callback(FWCfgState *s, uint16_t key, FWCfgCallback callback,
diff --git a/hw/loader.c b/hw/loader.c
index 35d792e..2a017d1 100644
--- a/hw/loader.c
+++ b/hw/loader.c
@@ -536,7 +536,7 @@ struct Rom {
     QTAILQ_ENTRY(Rom) next;
 };
 
-static FWCfgState *fw_cfg;
+FWCfgState *fw_cfg;
 static QTAILQ_HEAD(, Rom) roms = QTAILQ_HEAD_INITIALIZER(roms);
 
 static void rom_insert(Rom *rom)
diff --git a/sysemu.h b/sysemu.h
index 59fae48..a0658fd 100644
--- a/sysemu.h
+++ b/sysemu.h
@@ -115,6 +115,7 @@ extern int ctrl_grab;
 extern int usb_enabled;
 extern int smp_cpus;
 extern int max_cpus;
+extern int hotplugged_cpus;
 extern int cursor_hide;
 extern int graphic_rotate;
 extern int no_quit;
diff --git a/vl.c b/vl.c
index 3fa1711..4ab8c63 100644
--- a/vl.c
+++ b/vl.c
@@ -204,6 +204,7 @@ int rtc_td_hack = 0;
 int usb_enabled = 0;
 int singlestep = 0;
 int smp_cpus = 1;
+int hotplugged_cpus = 0;
 int max_cpus = 0;
 int smp_cores = 1;
 int smp_threads = 1;



 src/paravirt.c |   12 ++++++++++++
 src/paravirt.h |    2 ++
 src/smp.c      |    6 ++++--
 3 files changed, 18 insertions(+), 2 deletions(-)

diff --git a/src/paravirt.c b/src/paravirt.c
index 9cf77de..3367609 100644
--- a/src/paravirt.c
+++ b/src/paravirt.c
@@ -305,6 +305,18 @@ u16 qemu_cfg_get_max_cpus(void)
     return cnt;
 }
 
+u16 qemu_cfg_get_hplug_cpus(void)
+{
+    u16 cnt;
+
+    if (!qemu_cfg_present)
+        return 0;
+
+    qemu_cfg_read_entry(&cnt, QEMU_CFG_HPLUG_CPUS, sizeof(cnt));
+
+    return cnt;
+}
+
 static QemuCfgFile LastFile;
 
 static u32
diff --git a/src/paravirt.h b/src/paravirt.h
index 4a370a0..1a3a914 100644
--- a/src/paravirt.h
+++ b/src/paravirt.h
@@ -33,6 +33,7 @@ static inline int kvm_para_available(void)
 #define QEMU_CFG_BOOT_MENU		0x0e
 #define QEMU_CFG_MAX_CPUS		0x0f
 #define QEMU_CFG_FILE_DIR               0x19
+#define QEMU_CFG_HPLUG_CPUS             0x1a
 #define QEMU_CFG_ARCH_LOCAL		0x8000
 #define QEMU_CFG_ACPI_TABLES		(QEMU_CFG_ARCH_LOCAL + 0)
 #define QEMU_CFG_SMBIOS_ENTRIES		(QEMU_CFG_ARCH_LOCAL + 1)
@@ -55,6 +56,7 @@ int qemu_cfg_smbios_load_external(int type, char **p, unsigned *nr_structs,
 int qemu_cfg_get_numa_nodes(void);
 void qemu_cfg_get_numa_data(u64 *data, int n);
 u16 qemu_cfg_get_max_cpus(void);
+u16 qemu_cfg_get_hplug_cpus(void);
 
 typedef struct QemuCfgFile {
     u32  size;        /* file size */
diff --git a/src/smp.c b/src/smp.c
index 8c077a1..b035870 100644
--- a/src/smp.c
+++ b/src/smp.c
@@ -36,6 +36,7 @@ wrmsr_smp(u32 index, u64 val)
 
 u32 CountCPUs VAR16VISIBLE;
 u32 MaxCountCPUs VAR16VISIBLE;
+u32 HotPlugCPUs VAR16VISIBLE;
 extern void smp_ap_boot_code(void);
 ASM16(
     "  .global smp_ap_boot_code\n"
@@ -114,8 +115,9 @@ smp_probe(void)
     if (CONFIG_COREBOOT) {
         msleep(10);
     } else {
-        u8 cmos_smp_count = inb_cmos(CMOS_BIOS_SMP_COUNT);
-        while (cmos_smp_count + 1 != readl(&CountCPUs))
+        u32 cmos_smp_count = (u32)inb_cmos(CMOS_BIOS_SMP_COUNT);
+        HotPlugCPUs = qemu_cfg_get_hplug_cpus();
+        while (cmos_smp_count + HotPlugCPUs + 1 != readl(&CountCPUs))
             yield();
     }
 
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[Index of Archives]     [KVM ARM]     [KVM ia64]     [KVM ppc]     [Virtualization Tools]     [Spice Development]     [Libvirt]     [Libvirt Users]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite Questions]     [Linux Kernel]     [Linux SCSI]     [XFree86]
  Powered by Linux