When rebooting after a CPU hotplug in qemu-kvm, Seabios can get stuck in smp_probe(). Normally cmos_smp_count is read from cmos and the smp_ap_boot_code is run on all cpus except bootstrap. The expected result is CountCPUs == cmos_smp_count + 1. After a cpu hotplug, more than cmos_smp_count cpus are active, so we get a situation where CountCPUs > cmos_smp_count + 1 and Seabios keeps looping forever in smp_probe. In some cases, the while loop exit condition is tested before CountCPUs gets larger (i.e. before smp_ap_boot_code runs on all CPUs), so the hang is not always reproducible. This patch introduces a new fw_cfg variable called hotplugged_cpus that gets updated from qemu-kvm hoplug code. Seabios reads this variable on each call to smp_probe() and adjusts the expected number of online CPUs. The qemu-kvm part of this patch is against Jan Kiszka's cpu-hotplug tree: git://git.kiszka.org/qemu-kvm.git queues/cpu-hotplug tested with qemu-system-x86_64. An alternative to this patch would be to update the smp_cpus variable in qemu-kvm and do a "cmos update" to 0x5f from the cpu-hotplug code. Access to the rtc_state (cmos device) would be required in hw/acpi_piix4.c. This way no change to Seabios would be required. --- hw/acpi_piix4.c | 6 ++++++ hw/fw_cfg.c | 14 ++++++++++++++ hw/fw_cfg.h | 2 ++ hw/loader.c | 2 +- sysemu.h | 1 + vl.c | 1 + 6 files changed, 25 insertions(+), 1 deletions(-) diff --git a/hw/acpi_piix4.c b/hw/acpi_piix4.c index bbb9edd..54d98ae 100644 --- a/hw/acpi_piix4.c +++ b/hw/acpi_piix4.c @@ -24,6 +24,7 @@ #include "sysemu.h" #include "range.h" #include "ioport.h" +#include "fw_cfg.h" //#define DEBUG @@ -539,6 +540,7 @@ static void pcirmv_write(void *opaque, uint32_t addr, uint32_t val) } extern const char *global_cpu_model; +extern FWCfgState *fw_cfg; static int piix4_device_hotplug(DeviceState *qdev, PCIDevice *dev, PCIHotplugState state); @@ -580,6 +582,8 @@ static void enable_processor(PIIX4PMState *s, int cpu) *gpe->sts = *gpe->sts | PIIX4_CPU_HOTPLUG_STATUS; g->cpus_sts[cpu/8] |= (1 << (cpu%8)); + hotplugged_cpus++; + fw_cfg_update_i16(fw_cfg, FW_CFG_HPLUG_CPUS, (uint16_t)hotplugged_cpus); } static void disable_processor(PIIX4PMState *s, int cpu) @@ -589,6 +593,8 @@ static void disable_processor(PIIX4PMState *s, int cpu) *gpe->sts = *gpe->sts | PIIX4_CPU_HOTPLUG_STATUS; g->cpus_sts[cpu/8] &= ~(1 << (cpu%8)); + hotplugged_cpus--; + fw_cfg_update_i16(fw_cfg, FW_CFG_HPLUG_CPUS, (uint16_t)hotplugged_cpus); } void qemu_system_cpu_hot_add(int cpu, int state) diff --git a/hw/fw_cfg.c b/hw/fw_cfg.c index a29db90..228f517 100644 --- a/hw/fw_cfg.c +++ b/hw/fw_cfg.c @@ -395,6 +395,19 @@ int fw_cfg_add_i64(FWCfgState *s, uint16_t key, uint64_t value) return fw_cfg_add_bytes(s, key, (uint8_t *)copy, sizeof(value)); } +int fw_cfg_update_i16(FWCfgState *s, uint16_t key, uint16_t data) +{ + int arch = !!(key & FW_CFG_ARCH_LOCAL); + uint16_t *p; + + key &= FW_CFG_ENTRY_MASK; + + p = (uint16_t*)s->entries[arch][key].data; + *p = cpu_to_le16(data); + + return 1; +} + int fw_cfg_add_callback(FWCfgState *s, uint16_t key, FWCfgCallback callback, void *callback_opaque, uint8_t *data, size_t len) { @@ -489,6 +502,7 @@ FWCfgState *fw_cfg_init(uint32_t ctl_port, uint32_t data_port, fw_cfg_add_i16(s, FW_CFG_NOGRAPHIC, (uint16_t)(display_type == DT_NOGRAPHIC)); fw_cfg_add_i16(s, FW_CFG_NB_CPUS, (uint16_t)smp_cpus); fw_cfg_add_i16(s, FW_CFG_MAX_CPUS, (uint16_t)max_cpus); + fw_cfg_add_i16(s, FW_CFG_HPLUG_CPUS, (uint16_t)hotplugged_cpus); fw_cfg_add_i16(s, FW_CFG_BOOT_MENU, (uint16_t)boot_menu); fw_cfg_bootsplash(s); diff --git a/hw/fw_cfg.h b/hw/fw_cfg.h index 856bf91..95d74b4 100644 --- a/hw/fw_cfg.h +++ b/hw/fw_cfg.h @@ -27,6 +27,7 @@ #define FW_CFG_SETUP_SIZE 0x17 #define FW_CFG_SETUP_DATA 0x18 #define FW_CFG_FILE_DIR 0x19 +#define FW_CFG_HPLUG_CPUS 0x1a #define FW_CFG_FILE_FIRST 0x20 #define FW_CFG_FILE_SLOTS 0x10 @@ -56,6 +57,7 @@ typedef void (*FWCfgCallback)(void *opaque, uint8_t *data); typedef struct FWCfgState FWCfgState; int fw_cfg_add_bytes(FWCfgState *s, uint16_t key, uint8_t *data, uint32_t len); int fw_cfg_add_i16(FWCfgState *s, uint16_t key, uint16_t value); +int fw_cfg_update_i16(FWCfgState *s, uint16_t key, uint16_t data); int fw_cfg_add_i32(FWCfgState *s, uint16_t key, uint32_t value); int fw_cfg_add_i64(FWCfgState *s, uint16_t key, uint64_t value); int fw_cfg_add_callback(FWCfgState *s, uint16_t key, FWCfgCallback callback, diff --git a/hw/loader.c b/hw/loader.c index 35d792e..2a017d1 100644 --- a/hw/loader.c +++ b/hw/loader.c @@ -536,7 +536,7 @@ struct Rom { QTAILQ_ENTRY(Rom) next; }; -static FWCfgState *fw_cfg; +FWCfgState *fw_cfg; static QTAILQ_HEAD(, Rom) roms = QTAILQ_HEAD_INITIALIZER(roms); static void rom_insert(Rom *rom) diff --git a/sysemu.h b/sysemu.h index 59fae48..a0658fd 100644 --- a/sysemu.h +++ b/sysemu.h @@ -115,6 +115,7 @@ extern int ctrl_grab; extern int usb_enabled; extern int smp_cpus; extern int max_cpus; +extern int hotplugged_cpus; extern int cursor_hide; extern int graphic_rotate; extern int no_quit; diff --git a/vl.c b/vl.c index 3fa1711..4ab8c63 100644 --- a/vl.c +++ b/vl.c @@ -204,6 +204,7 @@ int rtc_td_hack = 0; int usb_enabled = 0; int singlestep = 0; int smp_cpus = 1; +int hotplugged_cpus = 0; int max_cpus = 0; int smp_cores = 1; int smp_threads = 1; src/paravirt.c | 12 ++++++++++++ src/paravirt.h | 2 ++ src/smp.c | 6 ++++-- 3 files changed, 18 insertions(+), 2 deletions(-) diff --git a/src/paravirt.c b/src/paravirt.c index 9cf77de..3367609 100644 --- a/src/paravirt.c +++ b/src/paravirt.c @@ -305,6 +305,18 @@ u16 qemu_cfg_get_max_cpus(void) return cnt; } +u16 qemu_cfg_get_hplug_cpus(void) +{ + u16 cnt; + + if (!qemu_cfg_present) + return 0; + + qemu_cfg_read_entry(&cnt, QEMU_CFG_HPLUG_CPUS, sizeof(cnt)); + + return cnt; +} + static QemuCfgFile LastFile; static u32 diff --git a/src/paravirt.h b/src/paravirt.h index 4a370a0..1a3a914 100644 --- a/src/paravirt.h +++ b/src/paravirt.h @@ -33,6 +33,7 @@ static inline int kvm_para_available(void) #define QEMU_CFG_BOOT_MENU 0x0e #define QEMU_CFG_MAX_CPUS 0x0f #define QEMU_CFG_FILE_DIR 0x19 +#define QEMU_CFG_HPLUG_CPUS 0x1a #define QEMU_CFG_ARCH_LOCAL 0x8000 #define QEMU_CFG_ACPI_TABLES (QEMU_CFG_ARCH_LOCAL + 0) #define QEMU_CFG_SMBIOS_ENTRIES (QEMU_CFG_ARCH_LOCAL + 1) @@ -55,6 +56,7 @@ int qemu_cfg_smbios_load_external(int type, char **p, unsigned *nr_structs, int qemu_cfg_get_numa_nodes(void); void qemu_cfg_get_numa_data(u64 *data, int n); u16 qemu_cfg_get_max_cpus(void); +u16 qemu_cfg_get_hplug_cpus(void); typedef struct QemuCfgFile { u32 size; /* file size */ diff --git a/src/smp.c b/src/smp.c index 8c077a1..b035870 100644 --- a/src/smp.c +++ b/src/smp.c @@ -36,6 +36,7 @@ wrmsr_smp(u32 index, u64 val) u32 CountCPUs VAR16VISIBLE; u32 MaxCountCPUs VAR16VISIBLE; +u32 HotPlugCPUs VAR16VISIBLE; extern void smp_ap_boot_code(void); ASM16( " .global smp_ap_boot_code\n" @@ -114,8 +115,9 @@ smp_probe(void) if (CONFIG_COREBOOT) { msleep(10); } else { - u8 cmos_smp_count = inb_cmos(CMOS_BIOS_SMP_COUNT); - while (cmos_smp_count + 1 != readl(&CountCPUs)) + u32 cmos_smp_count = (u32)inb_cmos(CMOS_BIOS_SMP_COUNT); + HotPlugCPUs = qemu_cfg_get_hplug_cpus(); + while (cmos_smp_count + HotPlugCPUs + 1 != readl(&CountCPUs)) yield(); } -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html