On 04/19/2012 04:08 PM, Vasilis Liaskovitis wrote:
> The numa_fw_cfg paravirt interface is extended to include SRAT
> information for all hotplug-able memslots. There are 3 words for each
> hotplug-able memory slot, denoting start address, size and node
> proximity. nb_numa_nodes is set to 1 by default (not 0), so that we
> always pass SRAT info to SeaBIOS. This information is used by SeaBIOS
> to build hotplug memory device objects at runtime.
>
> Signed-off-by: Vasilis Liaskovitis <vasilis.liaskovitis@xxxxxxxxxxxxxxxx>
> ---
>  hw/pc.c |   59 +++++++++++++++++++++++++++++++++++++++++++++++++++++------
>  vl.c    |    4 +++-
>  2 files changed, 56 insertions(+), 7 deletions(-)
>
> diff --git a/hw/pc.c b/hw/pc.c
> index 67f0479..f1f550a 100644
> --- a/hw/pc.c
> +++ b/hw/pc.c
> @@ -46,6 +46,7 @@
>  #include "ui/qemu-spice.h"
>  #include "memory.h"
>  #include "exec-memory.h"
> +#include "memslot.h"
>
>  /* output Bochs bios info messages */
>  //#define DEBUG_BIOS
> @@ -592,12 +593,15 @@ int e820_add_entry(uint64_t address, uint64_t length, uint32_t type)
>      return index;
>  }
>
> +static void bochs_bios_setup_hp_memslots(uint64_t *fw_cfg_slots);
> +
>  static void *bochs_bios_init(void)
>  {
>      void *fw_cfg;
>      uint8_t *smbios_table;
>      size_t smbios_len;
>      uint64_t *numa_fw_cfg;
> +    uint64_t *hp_memslots_fw_cfg;
>      int i, j;
>
>      register_ioport_write(0x400, 1, 2, bochs_bios_write, NULL);
> @@ -630,28 +634,71 @@ static void *bochs_bios_init(void)
>      fw_cfg_add_bytes(fw_cfg, FW_CFG_HPET, (uint8_t *)&hpet_cfg,
>                       sizeof(struct hpet_fw_config));
>      /* allocate memory for the NUMA channel: one (64bit) word for the number
> -     * of nodes, one word for each VCPU->node and one word for each node to
> -     * hold the amount of memory.
> +     * of nodes, one word for the number of hotplug memory slots, one word
> +     * for each VCPU->node, one word for each node to hold the amount of memory.
> +     * Finally three words for each hotplug memory slot, denoting start address,
> +     * size and node proximity.
>       */
> -    numa_fw_cfg = g_malloc0((1 + max_cpus + nb_numa_nodes) * 8);
> +    numa_fw_cfg = g_malloc0((2 + max_cpus + nb_numa_nodes + 3 * nb_hp_memslots) * 8);
>      numa_fw_cfg[0] = cpu_to_le64(nb_numa_nodes);
> +    numa_fw_cfg[1] = cpu_to_le64(nb_hp_memslots);
This will break compatibility if a guest is migrated from an old qemu to a new one: on reboot it will still use the old BIOS, which expects numa_fw_cfg[1] to be something else (the first VCPU->node entry in the old layout). Could the memslots info be moved to the end of the existing interface instead?
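
Something along these lines would keep an old BIOS working: leave the
first 1 + max_cpus + nb_numa_nodes words exactly as they are today and
append the new data at the end, so an old BIOS never reads past the part
it knows about. A rough, untested sketch reusing this patch's names:

    /* old layout unchanged: [0] = nb_numa_nodes, [1..max_cpus] =
     * VCPU->node, [max_cpus + 1 .. max_cpus + nb_numa_nodes] = node_mem;
     * the slot count and per-slot triples are appended after it */
    numa_fw_cfg = g_malloc0((1 + max_cpus + nb_numa_nodes +
                             1 + 3 * nb_hp_memslots) * 8);
    numa_fw_cfg[0] = cpu_to_le64(nb_numa_nodes);
    /* ... fill the VCPU->node and node_mem words as before ... */
    numa_fw_cfg[1 + max_cpus + nb_numa_nodes] = cpu_to_le64(nb_hp_memslots);
    hp_memslots_fw_cfg = numa_fw_cfg + 2 + max_cpus + nb_numa_nodes;
    if (nb_hp_memslots) {
        bochs_bios_setup_hp_memslots(hp_memslots_fw_cfg);
    }
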
> +
>      for (i = 0; i < max_cpus; i++) {
>          for (j = 0; j < nb_numa_nodes; j++) {
>              if (node_cpumask[j] & (1 << i)) {
> -                numa_fw_cfg[i + 1] = cpu_to_le64(j);
> +                numa_fw_cfg[i + 2] = cpu_to_le64(j);
>                  break;
>              }
>          }
>      }
>      for (i = 0; i < nb_numa_nodes; i++) {
> -        numa_fw_cfg[max_cpus + 1 + i] = cpu_to_le64(node_mem[i]);
> +        numa_fw_cfg[max_cpus + 2 + i] = cpu_to_le64(node_mem[i]);
>      }
> +
> +    hp_memslots_fw_cfg = numa_fw_cfg + 2 + max_cpus + nb_numa_nodes;
> +    if (nb_hp_memslots)
> +        bochs_bios_setup_hp_memslots(hp_memslots_fw_cfg);
> +
>      fw_cfg_add_bytes(fw_cfg, FW_CFG_NUMA, (uint8_t *)numa_fw_cfg,
> -                     (1 + max_cpus + nb_numa_nodes) * 8);
> +                     (2 + max_cpus + nb_numa_nodes + 3 * nb_hp_memslots) * 8);
>
>      return fw_cfg;
>  }
>
> +static void bochs_bios_setup_hp_memslots(uint64_t *fw_cfg_slots)
> +{
> +    int i = 0;
> +    Error *err = NULL;
> +    DeviceState *dev;
> +    MemSlotState *slot;
> +    char *type;
> +    BusState *bus = sysbus_get_default();
> +
> +    QTAILQ_FOREACH(dev, &bus->children, sibling) {
> +        type = object_property_get_str(OBJECT(dev), "type", &err);
> +        if (err) {
> +            error_free(err);
> +            fprintf(stderr, "error getting device type\n");
> +            exit(1);
> +        }
> +
> +        if (!strcmp(type, "memslot")) {
> +            if (!dev->id) {
> +                error_free(err);
> +                fprintf(stderr, "error getting memslot device id\n");
> +                exit(1);
> +            }
> +            if (!strcmp(dev->id, "initialslot")) continue;
> +            slot = MEMSLOT(dev);
> +            fw_cfg_slots[3 * slot->idx] = cpu_to_le64(slot->start);
> +            fw_cfg_slots[3 * slot->idx + 1] = cpu_to_le64(slot->size);
> +            fw_cfg_slots[3 * slot->idx + 2] = cpu_to_le64(slot->node);
> +            i++;
> +        }
> +    }
> +    assert(i == nb_hp_memslots);
> +}
> +
>  static long get_file_size(FILE *f)
>  {
>      long where, size;
>
> diff --git a/vl.c b/vl.c
> index ae91a8a..50df453 100644
> --- a/vl.c
> +++ b/vl.c
> @@ -3428,8 +3428,10 @@ int main(int argc, char **argv, char **envp)
>      register_savevm_live(NULL, "ram", 0, 4, NULL, ram_save_live,
>                           NULL, ram_load, NULL);
>
> +    if (!nb_numa_nodes)
> +        nb_numa_nodes = 1;
> -    if (nb_numa_nodes > 0) {
> +    {
>          int i;
>
>          if (nb_numa_nodes > MAX_NODES) {
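
For reference, a guest-side consumer of the layout as proposed in this
patch would have to index the blob roughly like below (illustrative
only, not actual SeaBIOS code; le64_to_cpu() stands in for whatever
byte-swapping helper the firmware uses, and max_cpus would come from
FW_CFG_MAX_CPUS):

    #include <stdint.h>

    static void parse_numa_fw_cfg(uint64_t *blob, uint64_t max_cpus)
    {
        uint64_t nb_nodes = le64_to_cpu(blob[0]);           /* word 0 */
        uint64_t nb_slots = le64_to_cpu(blob[1]);           /* word 1, new */
        /* [2 .. max_cpus + 1] = VCPU->node mappings,
         * [max_cpus + 2 .. max_cpus + nb_nodes + 1] = per-node memory */
        uint64_t *slots = &blob[2 + max_cpus + nb_nodes];   /* 3 words/slot */
        uint64_t i;

        for (i = 0; i < nb_slots; i++) {
            uint64_t start = le64_to_cpu(slots[3 * i]);     /* base address */
            uint64_t size  = le64_to_cpu(slots[3 * i + 1]); /* length */
            uint64_t node  = le64_to_cpu(slots[3 * i + 2]); /* proximity */
            /* build a hotplug memory device object for this slot */
        }
    }
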
--
Igor