At 09/21/2012 07:17 PM, Vasilis Liaskovitis Wrote: > pcimem_start and pcimem64_start are adjusted from srat entries. For this reason, > paravirt info (NUMA SRAT entries and number of cpus) need to be read before pci_setup. > Imho, this is an ugly code change since SRAT bios tables and number of > cpus have to be read earlier. But the advantage is that no new paravirt interface > is introduced. Suggestions to make the code change cleaner are welcome. > > The alternative patch (will be sent as a reply to this patch) implements a > paravirt interface to read the starting values of pcimem_start and > pcimem64_start from QEMU. > > Signed-off-by: Vasilis Liaskovitis <vasilis.liaskovitis@xxxxxxxxxxxxxxxx> > --- > src/acpi.c | 82 ++++++++++++++++++++++++++++++++++++++++---------------- > src/acpi.h | 3 ++ > src/pciinit.c | 6 +++- > src/post.c | 3 ++ > src/smp.c | 4 +++ > 5 files changed, 72 insertions(+), 26 deletions(-) > > diff --git a/src/acpi.c b/src/acpi.c > index 1223b52..9e99aa7 100644 > --- a/src/acpi.c > +++ b/src/acpi.c > @@ -428,7 +428,10 @@ encodeLen(u8 *ssdt_ptr, int length, int bytes) > #define MEM_OFFSET_END 63 > #define MEM_OFFSET_SIZE 79 > > -u64 nb_hp_memslots = 0; > +u64 nb_hp_memslots = 0, nb_numanodes; > +u64 *numa_data, *hp_memdata; > +u64 below_4g_hp_mem_size = 0; > +u64 above_4g_hp_mem_size = 0; > struct srat_memory_affinity *mem; > > #define SSDT_SIGNATURE 0x54445353 // SSDT > @@ -763,17 +766,7 @@ acpi_build_srat_memory(struct srat_memory_affinity *numamem, > static void * > build_srat(void) > { > - int nb_numa_nodes = qemu_cfg_get_numa_nodes(); > - > - u64 *numadata = malloc_tmphigh(sizeof(u64) * (MaxCountCPUs + nb_numa_nodes)); > - if (!numadata) { > - warn_noalloc(); > - return NULL; > - } > - > - qemu_cfg_get_numa_data(numadata, MaxCountCPUs + nb_numa_nodes); > - > - qemu_cfg_get_numa_data(&nb_hp_memslots, 1); > + int nb_numa_nodes = nb_numanodes; > struct system_resource_affinity_table *srat; > int srat_size = sizeof(*srat) + > sizeof(struct 
srat_processor_affinity) * MaxCountCPUs + > @@ -782,7 +775,7 @@ build_srat(void) > srat = malloc_high(srat_size); > if (!srat) { > warn_noalloc(); > - free(numadata); > + free(numa_data); > return NULL; > } > > @@ -791,6 +784,7 @@ build_srat(void) > struct srat_processor_affinity *core = (void*)(srat + 1); > int i; > u64 curnode; > + u64 *numadata = numa_data; > > for (i = 0; i < MaxCountCPUs; ++i) { > core->type = SRAT_PROCESSOR; > @@ -847,15 +841,7 @@ build_srat(void) > mem = (void*)numamem; > > if (nb_hp_memslots) { > - u64 *hpmemdata = malloc_tmphigh(sizeof(u64) * (3 * nb_hp_memslots)); > - if (!hpmemdata) { > - warn_noalloc(); > - free(hpmemdata); > - free(numadata); > - return NULL; > - } > - > - qemu_cfg_get_numa_data(hpmemdata, 3 * nb_hp_memslots); > + u64 *hpmemdata = hp_memdata; > > for (i = 1; i < nb_hp_memslots + 1; ++i) { > mem_base = *hpmemdata++; > @@ -865,7 +851,7 @@ build_srat(void) > numamem++; > slots++; > } > - free(hpmemdata); > + free(hp_memdata); > } > > for (; slots < nb_numa_nodes + nb_hp_memslots + 2; slots++) { > @@ -875,10 +861,58 @@ build_srat(void) > > build_header((void*)srat, SRAT_SIGNATURE, srat_size, 1); > > - free(numadata); > + free(numa_data); > return srat; > } > > +/* QEMU paravirt SRAT entries need to be read in before pci initilization */ > +void read_srat_early(void) > +{ > + int i; > + > + nb_numanodes = qemu_cfg_get_numa_nodes(); > + u64 *hpmemdata; > + u64 mem_len, mem_base; > + > + numa_data = malloc_tmphigh(sizeof(u64) * (MaxCountCPUs + nb_numanodes)); > + if (!numa_data) { > + warn_noalloc(); > + } > + > + qemu_cfg_get_numa_data(numa_data, MaxCountCPUs + nb_numanodes); > + qemu_cfg_get_numa_data(&nb_hp_memslots, 1); > + > + if (nb_hp_memslots) { > + hp_memdata = malloc_tmphigh(sizeof(u64) * (3 * nb_hp_memslots)); > + if (!hp_memdata) { > + warn_noalloc(); > + free(hp_memdata); > + free(numa_data); > + } > + > + qemu_cfg_get_numa_data(hp_memdata, 3 * nb_hp_memslots); > + hpmemdata = hp_memdata; > + > + for (i = 1; i < 
nb_hp_memslots + 1; ++i) { > + mem_base = *hpmemdata++; > + mem_len = *hpmemdata++; > + hpmemdata++; > + if (mem_base >= 0x100000000LL) { > + above_4g_hp_mem_size += mem_len; > + } > + /* if dimm fits before pci hole, append it normally */ > + else if (mem_base + mem_len <= BUILD_PCIMEM_START) { > + below_4g_hp_mem_size += mem_len; > + } > + /* otherwise place it above 4GB */ > + else { > + above_4g_hp_mem_size += mem_len; > + } > + } > + > + } > +} > + > static const struct pci_device_id acpi_find_tbl[] = { > /* PIIX4 Power Management device. */ > PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82371AB_3, NULL), > diff --git a/src/acpi.h b/src/acpi.h > index cb21561..d29837f 100644 > --- a/src/acpi.h > +++ b/src/acpi.h > @@ -5,6 +5,9 @@ > > void acpi_bios_init(void); > u32 find_resume_vector(void); > +void read_srat_early(void); > +extern u64 below_4g_hp_mem_size; > +extern u64 above_4g_hp_mem_size; > > #define RSDP_SIGNATURE 0x2052545020445352LL // "RSD PTR " > > diff --git a/src/pciinit.c b/src/pciinit.c > index 31115ee..c5a4b24 100644 > --- a/src/pciinit.c > +++ b/src/pciinit.c > @@ -12,6 +12,7 @@ > #include "ioport.h" // PORT_ATA1_CMD_BASE > #include "config.h" // CONFIG_* > #include "xen.h" // usingXen > +#include "acpi.h" > > #define PCI_DEVICE_MEM_MIN 0x1000 > #define PCI_BRIDGE_IO_MIN 0x1000 > @@ -597,7 +598,7 @@ static void pci_region_map_entries(struct pci_bus *busses, struct pci_region *r) > > static void pci_bios_map_devices(struct pci_bus *busses) > { > - pcimem_start = RamSize; > + pcimem_start = RamSize + below_4g_hp_mem_size; > > if (pci_bios_init_root_regions(busses)) { > struct pci_region r64_mem, r64_pref; > @@ -616,7 +617,8 @@ static void pci_bios_map_devices(struct pci_bus *busses) > u64 align_mem = pci_region_align(&r64_mem); > u64 align_pref = pci_region_align(&r64_pref); > > - r64_mem.base = ALIGN(0x100000000LL + RamSizeOver4G, align_mem); > + r64_mem.base = ALIGN(0x100000000LL + RamSizeOver4G + > + above_4g_hp_mem_size, align_mem); > 
r64_pref.base = ALIGN(r64_mem.base + sum_mem, align_pref); > pcimem64_start = r64_mem.base; > pcimem64_end = r64_pref.base + sum_pref; > diff --git a/src/post.c b/src/post.c > index 924b311..c37730b 100644 > --- a/src/post.c > +++ b/src/post.c > @@ -234,6 +234,9 @@ maininit(void) > // Initialize mtrr > mtrr_setup(); > > + smp_get_ncpus(); > + read_srat_early(); > + > // Initialize pci > pci_setup(); > smm_init(); > diff --git a/src/smp.c b/src/smp.c > index 4975412..3922776 100644 > --- a/src/smp.c > +++ b/src/smp.c > @@ -138,7 +138,11 @@ smp_probe(void) > > // Restore memory. > *(u64*)BUILD_AP_BOOT_ADDR = old; > +} > > +void > +smp_get_ncpus(void) You don't declare this function, but you use it in another file. This will break the build: src/post.c: In function ‘maininit’: src/post.c:237: warning: implicit declaration of function ‘smp_get_ncpus’ src/smp.c:144: note: previous definition of ‘smp_get_ncpus’ was here src/post.c:237: error: incompatible implicit declaration of function ‘smp_get_ncpus’ src/smp.c:144: note: previous definition of ‘smp_get_ncpus’ was here Thanks Wen Congyang > +{ > MaxCountCPUs = qemu_cfg_get_max_cpus(); > if (!MaxCountCPUs || MaxCountCPUs < CountCPUs) > MaxCountCPUs = CountCPUs; -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html