Current code is a mess. And addition of acpi tables is broken. Signed-off-by: Gleb Natapov <gleb@xxxxxxxxxx> diff --git a/kvm/bios/rombios32.c b/kvm/bios/rombios32.c index 369cbef..fda4894 100755 --- a/kvm/bios/rombios32.c +++ b/kvm/bios/rombios32.c @@ -1293,15 +1293,13 @@ struct rsdp_descriptor /* Root System Descriptor Pointer */ uint8_t reserved [3]; /* Reserved field must be 0 */ } __attribute__((__packed__)); -#define MAX_RSDT_ENTRIES 100 - /* * ACPI 1.0 Root System Description Table (RSDT) */ struct rsdt_descriptor_rev1 { ACPI_TABLE_HEADER_DEF /* ACPI common table header */ - uint32_t table_offset_entry [MAX_RSDT_ENTRIES]; /* Array of pointers to other */ + uint32_t table_offset_entry [0]; /* Array of pointers to other */ /* ACPI tables */ } __attribute__((__packed__)); @@ -1585,324 +1583,332 @@ static void acpi_build_srat_memory(struct srat_memory_affinity *numamem, return; } -/* base_addr must be a multiple of 4KB */ -void acpi_bios_init(void) +static void rsdp_build(struct rsdp_descriptor *rsdp, uint32_t rsdt) { - struct rsdp_descriptor *rsdp; - struct rsdt_descriptor_rev1 *rsdt; - struct fadt_descriptor_rev1 *fadt; - struct facs_descriptor_rev1 *facs; - struct multiple_apic_table *madt; - uint8_t *dsdt, *ssdt; + memset(rsdp, 0, sizeof(*rsdp)); + memcpy(rsdp->signature, "RSD PTR ", 8); #ifdef BX_QEMU - struct system_resource_affinity_table *srat; - struct acpi_20_hpet *hpet; - uint32_t hpet_addr; -#endif - uint32_t base_addr, rsdt_addr, fadt_addr, addr, facs_addr, dsdt_addr, ssdt_addr; - uint32_t acpi_tables_size, madt_addr, madt_size, rsdt_size; - uint32_t srat_addr,srat_size; - uint16_t i, external_tables; - int nb_numa_nodes; - int nb_rsdt_entries = 0; - - /* reserve memory space for tables */ -#ifdef BX_USE_EBDA_TABLES - ebda_cur_addr = align(ebda_cur_addr, 16); - rsdp = (void *)(ebda_cur_addr); - ebda_cur_addr += sizeof(*rsdp); + memcpy(rsdp->oem_id, "QEMU ", 6); #else - bios_table_cur_addr = align(bios_table_cur_addr, 16); - rsdp = (void *)(bios_table_cur_addr); - bios_table_cur_addr += sizeof(*rsdp); + memcpy(rsdp->oem_id, "BOCHS ", 6); #endif + rsdp->rsdt_physical_address = rsdt; + rsdp->checksum = acpi_checksum((void*)rsdp, 20); +} -#ifdef BX_QEMU - external_tables = acpi_additional_tables(); -#else - external_tables = 0; -#endif +static uint32_t facs_build(uint32_t *addr) +{ + struct facs_descriptor_rev1 *facs; - addr = base_addr = ram_size - ACPI_DATA_SIZE; - rsdt_addr = addr; - rsdt = (void *)(addr); - rsdt_size = sizeof(*rsdt) + external_tables * 4; - addr += rsdt_size; + *addr = (*addr + 63) & ~63; /* 64 byte alignment for FACS */ + facs = (void*)(*addr); + *addr += sizeof(*facs); - fadt_addr = addr; - fadt = (void *)(addr); - addr += sizeof(*fadt); + memset(facs, 0, sizeof(*facs)); + memcpy(facs->signature, "FACS", 4); + facs->length = cpu_to_le32(sizeof(*facs)); + BX_INFO("Firmware waking vector %p\n", &facs->firmware_waking_vector); - /* XXX: FACS should be in RAM */ - addr = (addr + 63) & ~63; /* 64 byte alignment for FACS */ - facs_addr = addr; - facs = (void *)(addr); - addr += sizeof(*facs); + return (uint32_t)facs; +} - dsdt_addr = addr; - dsdt = (void *)(addr); - addr += sizeof(AmlCode); +static uint32_t dsdt_build(uint32_t *addr) +{ + uint8_t *dsdt = (void*)(*addr); -#ifdef BX_QEMU - qemu_cfg_select(QEMU_CFG_NUMA); - nb_numa_nodes = qemu_cfg_get64(); + *addr += sizeof(AmlCode); + + memcpy(dsdt, AmlCode, sizeof(AmlCode)); + + return (uint32_t)dsdt; +} + +static uint32_t fadt_build(uint32_t *addr, uint32_t facs, uint32_t dsdt) +{ + struct fadt_descriptor_rev1 *fadt = (void*)(*addr); + + *addr += sizeof(*fadt); + memset(fadt, 0, sizeof(*fadt)); + fadt->firmware_ctrl = facs; + fadt->dsdt = dsdt; + fadt->model = 1; + fadt->reserved1 = 0; + fadt->sci_int = cpu_to_le16(pm_sci_int); + fadt->smi_cmd = cpu_to_le32(SMI_CMD_IO_ADDR); + fadt->acpi_enable = 0xf1; + fadt->acpi_disable = 0xf0; + fadt->pm1a_evt_blk = cpu_to_le32(pm_io_base); + fadt->pm1a_cnt_blk = cpu_to_le32(pm_io_base + 0x04); + fadt->pm_tmr_blk = cpu_to_le32(pm_io_base + 0x08); + fadt->pm1_evt_len = 4; + fadt->pm1_cnt_len = 2; + fadt->pm_tmr_len = 4; + fadt->plvl2_lat = cpu_to_le16(0xfff); // C2 state not supported + fadt->plvl3_lat = cpu_to_le16(0xfff); // C3 state not supported + fadt->gpe0_blk = cpu_to_le32(0xafe0); + fadt->gpe0_blk_len = 4; + /* WBINVD + PROC_C1 + SLP_BUTTON + FIX_RTC */ + fadt->flags = cpu_to_le32((1 << 0) | (1 << 2) | (1 << 5) | (1 << 6)); + acpi_build_table_header((struct acpi_table_header *)fadt, "FACP", + sizeof(*fadt), 1); + + return (uint32_t)fadt; +} + +static uint32_t srat_build(uint32_t *addr) +{ +#ifndef BX_QEMU + return 0; #else - nb_numa_nodes = 0; -#endif - if (nb_numa_nodes > 0) { - addr = (addr + 7) & ~7; - srat_addr = addr; - srat_size = sizeof(*srat) + - sizeof(struct srat_processor_affinity) * smp_cpus + - sizeof(struct srat_memory_affinity) * (nb_numa_nodes + 2); - srat = (void *)(addr); - addr += srat_size; - } else { - srat_addr = addr; - srat = (void*)(addr); - srat_size = 0; + struct system_resource_affinity_table *srat; + int nb_numa_nodes, slots, i; + uint32_t size; + struct srat_processor_affinity *core; + struct srat_memory_affinity *numamem; + uint64_t mem_len, mem_base, next_base = 0, curnode; + + qemu_cfg_select(QEMU_CFG_NUMA); + nb_numa_nodes = qemu_cfg_get64(); + + if (!nb_numa_nodes) + return 0; + + *addr = (*addr + 7) & ~7; + srat = (void*)(*addr); + + size = sizeof(*srat) + + sizeof(struct srat_processor_affinity) * smp_cpus + + sizeof(struct srat_memory_affinity) * (nb_numa_nodes + 2); + + *addr += size; + + memset (srat, 0 , size); + srat->reserved1=1; + + core = (void*)(srat + 1); + for (i = 0; i < smp_cpus; ++i) { + core->type = SRAT_PROCESSOR; + core->length = sizeof(*core); + core->local_apic_id = i; + curnode = qemu_cfg_get64(); + core->proximity_lo = curnode; + memset (core->proximity_hi, 0, 3); + core->local_sapic_eid = 0; + if (i < smp_cpus) + core->flags = cpu_to_le32(1); + else + core->flags = 0; + core++; } - addr = (addr + 7) & ~7; - madt_addr = addr; - madt_size = sizeof(*madt) + - sizeof(struct madt_processor_apic) * MAX_CPUS + -#ifdef BX_QEMU - sizeof(struct madt_io_apic) /* + sizeof(struct madt_int_override) */; -#else - sizeof(struct madt_io_apic); -#endif - madt = (void *)(addr); - addr += madt_size; + /* the memory map is a bit tricky, it contains at least one hole + * from 640k-1M and possibly another one from 3.5G-4G. + */ + numamem = (void*)core; slots = 0; + acpi_build_srat_memory(numamem, 0, 640*1024, 0, 1); + next_base = 1024 * 1024; numamem++;slots++; + for (i = 1; i < nb_numa_nodes + 1; ++i) { + mem_base = next_base; + mem_len = qemu_cfg_get64(); + if (i == 1) mem_len -= 1024 * 1024; + next_base = mem_base + mem_len; + + /* Cut out the PCI hole */ + if (mem_base <= ram_size && next_base > ram_size) { + mem_len -= next_base - ram_size; + if (mem_len > 0) { + acpi_build_srat_memory(numamem, mem_base, mem_len, i-1, 1); + numamem++; slots++; + } + mem_base = 1ULL << 32; + mem_len = next_base - ram_size; + next_base += (1ULL << 32) - ram_size; + } + acpi_build_srat_memory(numamem, mem_base, mem_len, i-1, 1); + numamem++; slots++; + } + for (; slots < nb_numa_nodes + 2; slots++) { + acpi_build_srat_memory(numamem, 0, 0, 0, 0); + numamem++; + } -#ifdef BX_QEMU -#ifdef HPET_WORKS_IN_KVM - addr = (addr + 7) & ~7; - hpet_addr = addr; - hpet = (void *)(addr); - addr += sizeof(*hpet); -#endif -#endif + acpi_build_table_header((struct acpi_table_header *)srat, "SRAT", size, 1); - /* RSDP */ - memset(rsdp, 0, sizeof(*rsdp)); - memcpy(rsdp->signature, "RSD PTR ", 8); -#ifdef BX_QEMU - memcpy(rsdp->oem_id, "QEMU ", 6); -#else - memcpy(rsdp->oem_id, "BOCHS ", 6); -#endif - rsdp->rsdt_physical_address = cpu_to_le32(rsdt_addr); - rsdp->checksum = acpi_checksum((void *)rsdp, 20); - - /* FADT */ - memset(fadt, 0, sizeof(*fadt)); - fadt->firmware_ctrl = cpu_to_le32(facs_addr); - fadt->dsdt = cpu_to_le32(dsdt_addr); - fadt->model = 1; - fadt->reserved1 = 0; - fadt->sci_int = cpu_to_le16(pm_sci_int); - fadt->smi_cmd = cpu_to_le32(SMI_CMD_IO_ADDR); - fadt->acpi_enable = 0xf1; - fadt->acpi_disable = 0xf0; - fadt->pm1a_evt_blk = cpu_to_le32(pm_io_base); - fadt->pm1a_cnt_blk = cpu_to_le32(pm_io_base + 0x04); - fadt->pm_tmr_blk = cpu_to_le32(pm_io_base + 0x08); - fadt->pm1_evt_len = 4; - fadt->pm1_cnt_len = 2; - fadt->pm_tmr_len = 4; - fadt->plvl2_lat = cpu_to_le16(0xfff); // C2 state not supported - fadt->plvl3_lat = cpu_to_le16(0xfff); // C3 state not supported - fadt->gpe0_blk = cpu_to_le32(0xafe0); - fadt->gpe0_blk_len = 4; - /* WBINVD + PROC_C1 + SLP_BUTTON + FIX_RTC */ - fadt->flags = cpu_to_le32((1 << 0) | (1 << 2) | (1 << 5) | (1 << 6)); - acpi_build_table_header((struct acpi_table_header *)fadt, "FACP", - sizeof(*fadt), 1); - - /* FACS */ - memset(facs, 0, sizeof(*facs)); - memcpy(facs->signature, "FACS", 4); - facs->length = cpu_to_le32(sizeof(*facs)); - BX_INFO("Firmware waking vector %p\n", &facs->firmware_waking_vector); - - /* DSDT */ - memcpy(dsdt, AmlCode, sizeof(AmlCode)); - - /* MADT */ - { - struct madt_processor_apic *apic; - struct madt_io_apic *io_apic; -#ifdef BX_QEMU - struct madt_int_override *int_override; + return (uint32_t)srat; #endif +} - memset(madt, 0, madt_size); - madt->local_apic_address = cpu_to_le32(0xfee00000); - madt->flags = cpu_to_le32(1); - *(uint32_t*)APIC_MADT_PTR = apic = (void *)(madt + 1); - for(i=0;i<MAX_CPUS;i++) { - apic->type = APIC_PROCESSOR; - apic->length = sizeof(*apic); - apic->processor_id = i; - apic->local_apic_id = i; - if (i < smp_cpus) - apic->flags = cpu_to_le32(1); - else - apic->flags = 0; - apic++; - } - io_apic = (void *)apic; - io_apic->type = APIC_IO; - io_apic->length = sizeof(*io_apic); - io_apic->io_apic_id = smp_cpus; - io_apic->address = cpu_to_le32(0xfec00000); - io_apic->interrupt = cpu_to_le32(0); +static uint32_t madt_build(uint32_t *addr) +{ + struct multiple_apic_table *madt; + uint32_t size, i; + struct madt_processor_apic *apic; + struct madt_io_apic *io_apic; #ifdef BX_QEMU -#ifdef HPET_WORKS_IN_KVM - io_apic++; - - int_override = (void *)io_apic; - int_override->type = APIC_XRUPT_OVERRIDE; - int_override->length = sizeof(*int_override); - int_override->bus = cpu_to_le32(0); - int_override->source = cpu_to_le32(0); - int_override->gsi = cpu_to_le32(2); - int_override->flags = cpu_to_le32(0); + struct madt_int_override *int_override; #endif + + *addr = (*addr + 7) & ~7; + + madt = (void*)(*addr); + + size = sizeof(*madt) + sizeof(struct madt_processor_apic) * MAX_CPUS + + sizeof(struct madt_io_apic); + + memset(madt, 0, size); + madt->local_apic_address = cpu_to_le32(0xfee00000); + madt->flags = cpu_to_le32(1); + apic = (void *)(madt + 1); + *(uint32_t*)APIC_MADT_PTR = (uint32_t)apic; + for(i=0; i < MAX_CPUS; i++) { + apic->type = APIC_PROCESSOR; + apic->length = sizeof(*apic); + apic->processor_id = i; + apic->local_apic_id = i; + apic->flags = (i < smp_cpus) ? cpu_to_le32(1) : 0; + apic++; + } + io_apic = (void *)apic; + io_apic->type = APIC_IO; + io_apic->length = sizeof(*io_apic); + io_apic->io_apic_id = smp_cpus; + io_apic->address = cpu_to_le32(0xfec00000); + io_apic->interrupt = cpu_to_le32(0); + +#if defined(BX_QEMU) && defined(HPET_WORKS_IN_KVM) + io_apic++; + + int_override = (void *)io_apic; + memset(int_override, 0, sizeof(*int_override)); + + int_override->type = APIC_XRUPT_OVERRIDE; + int_override->length = sizeof(*int_override); + int_override->bus = cpu_to_le32(0); + int_override->source = cpu_to_le32(0); + int_override->gsi = cpu_to_le32(2); + int_override->flags = cpu_to_le32(0); + size += sizeof(struct madt_int_override); #endif - int_override = (struct madt_int_override*)(io_apic + 1); - for ( i = 0; i < 16; i++ ) { - if ( PCI_ISA_IRQ_MASK & (1U << i) ) { - memset(int_override, 0, sizeof(*int_override)); - int_override->type = APIC_XRUPT_OVERRIDE; - int_override->length = sizeof(*int_override); - int_override->source = i; - int_override->gsi = i; - int_override->flags = 0xd; /* active high, level triggered */ - } else { - /* No need for a INT source override structure. */ - continue; - } - int_override++; - madt_size += sizeof(struct madt_int_override); - } - acpi_build_table_header((struct acpi_table_header *)madt, - "APIC", madt_size, 1); + int_override = (struct madt_int_override*)(io_apic + 1); + for ( i = 0; i < 16; i++ ) { + if ( PCI_ISA_IRQ_MASK & (1U << i) ) { + memset(int_override, 0, sizeof(*int_override)); + int_override->type = APIC_XRUPT_OVERRIDE; + int_override->length = sizeof(*int_override); + int_override->source = i; + int_override->gsi = i; + int_override->flags = 0xd; /* active high, level triggered */ + } else { + /* No need for a INT source override structure. */ + continue; + } + int_override++; + size += sizeof(struct madt_int_override); } - memset(rsdt, 0, rsdt_size); -#ifdef BX_QEMU - /* SRAT */ - if (nb_numa_nodes > 0) { - struct srat_processor_affinity *core; - struct srat_memory_affinity *numamem; - int slots; - uint64_t mem_len, mem_base, next_base = 0, curnode; - - qemu_cfg_select(QEMU_CFG_NUMA); - qemu_cfg_get64(); - memset (srat, 0 , srat_size); - srat->reserved1=1; - - core = (void*)(srat + 1); - for (i = 0; i < smp_cpus; ++i) { - core->type = SRAT_PROCESSOR; - core->length = sizeof(*core); - core->local_apic_id = i; - curnode = qemu_cfg_get64(); - core->proximity_lo = curnode; - memset (core->proximity_hi, 0, 3); - core->local_sapic_eid = 0; - if (i < smp_cpus) - core->flags = cpu_to_le32(1); - else - core->flags = 0; - core++; - } + *addr += size; - /* the memory map is a bit tricky, it contains at least one hole - * from 640k-1M and possibly another one from 3.5G-4G. - */ - numamem = (void*)core; slots = 0; - acpi_build_srat_memory(numamem, 0, 640*1024, 0, 1); - next_base = 1024 * 1024; numamem++;slots++; - for (i = 1; i < nb_numa_nodes + 1; ++i) { - mem_base = next_base; - mem_len = qemu_cfg_get64(); - if (i == 1) mem_len -= 1024 * 1024; - next_base = mem_base + mem_len; - - /* Cut out the PCI hole */ - if (mem_base <= ram_size && next_base > ram_size) { - mem_len -= next_base - ram_size; - if (mem_len > 0) { - acpi_build_srat_memory(numamem, mem_base, mem_len, i-1, 1); - numamem++; slots++; - } - mem_base = 1ULL << 32; - mem_len = next_base - ram_size; - next_base += (1ULL << 32) - ram_size; - } - acpi_build_srat_memory(numamem, mem_base, mem_len, i-1, 1); - numamem++; slots++; - } - for (; slots < nb_numa_nodes + 2; slots++) { - acpi_build_srat_memory(numamem, 0, 0, 0, 0); - numamem++; - } + acpi_build_table_header((struct acpi_table_header *)madt, "APIC", size, 1); - acpi_build_table_header((struct acpi_table_header *)srat, - "SRAT", srat_size, 1); - } + return (uint32_t)madt; +} + +static uint32_t hpet_build(uint32_t *addr) +{ +#if !defined(BX_QEMU) || !defined(HPET_WORKS_IN_KVM) + return 0; +#else + struct acpi_20_hpet *hpet; + + *addr = (*addr + 7) & ~7; + + hpet = (void*)(*addr); + + *addr += sizeof(*hpet); - /* HPET */ -#ifdef HPET_WORKS_IN_KVM memset(hpet, 0, sizeof(*hpet)); /* Note timer_block_id value must be kept in sync with value advertised by * emulated hpet */ hpet->timer_block_id = cpu_to_le32(0x8086a201); hpet->addr.address = cpu_to_le32(ACPI_HPET_ADDRESS); - acpi_build_table_header((struct acpi_table_header *)hpet, - "HPET", sizeof(*hpet), 1); + acpi_build_table_header((struct acpi_table_header *)hpet, "HPET", + sizeof(*hpet), 1); + + return (uint32_t)hpet; #endif +} + +#define MAX_ACPI_TABLES 20 + +void acpi_bios_init(void) +{ + struct rsdp_descriptor *rsdp; + struct rsdt_descriptor_rev1 *rsdt; + uint32_t base_addr, addr, rsdp_size; + uint32_t tables[MAX_ACPI_TABLES], tbl_idx = 0; + uint32_t facs_addr, dsdt_addr; +#ifdef BX_QEMU + uint16_t external_tables, i; +#endif + + /* reserve memory space for tables */ +#ifdef BX_USE_EBDA_TABLES + ebda_cur_addr = align(ebda_cur_addr, 16); + rsdp = (void *)(ebda_cur_addr); + ebda_cur_addr += sizeof(*rsdp); +#else + bios_table_cur_addr = align(bios_table_cur_addr, 16); + rsdp = (void *)(bios_table_cur_addr); + bios_table_cur_addr += sizeof(*rsdp); +#endif + + addr = base_addr = ram_size - ACPI_DATA_SIZE; + + facs_addr = tables[tbl_idx++] = facs_build(&addr); + + dsdt_addr = tables[tbl_idx++] = dsdt_build(&addr); + + tables[tbl_idx++] = fadt_build(&addr, facs_addr, dsdt_addr); + + tables[tbl_idx] = srat_build(&addr); + if (tables[tbl_idx]) + tbl_idx++; + + tables[tbl_idx++] = madt_build(&addr); + + tables[tbl_idx] = hpet_build(&addr); + if (tables[tbl_idx]) + tbl_idx++; + +#ifdef BX_QEMU + external_tables = acpi_additional_tables(); - acpi_additional_tables(); /* resets cfg to required entry */ for(i = 0; i < external_tables; i++) { uint16_t len; if(acpi_load_table(i, addr, &len) < 0) BX_PANIC("Failed to load ACPI table from QEMU\n"); - rsdt->table_offset_entry[nb_rsdt_entries++] = cpu_to_le32(addr); + tables[tbl_idx++] = addr; addr += len; - if(addr >= ram_size) + if(addr >= ram_size || tbl_idx == MAX_ACPI_TABLES) BX_PANIC("ACPI table overflow\n"); } #endif - /* RSDT */ - rsdt->table_offset_entry[nb_rsdt_entries++] = cpu_to_le32(fadt_addr); - rsdt->table_offset_entry[nb_rsdt_entries++] = cpu_to_le32(madt_addr); - /* kvm has no ssdt (processors are in dsdt) */ -// rsdt->table_offset_entry[nb_rsdt_entries++] = cpu_to_le32(ssdt_addr); -#ifdef BX_QEMU - /* No HPET (yet) */ -// rsdt->table_offset_entry[nb_rsdt_entries++] = cpu_to_le32(hpet_addr); - if (nb_numa_nodes > 0) - rsdt->table_offset_entry[nb_rsdt_entries++] = cpu_to_le32(srat_addr); -#endif - rsdt_size -= MAX_RSDT_ENTRIES * 4; - rsdt_size += nb_rsdt_entries * 4; + rsdt = (void *)(addr); + rsdp_size = sizeof(*rsdt) + sizeof(uint32_t) * tbl_idx; + memcpy(rsdt->table_offset_entry, tables, sizeof(uint32_t) * tbl_idx); acpi_build_table_header((struct acpi_table_header *)rsdt, "RSDT", - rsdt_size, 1); + rsdp_size, 1); + + rsdp_build(rsdp, addr); - acpi_tables_size = addr - base_addr; + addr += rsdp_size; BX_INFO("ACPI tables: RSDP addr=0x%08lx ACPI DATA addr=0x%08lx size=0x%x\n", (unsigned long)rsdp, - (unsigned long)rsdt, acpi_tables_size); - + (unsigned long)rsdt, addr - base_addr); } /* SMBIOS entry point -- must be written to a 16-bit aligned address -- Gleb. -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html