These patches resolve the irq0->inti2 override issue, and get the HPET working on KVM with and without -no-kvm-irqchip (i.e., when the HPET takes over, it disables the userspace or in-kernel PIT as appropriate). The irq0->inti2 override will always be used unless the kernel cannot do IRQ routing (i.e., for compatibility with old kernels). So if the kernel is capable, userspace sets up irq0->inti2 via the IRQ routing interface, and adds the irq0->inti2 override to the MADT interrupt source override table and to the MP table (for the no-ACPI case). A couple of months ago, Marcelo was seeing RHEL5 guests complain of an invalid checksum with these patches, but later he couldn't reproduce it, and I'm not seeing it now. While all guests still need to be fully tested, everything appears to be in order. I've tested on win2k8 64-bit, win2k8 32-bit, RHEL 5.3 32-bit, and Ubuntu 8.10 64-bit. Signed-off-by: Beth Kon <eak@xxxxxxxxxx> diff --git a/bios/rombios32.c b/bios/rombios32.c index 4dea066..5cf1f54 100755 --- a/bios/rombios32.c +++ b/bios/rombios32.c @@ -443,6 +443,7 @@ uint32_t cpuid_ext_features; unsigned long ram_size; uint64_t ram_end; uint8_t bios_uuid[16]; +uint8_t irq0_override; #ifdef BX_USE_EBDA_TABLES unsigned long ebda_cur_addr; #endif @@ -475,6 +476,7 @@ void wrmsr_smp(uint32_t index, uint64_t val) #define QEMU_CFG_SIGNATURE 0x00 #define QEMU_CFG_ID 0x01 #define QEMU_CFG_UUID 0x02 +#define QEMU_CFG_IRQ0_OVERRIDE 0x0d int qemu_cfg_port; @@ -516,6 +518,18 @@ void uuid_probe(void) memset(bios_uuid, 0, 16); } +void irq0_override_probe(void) +{ +#ifdef BX_QEMU + if(qemu_cfg_port) { + qemu_cfg_select(QEMU_CFG_IRQ0_OVERRIDE); + qemu_cfg_read(&irq0_override, 1); + return; + } +#endif + memset(&irq0_override, 0, 1); +} + void cpu_probe(void) { uint32_t eax, ebx, ecx, edx; @@ -1152,6 +1166,8 @@ static void mptable_init(void) /* irqs */ for(i = 0; i < 16; i++) { + if (irq0_override && i == 2) + continue; putb(&q, 3); /* entry type = I/O interrupt */ putb(&q, 0); /* interrupt type = vectored 
interrupt */ putb(&q, 0); /* flags: po=0, el=0 */ @@ -1159,7 +1175,10 @@ static void mptable_init(void) putb(&q, 0); /* source bus ID = ISA */ putb(&q, i); /* source bus IRQ */ putb(&q, ioapic_id); /* dest I/O APIC ID */ - putb(&q, i); /* dest I/O APIC interrupt in */ + if (irq0_override && i == 0) + putb(&q, 2); /* dest I/O APIC interrupt in */ + else + putb(&q, i); /* dest I/O APIC interrupt in */ } /* patch length */ len = q - mp_config_table; @@ -1508,6 +1527,11 @@ void acpi_bios_init(void) sizeof(struct madt_processor_apic) * MAX_CPUS + sizeof(struct madt_io_apic); madt = (void *)(addr); + for (i = 0; i < 16; i++) + if (PCI_ISA_IRQ_MASK & (1U << i)) + madt_size += sizeof(struct madt_intsrcovr); + if (irq0_override) + madt_size += sizeof(struct madt_intsrcovr); addr += madt_size; acpi_tables_size = addr - base_addr; @@ -1597,8 +1621,15 @@ void acpi_bios_init(void) io_apic->interrupt = cpu_to_le32(0); intsrcovr = (struct madt_intsrcovr*)(io_apic + 1); - for ( i = 0; i < 16; i++ ) { - if ( PCI_ISA_IRQ_MASK & (1U << i) ) { + for (i = 0; i < 16; i++) { + if (irq0_override && i == 0) { + memset(intsrcovr, 0, sizeof(*intsrcovr)); + intsrcovr->type = APIC_XRUPT_OVERRIDE; + intsrcovr->length = sizeof(*intsrcovr); + intsrcovr->source = i; + intsrcovr->gsi = 2; + intsrcovr->flags = 0; //conforms to bus specifications + } else if (PCI_ISA_IRQ_MASK & (1U << i)) { memset(intsrcovr, 0, sizeof(*intsrcovr)); intsrcovr->type = APIC_XRUPT_OVERRIDE; intsrcovr->length = sizeof(*intsrcovr); @@ -1610,7 +1641,6 @@ void acpi_bios_init(void) continue; } intsrcovr++; - madt_size += sizeof(struct madt_intsrcovr); } acpi_build_table_header((struct acpi_table_header *)madt, "APIC", madt_size, 1); @@ -2230,6 +2260,8 @@ void rombios32_init(uint32_t *s3_resume_vector, uint8_t *shutdown_flag) if (bios_table_cur_addr != 0) { + irq0_override_probe(); + mptable_init(); uuid_probe(); diff --git a/qemu/hw/fw_cfg.c b/qemu/hw/fw_cfg.c index e324e8d..f06dc3c 100644 --- a/qemu/hw/fw_cfg.c +++ 
b/qemu/hw/fw_cfg.c @@ -279,6 +279,7 @@ void *fw_cfg_init(uint32_t ctl_port, uint32_t data_port, fw_cfg_add_bytes(s, FW_CFG_UUID, qemu_uuid, 16); fw_cfg_add_i16(s, FW_CFG_NOGRAPHIC, (uint16_t)nographic); fw_cfg_add_i16(s, FW_CFG_NB_CPUS, (uint16_t)smp_cpus); + fw_cfg_add_i16(s, FW_CFG_IRQ0_OVERRIDE, (uint16_t)irq0override); register_savevm("fw_cfg", -1, 1, fw_cfg_save, fw_cfg_load, s); qemu_register_reset(fw_cfg_reset, s); diff --git a/qemu/hw/fw_cfg.h b/qemu/hw/fw_cfg.h index 41a3dd0..ef6e8c6 100644 --- a/qemu/hw/fw_cfg.h +++ b/qemu/hw/fw_cfg.h @@ -14,6 +14,7 @@ #define FW_CFG_INITRD_ADDR 0x0a #define FW_CFG_INITRD_SIZE 0x0b #define FW_CFG_BOOT_DEVICE 0x0c +#define FW_CFG_IRQ0_OVERRIDE 0x0d #define FW_CFG_MAX_ENTRY 0x10 #define FW_CFG_WRITE_CHANNEL 0x4000 diff --git a/qemu/hw/ioapic.c b/qemu/hw/ioapic.c index 0b70cf6..4dafb52 100644 --- a/qemu/hw/ioapic.c +++ b/qemu/hw/ioapic.c @@ -23,6 +23,7 @@ #include "hw.h" #include "pc.h" +#include "sysemu.h" #include "qemu-timer.h" #include "host-utils.h" @@ -95,14 +96,12 @@ void ioapic_set_irq(void *opaque, int vector, int level) { IOAPICState *s = opaque; -#if 0 /* ISA IRQs map to GSI 1-1 except for IRQ0 which maps * to GSI 2. GSI maps to ioapic 1-1. This is not * the cleanest way of doing it but it should work. 
*/ - if (vector == 0) + if (vector == 0 && irq0override) vector = 2; -#endif if (vector >= 0 && vector < IOAPIC_NUM_PINS) { uint32_t mask = 1 << vector; diff --git a/qemu/pc-bios/bios.bin b/qemu/pc-bios/bios.bin index 04c1f86..c0b07b5 100644 Binary files a/qemu/pc-bios/bios.bin and b/qemu/pc-bios/bios.bin differ diff --git a/qemu/qemu-kvm.c b/qemu/qemu-kvm.c index 4164368..2755b5e 100644 --- a/qemu/qemu-kvm.c +++ b/qemu/qemu-kvm.c @@ -812,7 +812,10 @@ int kvm_qemu_create_context(void) return r; } for (i = 0; i < 24; ++i) { - r = kvm_add_irq_route(kvm_context, i, KVM_IRQCHIP_IOAPIC, i); + if (i == 0) + r = kvm_add_irq_route(kvm_context, i, KVM_IRQCHIP_IOAPIC, 2); + else if (i != 2) + r = kvm_add_irq_route(kvm_context, i, KVM_IRQCHIP_IOAPIC, i); if (r < 0) return r; } diff --git a/qemu/sysemu.h b/qemu/sysemu.h index d765465..fd127bd 100644 --- a/qemu/sysemu.h +++ b/qemu/sysemu.h @@ -94,6 +94,7 @@ extern int graphic_width; extern int graphic_height; extern int graphic_depth; extern int nographic; +extern int irq0override; extern const char *keyboard_layout; extern int win2k_install_hack; extern int rtc_td_hack; diff --git a/qemu/vl.c b/qemu/vl.c index b3da7ad..b982b53 100644 --- a/qemu/vl.c +++ b/qemu/vl.c @@ -205,6 +205,7 @@ static int vga_ram_size; enum vga_retrace_method vga_retrace_method = VGA_RETRACE_DUMB; static DisplayState *display_state; int nographic; +int irq0override; static int curses; static int sdl; const char* keyboard_layout = NULL; @@ -4516,6 +4517,7 @@ int main(int argc, char **argv, char **envp) #endif snapshot = 0; nographic = 0; + irq0override = 1; curses = 0; kernel_filename = NULL; kernel_cmdline = ""; @@ -5536,8 +5538,12 @@ int main(int argc, char **argv, char **envp) } } - if (kvm_enabled()) - kvm_init_ap(); + if (kvm_enabled()) { + kvm_init_ap(); + if (kvm_irqchip && !kvm_has_gsi_routing(kvm_context)) { + irq0override = 0; + } + } machine->init(ram_size, vga_ram_size, boot_devices, kernel_filename, kernel_cmdline, initrd_filename, 
cpu_model); -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html