RE: [RFC] kvm irq assignment

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Hi Avi and all

This is the revised patch.

All PCI devices send their interrupts to both the PIC and the IOAPIC.
a) When the PIC is enabled and the IOAPIC is disabled, all redirection
entries in the IOAPIC are masked.
b) When the PIC is disabled and the IOAPIC is enabled, bit 7 of each link
entry is set, which means that link entry is disabled.
The guest OS needs to guarantee that the PIC and the IOAPIC are not enabled
at the same time; otherwise the guest will receive many spurious interrupts.

Tested by running a Linux guest on kvm/ia32 and kvm/ia64.


Thanks,
Anthony



diff --git a/bios/acpi-dsdt.dsl b/bios/acpi-dsdt.dsl
index 21fc76a..e12fd66 100755
--- a/bios/acpi-dsdt.dsl
+++ b/bios/acpi-dsdt.dsl
@@ -201,14 +201,28 @@ DefinitionBlock (
         }
     }
 
+    Name (PICD, 0)
 
-    /* PCI Bus definition */
+    Method(_PIC, 1)
+    {
+        Store(Arg0, PICD)
+    }
+
+    /*PCI Bus definition */
     Scope(\_SB) {
         Device(PCI0) {
             Name (_HID, EisaId ("PNP0A03"))
             Name (_ADR, 0x00)
             Name (_UID, 1)
-            Name(_PRT, Package() {
+
+            Method(_PRT,0){
+                If(PICD){
+                    Return(PRTA)
+                }
+                Return(PRTP)
+            }
+
+            Name(PRTP, Package() {
                 /* PCI IRQ routing table, example from ACPI 2.0a
specification,
                    section 6.2.8.1 */
                 /* Note: we provide the same info as the PCI routing
@@ -407,6 +421,202 @@ DefinitionBlock (
                 Package() {0x001fffff, 3, LNKB, 0},
             })
 
+            Name(PRTA, Package() {
+                /* IOAPIC use fixed connection */
+
+                // PCI Slot 0
+                Package() {0x0000ffff, 0, 0, 16},
+                Package() {0x0000ffff, 1, 0, 16},
+                Package() {0x0000ffff, 2, 0, 16},
+                Package() {0x0000ffff, 3, 0, 16},
+
+                // PCI Slot 1
+                Package() {0x0001ffff, 0, 0, 17},
+                Package() {0x0001ffff, 1, 0, 17},
+                Package() {0x0001ffff, 2, 0, 17},
+                Package() {0x0001ffff, 3, 0, 17},
+
+                // PCI Slot 2
+                Package() {0x0002ffff, 0, 0, 18},
+                Package() {0x0002ffff, 1, 0, 18},
+                Package() {0x0002ffff, 2, 0, 18},
+                Package() {0x0002ffff, 3, 0, 18},
+
+                // PCI Slot 3
+                Package() {0x0003ffff, 0, 0, 19},
+                Package() {0x0003ffff, 1, 0, 19},
+                Package() {0x0003ffff, 2, 0, 19},
+                Package() {0x0003ffff, 3, 0, 19},
+
+                // PCI Slot 4
+                Package() {0x0004ffff, 0, 0, 20},
+                Package() {0x0004ffff, 1, 0, 20},
+                Package() {0x0004ffff, 2, 0, 20},
+                Package() {0x0004ffff, 3, 0, 20},
+
+                // PCI Slot 5
+                Package() {0x0005ffff, 0, 0, 21},
+                Package() {0x0005ffff, 1, 0, 21},
+                Package() {0x0005ffff, 2, 0, 21},
+                Package() {0x0005ffff, 3, 0, 21},
+
+                // PCI Slot 6
+                Package() {0x0006ffff, 0, 0, 22},
+                Package() {0x0006ffff, 1, 0, 22},
+                Package() {0x0006ffff, 2, 0, 22},
+                Package() {0x0006ffff, 3, 0, 22},
+
+                // PCI Slot 7
+                Package() {0x0007ffff, 0, 0, 23},
+                Package() {0x0007ffff, 1, 0, 23},
+                Package() {0x0007ffff, 2, 0, 23},
+                Package() {0x0007ffff, 3, 0, 23},
+
+                // PCI Slot 8
+                Package() {0x0008ffff, 0, 0, 16},
+                Package() {0x0008ffff, 1, 0, 16},
+                Package() {0x0008ffff, 2, 0, 16},
+                Package() {0x0008ffff, 3, 0, 16},
+
+                // PCI Slot 9
+                Package() {0x0009ffff, 0, 0, 17},
+                Package() {0x0009ffff, 1, 0, 17},
+                Package() {0x0009ffff, 2, 0, 17},
+                Package() {0x0009ffff, 3, 0, 17},
+
+                // PCI Slot 10
+                Package() {0x000affff, 0, 0, 18},
+                Package() {0x000affff, 1, 0, 18},
+                Package() {0x000affff, 2, 0, 18},
+                Package() {0x000affff, 3, 0, 18},
+
+                // PCI Slot 11
+                Package() {0x000bffff, 0, 0, 19},
+                Package() {0x000bffff, 1, 0, 19},
+                Package() {0x000bffff, 2, 0, 19},
+                Package() {0x000bffff, 3, 0, 19},
+
+                // PCI Slot 12
+                Package() {0x000cffff, 0, 0, 20},
+                Package() {0x000cffff, 1, 0, 20},
+                Package() {0x000cffff, 2, 0, 20},
+                Package() {0x000cffff, 3, 0, 20},
+
+                // PCI Slot 13
+                Package() {0x000dffff, 0, 0, 21},
+                Package() {0x000dffff, 1, 0, 21},
+                Package() {0x000dffff, 2, 0, 21},
+                Package() {0x000dffff, 3, 0, 21},
+
+                // PCI Slot 14
+                Package() {0x000effff, 0, 0, 22},
+                Package() {0x000effff, 1, 0, 22},
+                Package() {0x000effff, 2, 0, 22},
+                Package() {0x000effff, 3, 0, 22},
+
+                // PCI Slot 15
+                Package() {0x000fffff, 0, 0, 23},
+                Package() {0x000fffff, 1, 0, 23},
+                Package() {0x000fffff, 2, 0, 23},
+                Package() {0x000fffff, 3, 0, 23},
+
+                // PCI Slot 16
+                Package() {0x0010ffff, 0, 0, 16},
+                Package() {0x0010ffff, 1, 0, 16},
+                Package() {0x0010ffff, 2, 0, 16},
+                Package() {0x0010ffff, 3, 0, 16},
+
+                // PCI Slot 17
+                Package() {0x0011ffff, 0, 0, 17},
+                Package() {0x0011ffff, 1, 0, 17},
+                Package() {0x0011ffff, 2, 0, 17},
+                Package() {0x0011ffff, 3, 0, 17},
+
+                // PCI Slot 18
+                Package() {0x0012ffff, 0, 0, 18},
+                Package() {0x0012ffff, 1, 0, 18},
+                Package() {0x0012ffff, 2, 0, 18},
+                Package() {0x0012ffff, 3, 0, 18},
+
+                // PCI Slot 19
+                Package() {0x0013ffff, 0, 0, 19},
+                Package() {0x0013ffff, 1, 0, 19},
+                Package() {0x0013ffff, 2, 0, 19},
+                Package() {0x0013ffff, 3, 0, 19},
+
+                // PCI Slot 20
+                Package() {0x0014ffff, 0, 0, 20},
+                Package() {0x0014ffff, 1, 0, 20},
+                Package() {0x0014ffff, 2, 0, 20},
+                Package() {0x0014ffff, 3, 0, 20},
+
+                // PCI Slot 21
+                Package() {0x0015ffff, 0, 0, 21},
+                Package() {0x0015ffff, 1, 0, 21},
+                Package() {0x0015ffff, 2, 0, 21},
+                Package() {0x0015ffff, 3, 0, 21},
+
+                // PCI Slot 22
+                Package() {0x0016ffff, 0, 0, 22},
+                Package() {0x0016ffff, 1, 0, 22},
+                Package() {0x0016ffff, 2, 0, 22},
+                Package() {0x0016ffff, 3, 0, 22},
+
+                // PCI Slot 23
+                Package() {0x0017ffff, 0, 0, 23},
+                Package() {0x0017ffff, 1, 0, 23},
+                Package() {0x0017ffff, 2, 0, 23},
+                Package() {0x0017ffff, 3, 0, 23},
+
+                // PCI Slot 24
+                Package() {0x0018ffff, 0, 0, 16},
+                Package() {0x0018ffff, 1, 0, 16},
+                Package() {0x0018ffff, 2, 0, 16},
+                Package() {0x0018ffff, 3, 0, 16},
+
+                // PCI Slot 25
+                Package() {0x0019ffff, 0, 0, 17},
+                Package() {0x0019ffff, 1, 0, 17},
+                Package() {0x0019ffff, 2, 0, 17},
+                Package() {0x0019ffff, 3, 0, 17},
+
+                // PCI Slot 26
+                Package() {0x001affff, 0, 0, 18},
+                Package() {0x001affff, 1, 0, 18},
+                Package() {0x001affff, 2, 0, 18},
+                Package() {0x001affff, 3, 0, 18},
+
+                // PCI Slot 27
+                Package() {0x001bffff, 0, 0, 19},
+                Package() {0x001bffff, 1, 0, 19},
+                Package() {0x001bffff, 2, 0, 19},
+                Package() {0x001bffff, 3, 0, 19},
+
+                // PCI Slot 28
+                Package() {0x001cffff, 0, 0, 20},
+                Package() {0x001cffff, 1, 0, 20},
+                Package() {0x001cffff, 2, 0, 20},
+                Package() {0x001cffff, 3, 0, 20},
+
+                // PCI Slot 29
+                Package() {0x001dffff, 0, 0, 21},
+                Package() {0x001dffff, 1, 0, 21},
+                Package() {0x001dffff, 2, 0, 21},
+                Package() {0x001dffff, 3, 0, 21},
+
+                // PCI Slot 30
+                Package() {0x001effff, 0, 0, 22},
+                Package() {0x001effff, 1, 0, 22},
+                Package() {0x001effff, 2, 0, 22},
+                Package() {0x001effff, 3, 0, 22},
+
+                // PCI Slot 31
+                Package() {0x001fffff, 0, 0, 23},
+                Package() {0x001fffff, 1, 0, 23},
+                Package() {0x001fffff, 2, 0, 23},
+                Package() {0x001fffff, 3, 0, 23},
+            })
+
             OperationRegion(PCST, SystemIO, 0xae00, 0x08)
             Field (PCST, DWordAcc, NoLock, WriteAsZeros)
 	    {
diff --git a/qemu/hw/apic.c b/qemu/hw/apic.c
index a14cab2..c3014fa 100644
--- a/qemu/hw/apic.c
+++ b/qemu/hw/apic.c
@@ -1053,9 +1053,25 @@ static void ioapic_service(IOAPICState *s)
     }
 }
 
+int ioapic_map_irq(int devfn, int irq_num)
+{
+    int irq;
+    irq = ((devfn >> 3) & 7) + 16;
+    return irq;
+}
+#ifdef KVM_CAP_IRQCHIP
+static int ioapic_irq_count[IOAPIC_NUM_PINS];
+#endif
+
 void ioapic_set_irq(void *opaque, int vector, int level)
 {
     IOAPICState *s = opaque;
+#ifdef KVM_CAP_IRQCHIP
+    ioapic_irq_count[vector] += level;
+    if (kvm_enabled())
+	if (kvm_set_irq(vector, ioapic_irq_count[vector] == 0))
+	    return;
+#endif
 
     if (vector >= 0 && vector < IOAPIC_NUM_PINS) {
         uint32_t mask = 1 << vector;
diff --git a/qemu/hw/ipf.c b/qemu/hw/ipf.c
index b11e328..4761463 100644
--- a/qemu/hw/ipf.c
+++ b/qemu/hw/ipf.c
@@ -672,3 +672,23 @@ QEMUMachine ipf_machine = {
     ipf_init_pci,
     VGA_RAM_SIZE + VGA_RAM_SIZE,
 };
+
+#define IOAPIC_NUM_PINS 48
+static int ioapic_irq_count[IOAPIC_NUM_PINS];
+
+int ioapic_map_irq(int devfn, int irq_num)
+{
+    int irq, dev;
+    dev = devfn >> 3;
+    irq = ((((dev << 2) + (dev >> 3) + irq_num) & 31) + 16);
+    return irq;
+}
+
+void ioapic_set_irq(void *opaque, int vector, int level)
+{
+    ioapic_irq_count[vector] += level;
+    if (kvm_enabled())
+	if (kvm_set_irq(vector, ioapic_irq_count[vector] == 0))
+	    return;
+}
+
diff --git a/qemu/hw/pc.h b/qemu/hw/pc.h
index c284bf1..ef09a78 100644
--- a/qemu/hw/pc.h
+++ b/qemu/hw/pc.h
@@ -47,6 +47,7 @@ int apic_accept_pic_intr(CPUState *env);
 void apic_local_deliver(CPUState *env, int vector);
 int apic_get_interrupt(CPUState *env);
 IOAPICState *ioapic_init(void);
+int ioapic_map_irq(int devfn, int irq_num);
 void ioapic_set_irq(void *opaque, int vector, int level);
 
 /* i8254.c */
diff --git a/qemu/hw/pci.c b/qemu/hw/pci.c
index a23a466..f96fbb5 100644
--- a/qemu/hw/pci.c
+++ b/qemu/hw/pci.c
@@ -27,6 +27,8 @@
 #include "net.h"
 #include "pc.h"
 
+#include "qemu-kvm.h"
+
 //#define DEBUG_PCI
 
 struct PCIBus {
@@ -534,12 +536,18 @@ static void pci_set_irq(void *opaque, int irq_num,
int level)
     PCIDevice *pci_dev = (PCIDevice *)opaque;
     PCIBus *bus;
     int change;
-
+#ifdef KVM_CAP_IRQCHIP
+    int irq;
+#endif 
     change = level - pci_dev->irq_state[irq_num];
     if (!change)
         return;
 
     pci_dev->irq_state[irq_num] = level;
+#ifdef KVM_CAP_IRQCHIP
+    irq = ioapic_map_irq(pci_dev->devfn, irq_num);
+    ioapic_set_irq(opaque, irq, change);
+#endif
     for (;;) {
         bus = pci_dev->bus;
         irq_num = bus->map_irq(pci_dev, irq_num);
diff --git a/qemu/hw/piix_pci.c b/qemu/hw/piix_pci.c
index 90cb3a6..96316ca 100644
--- a/qemu/hw/piix_pci.c
+++ b/qemu/hw/piix_pci.c
@@ -225,6 +226,9 @@ static void piix3_set_irq(qemu_irq *pic, int
irq_num, int level)
     /* now we change the pic irq level according to the piix irq
mappings */
     /* XXX: optimize */
     pic_irq = piix3_dev->config[0x60 + irq_num];
+    /* if bit7 set 1, this link is disabled */
+    if (pic_irq & 0x80)  
+        return;
     if (pic_irq < 16) {
         /* The pic level is the logical OR of all the PCI irqs mapped
            to it */





Xu, Anthony wrote:
> Avi Kivity wrote:
>> Xu, Anthony wrote:
>>> Hi all,
>>> Thanks for your comments.
>>> 
>>> I made this new patch based on your comments
>>> 
>>> 1. use bimodal _PRT, to take advantage of IOAPIC pin 16~23
>>> 	the mapping is simple,  slot  ->  (slot&7)+16 IOAPIC pin,
>>> someone may provide good mapping ?
>>> 
>> 
>> I think it's fine. If we find a better one later, or if we add
>> another ioapic, we can easily change it since the bios and qemu are
>> shipped as a unit. 
>> 
>>> 2. use ISA-bridge configure space 0x64 byte as a communication
>>> 	mechansim. When guest BIOS invokes _PIC, the value is passed to
>>>              qemu through byte 0x64. qemu know whether it is PIC
>>> mode and APIC mode by checking byte 0x64. 
>>> 3. pci_slot_get_pirq and piix3_set_irq adopt different operation
>>> based on PIC mode/APIC mode 
>>> 
>> 
>> I'm not sure how real hardware works, but I _think_ that it routes
>> irqs unconditionally to both the legacy path and directly to the
>> ioapic. So for example if slot 5 asserts an interrupt, we map it
>> through the pci link mapping and generate an active high interrupt to
>> one of {5, 10, 11} (both pic and ioapic), and simultaneously an
>> active low interrupt to ioapic pin 21.
> I think what you described is correct.
> 
> 
>> 
>> The _PIC method should disable the link interrupts if ioapic mode is
>> disabled.
> Typo!  If ioapic mode is enabled.
> 
> In the x86 BIOS, the OS disables a link interrupt through the link
> device's _DIS method.
> 
> 
>> 
>> This removes the need for communication between the bios and qemu.
>> Agree 
> 
>> 
>>> 
>>> +            /* APIC and PIC flag */
>>> +            OperationRegion (P40D, PCI_Config, 0x64, 0x01) +
>>> 
>> 
>> This is actually SERIRQC, serial irq control.
>> 
>>> +
>>> +#ifdef KVM_CAP_IRQCHIP
>>> 
>> 
>> This should be unconditional.
>> 
>>> +static int pci_slot_get_pirq(PCIDevice *pci_dev, int irq_num) +{ +
>>> int slot_addend; +    if( piix3_dev->config[0x64])  // APIC mode
>>> +        return ((pci_dev->devfn >> 3) & 7)+16;
>>> +    else {		// PIC mode
>>> +        slot_addend = (pci_dev->devfn >> 3) - 1;
>>> +        return (irq_num + slot_addend) & 3;
>>> +    }
>>> +}
>>> 
>> 
>> What I'm suggesting is to "fork" the interrupt into two lines, one
>> legacy path and the ioapic path.
> 
> I'll try this way.
> 
> Anthony
--
To unsubscribe from this list: send the line "unsubscribe kvm-ia64" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[Index of Archives]     [Linux KVM Devel]     [Linux Virtualization]     [Big List of Linux Books]     [Linux SCSI]     [Yosemite Forum]

  Powered by Linux