On Tue, Apr 24, 2012 at 09:50:02AM +0300, Gleb Natapov wrote: > On Mon, Apr 23, 2012 at 05:04:40PM +0300, Michael S. Tsirkin wrote: > > The idea is simple: there's a bit, per APIC, in guest memory, > > that tells the guest that it does not need EOI. > > Guest tests it using a single est and clear operation - this is > > necessary so that host can detect interrupt nesting - and if set, it can > > skip the EOI MSR. > > > > This patch is not final yet, pls don't apply > > until host side is also finalized. Including > > it here for completeness to show another user > > of the new eoi_write interface. > > > > Signed-off-by: Michael S. Tsirkin <mst@xxxxxxxxxx> Just pointing out the above: this last patch in the series is for illustrative purposes at this point. > > --- > > arch/x86/include/asm/bitops.h | 6 +++- > > arch/x86/include/asm/kvm_para.h | 2 + > > arch/x86/kernel/kvm.c | 51 ++++++++++++++++++++++++++++++++++++-- > > 3 files changed, 54 insertions(+), 5 deletions(-) > > > > diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h > > index b97596e..c9c70ea 100644 > > --- a/arch/x86/include/asm/bitops.h > > +++ b/arch/x86/include/asm/bitops.h > > @@ -26,11 +26,13 @@ > > #if __GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ < 1) > > /* Technically wrong, but this avoids compilation errors on some gcc > > versions. */ > > -#define BITOP_ADDR(x) "=m" (*(volatile long *) (x)) > > +#define BITOP_ADDR_CONSTRAINT "=m" > > #else > > -#define BITOP_ADDR(x) "+m" (*(volatile long *) (x)) > > +#define BITOP_ADDR_CONSTRAINT "+m" > > #endif > > > > +#define BITOP_ADDR(x) BITOP_ADDR_CONSTRAINT (*(volatile long *) (x)) > > + > > #define ADDR BITOP_ADDR(addr) > > > > /* > > diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h > > index 734c376..195840d 100644 > > --- a/arch/x86/include/asm/kvm_para.h > > +++ b/arch/x86/include/asm/kvm_para.h > > @@ -22,6 +22,7 @@ > > #define KVM_FEATURE_CLOCKSOURCE2 3 > > #define KVM_FEATURE_ASYNC_PF 4 > > #define KVM_FEATURE_STEAL_TIME 5 > > +#define KVM_FEATURE_EOI 6 > > > > /* The last 8 bits are used to indicate how to interpret the flags field > > * in pvclock structure. If no bits are set, all flags are ignored. > > @@ -37,6 +38,7 @@ > > #define MSR_KVM_SYSTEM_TIME_NEW 0x4b564d01 > > #define MSR_KVM_ASYNC_PF_EN 0x4b564d02 > > #define MSR_KVM_STEAL_TIME 0x4b564d03 > > +#define MSR_KVM_EOI_EN 0x4b564d04 > > > > struct kvm_steal_time { > > __u64 steal; > > diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c > > index b8ba6e4..ad66cdd 100644 > > --- a/arch/x86/kernel/kvm.c > > +++ b/arch/x86/kernel/kvm.c > > @@ -39,6 +39,8 @@ > > #include <asm/desc.h> > > #include <asm/tlbflush.h> > > #include <asm/idle.h> > > +#include <asm/apic.h> > > +#include <asm/apicdef.h> > > > > static int kvmapf = 1; > > > > @@ -290,6 +292,27 @@ static void kvm_register_steal_time(void) > > cpu, __pa(st)); > > } > > > > +static DEFINE_PER_CPU(u16, kvm_apic_eoi) __aligned(2) = 0; > > + > > +/* Our own copy of __test_and_clear_bit to make sure > > + * it is done with a single instruction */ > > +static inline int kvm_test_and_clear_bit(int nr, volatile u16* addr) > > +{ > > + int oldbit; > > + > > + asm volatile("btr %2,%1\n\t" > > + "sbb %0,%0" > > + : "=r" (oldbit), BITOP_ADDR_CONSTRAINT(*addr) : "Ir" (nr)); > > + return oldbit; > > +} > > + > > +static void kvm_guest_apic_eoi_write(u32 reg, u32 val) > > +{ > > + if (kvm_test_and_clear_bit(0, &__get_cpu_var(kvm_apic_eoi))) > > + return; > > + apic->write(APIC_EOI, APIC_EOI_ACK); > > +} > > + > > void __cpuinit kvm_guest_cpu_init(void) > > { > > if (!kvm_para_available()) > > @@ -307,11 +330,17 @@ void __cpuinit kvm_guest_cpu_init(void) > > smp_processor_id()); > > } > > > > + if (kvm_para_has_feature(KVM_FEATURE_EOI)) { > > + __get_cpu_var(kvm_apic_eoi) = 0; > > + wrmsrl(MSR_KVM_EOI_EN, __pa(&__get_cpu_var(kvm_apic_eoi)) | > > + KVM_MSR_ENABLED); > > + } > > + > > if (has_steal_clock) > > kvm_register_steal_time(); > > } > > > > -static void kvm_pv_disable_apf(void *unused) > > +static void kvm_pv_disable_apf(void) > > { > > if (!__get_cpu_var(apf_reason).enabled) > > return; > > @@ -323,11 +352,18 @@ static void kvm_pv_disable_apf(void *unused) > > smp_processor_id()); > > } > > > > +static void kvm_pv_guest_cpu_reboot(void *unused) > > +{ > > + if (kvm_para_has_feature(KVM_FEATURE_EOI)) > > + wrmsrl(MSR_KVM_EOI_EN, 0); > > + kvm_pv_disable_apf(); > > +} > > + > > static int kvm_pv_reboot_notify(struct notifier_block *nb, > > unsigned long code, void *unused) > > { > > if (code == SYS_RESTART) > > - on_each_cpu(kvm_pv_disable_apf, NULL, 1); > > + on_each_cpu(kvm_pv_guest_cpu_reboot, NULL, 1); > > return NOTIFY_DONE; > > } > > > > @@ -378,7 +414,9 @@ static void __cpuinit kvm_guest_cpu_online(void *dummy) > > static void kvm_guest_cpu_offline(void *dummy) > > { > > kvm_disable_steal_time(); > > - kvm_pv_disable_apf(NULL); > > + if (kvm_para_has_feature(KVM_FEATURE_EOI)) > > + wrmsrl(MSR_KVM_EOI_EN, 0); > > + kvm_pv_disable_apf(); > > apf_task_wake_all(); > > } > > > > @@ -431,6 +469,13 @@ void __init kvm_guest_init(void) > > pv_time_ops.steal_clock = kvm_steal_clock; > > } > > > > + if (kvm_para_has_feature(KVM_FEATURE_EOI)) { > > + /* Should happen once after apic is initialized */ > > + BUG_ON(!apic); > What if kernel was started with noapic? I didn't test this yet. I thought noapic sets the apic pointer to apic_noop, not to NULL. So this case will get slowed down slightly if it actually runs on kvm. It does not look like noapic guest is worth optimising for. > > + BUG_ON(apic->eoi_write == kvm_guest_apic_eoi_write); > > + apic->eoi_write = kvm_guest_apic_eoi_write; > > + } > > + > > #ifdef CONFIG_SMP > > smp_ops.smp_prepare_boot_cpu = kvm_smp_prepare_boot_cpu; > > register_cpu_notifier(&kvm_cpu_notifier); > > -- > > 1.7.9.111.gf3fb0 > > -- > Gleb. -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html