We know both register and value for eoi beforehand, so there's no need to check it and no need to do math to calculate the msr. Saves instructions/branches on each EOI when using x2apic. I'm not sure what kind of tests should one run to check whether this patch is good for performance. Some data below - in case it's insufficient, this patch can be dropped from the series for now: I looked at the objdump output to verify that the generated code looks right and actually is shorter. Some benchmark results below show a tiny but measureable improvement. The tests were run on an AMD box with 24 cpus. - A clean kernel build after reboot shows a tiny but measureable improvement in system time which means lower CPU overhead (though not measureable in total time - that is dominated by user time and fluctuates too much): linux# reboot -f ... linux# make clean linux# time make -j 64 LOCALVERSION= 2>&1 > /dev/null Before: real 2m52.244s user 35m53.833s sys 6m7.194s After: real 2m52.827s user 35m48.916s sys 6m2.305s - perf micro-benchmarks seem to consistently show a tiny improvement in total time as well but it's below the confidence level of 3 std deviations: ... 0.414666797 seconds time elapsed ( +- 1.29% ) Performance counter stats for 'perf bench sched messaging' (100 runs): 0.395370891 seconds time elapsed ( +- 1.04% ) 0.307019664 seconds time elapsed ( +- 0.10% ) 0.304738024 seconds time elapsed ( +- 0.08% ) Signed-off-by: Michael S. Tsirkin <mst@xxxxxxxxxx> --- arch/x86/include/asm/apic.h | 5 +++++ arch/x86/kernel/apic/x2apic_cluster.c | 2 +- arch/x86/kernel/apic/x2apic_phys.c | 2 +- arch/x86/kernel/apic/x2apic_uv_x.c | 2 +- 4 files changed, 8 insertions(+), 3 deletions(-) diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index 74efb8d..5eb6d56 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -138,6 +138,11 @@ static inline void native_apic_msr_write(u32 reg, u32 v) wrmsr(APIC_BASE_MSR + (reg >> 4), v, 0); } +static inline void native_apic_msr_eoi_write(u32 reg, u32 v) +{ + wrmsr(APIC_BASE_MSR + (APIC_EOI >> 4), APIC_EOI_ACK, 0); +} + static inline u32 native_apic_msr_read(u32 reg) { u64 msr; diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c index a5baa78..ff35cff 100644 --- a/arch/x86/kernel/apic/x2apic_cluster.c +++ b/arch/x86/kernel/apic/x2apic_cluster.c @@ -260,7 +260,7 @@ static struct apic apic_x2apic_cluster = { .read = native_apic_msr_read, .write = native_apic_msr_write, - .eoi_write = native_apic_msr_write, + .eoi_write = native_apic_msr_eoi_write, .icr_read = native_x2apic_icr_read, .icr_write = native_x2apic_icr_write, .wait_icr_idle = native_x2apic_wait_icr_idle, diff --git a/arch/x86/kernel/apic/x2apic_phys.c b/arch/x86/kernel/apic/x2apic_phys.c index 8340356..c17e982 100644 --- a/arch/x86/kernel/apic/x2apic_phys.c +++ b/arch/x86/kernel/apic/x2apic_phys.c @@ -172,7 +172,7 @@ static struct apic apic_x2apic_phys = { .read = native_apic_msr_read, .write = native_apic_msr_write, - .eoi_write = native_apic_msr_write, + .eoi_write = native_apic_msr_eoi_write, .icr_read = native_x2apic_icr_read, .icr_write = native_x2apic_icr_write, .wait_icr_idle = native_x2apic_wait_icr_idle, diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index 5b0e3d0..c6d03f7 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c @@ -404,7 +404,7 @@ static struct apic __refdata apic_x2apic_uv_x = { .read = native_apic_msr_read, .write = native_apic_msr_write, - .eoi_write = native_apic_msr_write, + .eoi_write = native_apic_msr_eoi_write, .icr_read = native_x2apic_icr_read, .icr_write = native_x2apic_icr_write, .wait_icr_idle = native_x2apic_wait_icr_idle, -- MST -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html