On Mon, Jan 11, 2021, Jason Baron wrote: > Use static calls to improve kvm_x86_ops performance. Introduce the > definitions that will be used by a subsequent patch to actualize the > savings. > > Note that all kvm_x86_ops are covered here except for 'pmu_ops' and > 'nested ops'. I think they can be covered by static calls in a similar > manner, but were omitted from this series to reduce scope and because > I don't think they have as large of a performance impact. > > Cc: Paolo Bonzini <pbonzini@xxxxxxxxxx> > Cc: Thomas Gleixner <tglx@xxxxxxxxxxxxx> > Cc: Ingo Molnar <mingo@xxxxxxxxxx> > Cc: Borislav Petkov <bp@xxxxxxxxx> > Cc: Peter Zijlstra <peterz@xxxxxxxxxxxxx> > Cc: Andrea Arcangeli <aarcange@xxxxxxxxxx> > Signed-off-by: Jason Baron <jbaron@xxxxxxxxxx> > --- > arch/x86/include/asm/kvm_host.h | 65 +++++++++++++++++++++++++++++++++++++++++ > arch/x86/kvm/x86.c | 5 ++++ > 2 files changed, 70 insertions(+) > > diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h > index 3ab7b46..e947522 100644 > --- a/arch/x86/include/asm/kvm_host.h > +++ b/arch/x86/include/asm/kvm_host.h > @@ -1087,6 +1087,65 @@ static inline u16 kvm_lapic_irq_dest_mode(bool dest_mode_logical) > return dest_mode_logical ? APIC_DEST_LOGICAL : APIC_DEST_PHYSICAL; > } > > +/* > + * static calls cover all kvm_x86_ops except for functions under pmu_ops and > + * nested_ops. 
> + */ > +#define FOREACH_KVM_X86_OPS(F) \ > + F(hardware_enable); F(hardware_disable); F(hardware_unsetup); \ > + F(cpu_has_accelerated_tpr); F(has_emulated_msr); \ > + F(vcpu_after_set_cpuid); F(vm_init); F(vm_destroy); F(vcpu_create); \ > + F(vcpu_free); F(vcpu_reset); F(prepare_guest_switch); F(vcpu_load); \ > + F(vcpu_put); F(update_exception_bitmap); F(get_msr); F(set_msr); \ > + F(get_segment_base); F(get_segment); F(get_cpl); F(set_segment); \ > + F(get_cs_db_l_bits); F(set_cr0); F(is_valid_cr4); F(set_cr4); \ > + F(set_efer); F(get_idt); F(set_idt); F(get_gdt); F(set_gdt); \ > + F(sync_dirty_debug_regs); F(set_dr7); F(cache_reg); F(get_rflags); \ > + F(set_rflags); F(tlb_flush_all); F(tlb_flush_current); \ > + F(tlb_remote_flush); F(tlb_remote_flush_with_range); F(tlb_flush_gva); \ > + F(tlb_flush_guest); F(run); F(handle_exit); \ > + F(skip_emulated_instruction); F(update_emulated_instruction); \ > + F(set_interrupt_shadow); F(get_interrupt_shadow); F(patch_hypercall); \ > + F(set_irq); F(set_nmi); F(queue_exception); F(cancel_injection); \ > + F(interrupt_allowed); F(nmi_allowed); F(get_nmi_mask); F(set_nmi_mask);\ > + F(enable_nmi_window); F(enable_irq_window); F(update_cr8_intercept); \ > + F(check_apicv_inhibit_reasons); F(pre_update_apicv_exec_ctrl); \ > + F(refresh_apicv_exec_ctrl); F(hwapic_irr_update); F(hwapic_isr_update);\ > + F(guest_apic_has_interrupt); F(load_eoi_exitmap); \ > + F(set_virtual_apic_mode); F(set_apic_access_page_addr); \ > + F(deliver_posted_interrupt); F(sync_pir_to_irr); F(set_tss_addr); \ > + F(set_identity_map_addr); F(get_mt_mask); F(load_mmu_pgd); \ > + F(has_wbinvd_exit); F(write_l1_tsc_offset); F(get_exit_info); \ > + F(check_intercept); F(handle_exit_irqoff); F(request_immediate_exit); \ > + F(sched_in); F(slot_enable_log_dirty); F(slot_disable_log_dirty); \ > + F(flush_log_dirty); F(enable_log_dirty_pt_masked); \ > + F(cpu_dirty_log_size); F(pre_block); F(post_block); F(vcpu_blocking); \ > + F(vcpu_unblocking); 
F(update_pi_irte); F(apicv_post_state_restore); \ > + F(dy_apicv_has_pending_interrupt); F(set_hv_timer); F(cancel_hv_timer);\ > + F(setup_mce); F(smi_allowed); F(pre_enter_smm); F(pre_leave_smm); \ > + F(enable_smi_window); F(mem_enc_op); F(mem_enc_reg_region); \ > + F(mem_enc_unreg_region); F(get_msr_feature); \ > + F(can_emulate_instruction); F(apic_init_signal_blocked); \ > + F(enable_direct_tlbflush); F(migrate_timers); F(msr_filter_changed); \ > + F(complete_emulated_msr) What about adding a dedicated .h file for this beast? Then it won't be so painful to do one function per line. As is, updates to kvm_x86_ops will be messy. And add yet another macro layer (or maybe just tweak this one?) so that the caller controls the line ending? I suppose you could also just use a comma, but that's a bit dirty... That would also allow using this to declare vmx_x86_ops and svm_x86_ops, which would need a comma instead of a semi-colon. There have been a few attempts to add a bit of automation to {vmx,svm}_x86_ops, this seems like it would be good motivation to go in a different direction and declare/define all ops, e.g. the VMX/SVM code could simply do something like: #define DECLARE_VMX_X86_OP(func) \ .func = vmx_##func static struct kvm_x86_ops vmx_x86_ops __initdata = { .vm_size = sizeof(struct kvm_vmx), .vm_init = vmx_vm_init, .pmu_ops = &intel_pmu_ops, .nested_ops = &vmx_nested_ops, FOREACH_KVM_X86_OPS(DECLARE_VMX_X86_OP) };