On Fri, Dec 22, 2017 at 8:26 AM, Paolo Bonzini <pbonzini@xxxxxxxxxx> wrote: > On 21/12/2017 21:46, Jim Mattson wrote: >> The vmcs_field_to_offset_table was a rather sparse table of short >> integers with a maximum index of 0x6c16, amounting to 55342 bytes. Now >> that we are considering support for multiple VMCS12 formats, it would >> be unfortunate to replicate that large, sparse table. Using a >> three-dimensional table indexed by VMCS field index, VMCS field type, >> and VMCS field width, it's relatively easy to reduce that table to 768 >> bytes. > > Would it be faster if, instead of the three-dimensional table, we > indexed the array by > > ((field >> 10) | ((field << 5) & 0x3ff)) That doesn't look quite right to me. Do you mean: ((field >> 10) & 0x1f) | ((field & 0x3ff) << 5) > which would waste one bit, but still reduce the table a lot. > > Alternatively, it could use rotate-right-16(field, 10): > > (u16)((field >> 10) | ((field << 6)) > > That would waste two bits, but it would faster still to build the index. Amusingly, gcc prefers a rol to a ror. On my E5-1650 v4 workstation, I get: 3D: 16 cycles @ 768 bytes Alternative 1: 8 cycles @ 2982 bytes Alternative 2: 4 cycles @ 5926 bytes Let me send out v2 using the rol. > Paolo > >> Signed-off-by: Jim Mattson <jmattson@xxxxxxxxxx> >> --- >> arch/x86/kvm/vmx.c | 145 ++++++++++++++++++++++++++++++++--------------------- >> 1 file changed, 88 insertions(+), 57 deletions(-) >> >> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c >> index 1847000fbb0c..bd601b0984d1 100644 >> --- a/arch/x86/kvm/vmx.c >> +++ b/arch/x86/kvm/vmx.c >> @@ -714,11 +714,15 @@ static struct pi_desc *vcpu_to_pi_desc(struct kvm_vcpu *vcpu) >> return &(to_vmx(vcpu)->pi_desc); >> } >> >> -#define VMCS12_OFFSET(x) offsetof(struct vmcs12, x) >> -#define FIELD(number, name) [number] = VMCS12_OFFSET(name) >> -#define FIELD64(number, name) [number] = VMCS12_OFFSET(name), \ >> - [number##_HIGH] = VMCS12_OFFSET(name)+4 >> +#define VMCS_FIELD_HIGH(field) ((field) & 1) >> +#define VMCS_FIELD_INDEX(field) (((field) >> 1) & 0x1ff) >> +#define VMCS_FIELD_TYPE(field) (((field) >> 10) & 3) >> +#define VMCS_FIELD_WIDTH(field) (((field) >> 13) & 3) >> >> +#define VMCS12_OFFSET(x) offsetof(struct vmcs12, x) >> +#define FIELD(n, name) \ >> + [VMCS_FIELD_INDEX(n)][VMCS_FIELD_TYPE(n)][VMCS_FIELD_WIDTH(n)] = \ >> + VMCS12_OFFSET(name) >> >> static unsigned long shadow_read_only_fields[] = { >> /* >> @@ -779,7 +783,7 @@ static unsigned long shadow_read_write_fields[] = { >> static int max_shadow_read_write_fields = >> ARRAY_SIZE(shadow_read_write_fields); >> >> -static const unsigned short vmcs_field_to_offset_table[] = { >> +static const unsigned short vmcs_field_to_offset_table[][4][4] = { >> FIELD(VIRTUAL_PROCESSOR_ID, virtual_processor_id), >> FIELD(POSTED_INTR_NV, posted_intr_nv), >> FIELD(GUEST_ES_SELECTOR, guest_es_selector), >> @@ -799,39 +803,39 @@ static const unsigned short vmcs_field_to_offset_table[] = { >> FIELD(HOST_FS_SELECTOR, host_fs_selector), >> FIELD(HOST_GS_SELECTOR, host_gs_selector), >> FIELD(HOST_TR_SELECTOR, host_tr_selector), >> - FIELD64(IO_BITMAP_A, io_bitmap_a), >> - FIELD64(IO_BITMAP_B, io_bitmap_b), >> - FIELD64(MSR_BITMAP, msr_bitmap), >> - FIELD64(VM_EXIT_MSR_STORE_ADDR, vm_exit_msr_store_addr), >> - FIELD64(VM_EXIT_MSR_LOAD_ADDR, vm_exit_msr_load_addr), >> - FIELD64(VM_ENTRY_MSR_LOAD_ADDR, vm_entry_msr_load_addr), >> - FIELD64(TSC_OFFSET, tsc_offset), >> - FIELD64(VIRTUAL_APIC_PAGE_ADDR, virtual_apic_page_addr), >> - FIELD64(APIC_ACCESS_ADDR, apic_access_addr), >> - FIELD64(POSTED_INTR_DESC_ADDR, posted_intr_desc_addr), >> - FIELD64(VM_FUNCTION_CONTROL, vm_function_control), >> - FIELD64(EPT_POINTER, ept_pointer), >> - FIELD64(EOI_EXIT_BITMAP0, eoi_exit_bitmap0), >> - FIELD64(EOI_EXIT_BITMAP1, eoi_exit_bitmap1), >> - FIELD64(EOI_EXIT_BITMAP2, eoi_exit_bitmap2), >> - FIELD64(EOI_EXIT_BITMAP3, eoi_exit_bitmap3), >> - FIELD64(EPTP_LIST_ADDRESS, eptp_list_address), >> - FIELD64(XSS_EXIT_BITMAP, xss_exit_bitmap), >> - FIELD64(GUEST_PHYSICAL_ADDRESS, guest_physical_address), >> - FIELD64(VMCS_LINK_POINTER, vmcs_link_pointer), >> - FIELD64(PML_ADDRESS, pml_address), >> - FIELD64(GUEST_IA32_DEBUGCTL, guest_ia32_debugctl), >> - FIELD64(GUEST_IA32_PAT, guest_ia32_pat), >> - FIELD64(GUEST_IA32_EFER, guest_ia32_efer), >> - FIELD64(GUEST_IA32_PERF_GLOBAL_CTRL, guest_ia32_perf_global_ctrl), >> - FIELD64(GUEST_PDPTR0, guest_pdptr0), >> - FIELD64(GUEST_PDPTR1, guest_pdptr1), >> - FIELD64(GUEST_PDPTR2, guest_pdptr2), >> - FIELD64(GUEST_PDPTR3, guest_pdptr3), >> - FIELD64(GUEST_BNDCFGS, guest_bndcfgs), >> - FIELD64(HOST_IA32_PAT, host_ia32_pat), >> - FIELD64(HOST_IA32_EFER, host_ia32_efer), >> - FIELD64(HOST_IA32_PERF_GLOBAL_CTRL, host_ia32_perf_global_ctrl), >> + FIELD(IO_BITMAP_A, io_bitmap_a), >> + FIELD(IO_BITMAP_B, io_bitmap_b), >> + FIELD(MSR_BITMAP, msr_bitmap), >> + FIELD(VM_EXIT_MSR_STORE_ADDR, vm_exit_msr_store_addr), >> + FIELD(VM_EXIT_MSR_LOAD_ADDR, vm_exit_msr_load_addr), >> + FIELD(VM_ENTRY_MSR_LOAD_ADDR, vm_entry_msr_load_addr), >> + FIELD(TSC_OFFSET, tsc_offset), >> + FIELD(VIRTUAL_APIC_PAGE_ADDR, virtual_apic_page_addr), >> + FIELD(APIC_ACCESS_ADDR, apic_access_addr), >> + FIELD(POSTED_INTR_DESC_ADDR, posted_intr_desc_addr), >> + FIELD(VM_FUNCTION_CONTROL, vm_function_control), >> + FIELD(EPT_POINTER, ept_pointer), >> + FIELD(EOI_EXIT_BITMAP0, eoi_exit_bitmap0), >> + FIELD(EOI_EXIT_BITMAP1, eoi_exit_bitmap1), >> + FIELD(EOI_EXIT_BITMAP2, eoi_exit_bitmap2), >> + FIELD(EOI_EXIT_BITMAP3, eoi_exit_bitmap3), >> + FIELD(EPTP_LIST_ADDRESS, eptp_list_address), >> + FIELD(XSS_EXIT_BITMAP, xss_exit_bitmap), >> + FIELD(GUEST_PHYSICAL_ADDRESS, guest_physical_address), >> + FIELD(VMCS_LINK_POINTER, vmcs_link_pointer), >> + FIELD(PML_ADDRESS, pml_address), >> + FIELD(GUEST_IA32_DEBUGCTL, guest_ia32_debugctl), >> + FIELD(GUEST_IA32_PAT, guest_ia32_pat), >> + FIELD(GUEST_IA32_EFER, guest_ia32_efer), >> + FIELD(GUEST_IA32_PERF_GLOBAL_CTRL, guest_ia32_perf_global_ctrl), >> + FIELD(GUEST_PDPTR0, guest_pdptr0), >> + FIELD(GUEST_PDPTR1, guest_pdptr1), >> + FIELD(GUEST_PDPTR2, guest_pdptr2), >> + FIELD(GUEST_PDPTR3, guest_pdptr3), >> + FIELD(GUEST_BNDCFGS, guest_bndcfgs), >> + FIELD(HOST_IA32_PAT, host_ia32_pat), >> + FIELD(HOST_IA32_EFER, host_ia32_efer), >> + FIELD(HOST_IA32_PERF_GLOBAL_CTRL, host_ia32_perf_global_ctrl), >> FIELD(PIN_BASED_VM_EXEC_CONTROL, pin_based_vm_exec_control), >> FIELD(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control), >> FIELD(EXCEPTION_BITMAP, exception_bitmap), >> @@ -923,15 +927,56 @@ static const unsigned short vmcs_field_to_offset_table[] = { >> FIELD(HOST_RIP, host_rip), >> }; >> >> +enum vmcs_field_width { >> + VMCS_FIELD_WIDTH_U16 = 0, >> + VMCS_FIELD_WIDTH_U64 = 1, >> + VMCS_FIELD_WIDTH_U32 = 2, >> + VMCS_FIELD_WIDTH_NATURAL_WIDTH = 3 >> +}; >> + >> +static inline int vmcs_field_width(unsigned long field) >> +{ >> + if (VMCS_FIELD_HIGH(field)) /* the *_HIGH fields are all 32 bit */ >> + return VMCS_FIELD_WIDTH_U32; >> + return VMCS_FIELD_WIDTH(field); >> +} >> + >> +enum vmcs_field_type { >> + VMCS_FIELD_TYPE_CONTROL = 0, >> + VMCS_FIELD_TYPE_INFO = 1, >> + VMCS_FIELD_TYPE_GUEST = 2, >> + VMCS_FIELD_TYPE_HOST = 3 >> +}; >> + >> +static inline int vmcs_field_type(unsigned long field) >> +{ >> + return VMCS_FIELD_TYPE(field); >> +} >> + >> static inline short vmcs_field_to_offset(unsigned long field) >> { >> - BUILD_BUG_ON(ARRAY_SIZE(vmcs_field_to_offset_table) > SHRT_MAX); >> + unsigned index = VMCS_FIELD_INDEX(field); >> + unsigned type = VMCS_FIELD_TYPE(field); >> + unsigned width = VMCS_FIELD_WIDTH(field); >> + short offset; >> >> - if (field >= ARRAY_SIZE(vmcs_field_to_offset_table) || >> - vmcs_field_to_offset_table[field] == 0) >> + BUILD_BUG_ON(ARRAY_SIZE(vmcs_field_to_offset_table) > >> + VMCS12_MAX_FIELD_INDEX + 1); >> + if (VMCS_FIELD_HIGH(field) && width != VMCS_FIELD_WIDTH_U64) >> return -ENOENT; >> >> - return vmcs_field_to_offset_table[field]; >> + if (index >= ARRAY_SIZE(vmcs_field_to_offset_table)) >> + return -ENOENT; >> + >> + offset = vmcs_field_to_offset_table[index][type][width]; >> + >> + if (offset == 0) >> + return -ENOENT; >> + >> + if (VMCS_FIELD_HIGH(field)) >> + offset += sizeof(u32); >> + >> + return offset; >> } >> >> static inline struct vmcs12 *get_vmcs12(struct kvm_vcpu *vcpu) >> @@ -4050,23 +4095,9 @@ static void free_kvm_area(void) >> } >> } >> >> -enum vmcs_field_width { >> - VMCS_FIELD_WIDTH_U16 = 0, >> - VMCS_FIELD_WIDTH_U64 = 1, >> - VMCS_FIELD_WIDTH_U32 = 2, >> - VMCS_FIELD_WIDTH_NATURAL_WIDTH = 3 >> -}; >> - >> -static inline int vmcs_field_width(unsigned long field) >> -{ >> - if (0x1 & field) /* the *_HIGH fields are all 32 bit */ >> - return VMCS_FIELD_WIDTH_U32; >> - return (field >> 13) & 0x3 ; >> -} >> - >> static inline int vmcs_field_readonly(unsigned long field) >> { >> - return (((field >> 10) & 0x3) == 1); >> + return VMCS_FIELD_TYPE(field) == VMCS_FIELD_TYPE_INFO; >> } >> >> static void init_vmcs_shadow_fields(void) >> >