Re: [PATCH 3/3] kvm: vmx: Reduce size of vmcs_field_to_offset_table

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Fri, Dec 22, 2017 at 8:26 AM, Paolo Bonzini <pbonzini@xxxxxxxxxx> wrote:
> On 21/12/2017 21:46, Jim Mattson wrote:
>> The vmcs_field_to_offset_table was a rather sparse table of short
>> integers with a maximum index of 0x6c16, amounting to 55342 bytes. Now
>> that we are considering support for multiple VMCS12 formats, it would
>> be unfortunate to replicate that large, sparse table. Using a
>> three-dimensional table indexed by VMCS field index, VMCS field type,
>> and VMCS field width, it's relatively easy to reduce that table to 768
>> bytes.
>
> Would it be faster if, instead of the three-dimensional table, we
> indexed the array by
>
>    ((field >> 10) | ((field << 5) & 0x3ff))

That doesn't look quite right to me. Do you mean:
        ((field >> 10) & 0x1f) | ((field & 0x3ff) << 5)

> which would waste one bit, but still reduce the table a lot.
>
> Alternatively, it could use rotate-right-16(field, 10):
>
>    (u16)((field >> 10) | ((field << 6))
>
> That would waste two bits, but it would faster still to build the index.

Amusingly, gcc prefers a rol to a ror.

On my E5-1650 v4 workstation, I get:

3D: 16 cycles @ 768 bytes
Alternative 1: 8 cycles @ 2982 bytes
Alternative 2: 4 cycles @ 5926 bytes

Let me send out v2 using the rol.

> Paolo
>
>> Signed-off-by: Jim Mattson <jmattson@xxxxxxxxxx>
>> ---
>>  arch/x86/kvm/vmx.c | 145 ++++++++++++++++++++++++++++++++---------------------
>>  1 file changed, 88 insertions(+), 57 deletions(-)
>>
>> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
>> index 1847000fbb0c..bd601b0984d1 100644
>> --- a/arch/x86/kvm/vmx.c
>> +++ b/arch/x86/kvm/vmx.c
>> @@ -714,11 +714,15 @@ static struct pi_desc *vcpu_to_pi_desc(struct kvm_vcpu *vcpu)
>>       return &(to_vmx(vcpu)->pi_desc);
>>  }
>>
>> -#define VMCS12_OFFSET(x) offsetof(struct vmcs12, x)
>> -#define FIELD(number, name)  [number] = VMCS12_OFFSET(name)
>> -#define FIELD64(number, name)        [number] = VMCS12_OFFSET(name), \
>> -                             [number##_HIGH] = VMCS12_OFFSET(name)+4
>> +#define VMCS_FIELD_HIGH(field)  ((field) & 1)
>> +#define VMCS_FIELD_INDEX(field) (((field) >> 1) & 0x1ff)
>> +#define VMCS_FIELD_TYPE(field)  (((field) >> 10) & 3)
>> +#define VMCS_FIELD_WIDTH(field) (((field) >> 13) & 3)
>>
>> +#define VMCS12_OFFSET(x) offsetof(struct vmcs12, x)
>> +#define FIELD(n, name) \
>> +     [VMCS_FIELD_INDEX(n)][VMCS_FIELD_TYPE(n)][VMCS_FIELD_WIDTH(n)] = \
>> +             VMCS12_OFFSET(name)
>>
>>  static unsigned long shadow_read_only_fields[] = {
>>       /*
>> @@ -779,7 +783,7 @@ static unsigned long shadow_read_write_fields[] = {
>>  static int max_shadow_read_write_fields =
>>       ARRAY_SIZE(shadow_read_write_fields);
>>
>> -static const unsigned short vmcs_field_to_offset_table[] = {
>> +static const unsigned short vmcs_field_to_offset_table[][4][4] = {
>>       FIELD(VIRTUAL_PROCESSOR_ID, virtual_processor_id),
>>       FIELD(POSTED_INTR_NV, posted_intr_nv),
>>       FIELD(GUEST_ES_SELECTOR, guest_es_selector),
>> @@ -799,39 +803,39 @@ static const unsigned short vmcs_field_to_offset_table[] = {
>>       FIELD(HOST_FS_SELECTOR, host_fs_selector),
>>       FIELD(HOST_GS_SELECTOR, host_gs_selector),
>>       FIELD(HOST_TR_SELECTOR, host_tr_selector),
>> -     FIELD64(IO_BITMAP_A, io_bitmap_a),
>> -     FIELD64(IO_BITMAP_B, io_bitmap_b),
>> -     FIELD64(MSR_BITMAP, msr_bitmap),
>> -     FIELD64(VM_EXIT_MSR_STORE_ADDR, vm_exit_msr_store_addr),
>> -     FIELD64(VM_EXIT_MSR_LOAD_ADDR, vm_exit_msr_load_addr),
>> -     FIELD64(VM_ENTRY_MSR_LOAD_ADDR, vm_entry_msr_load_addr),
>> -     FIELD64(TSC_OFFSET, tsc_offset),
>> -     FIELD64(VIRTUAL_APIC_PAGE_ADDR, virtual_apic_page_addr),
>> -     FIELD64(APIC_ACCESS_ADDR, apic_access_addr),
>> -     FIELD64(POSTED_INTR_DESC_ADDR, posted_intr_desc_addr),
>> -     FIELD64(VM_FUNCTION_CONTROL, vm_function_control),
>> -     FIELD64(EPT_POINTER, ept_pointer),
>> -     FIELD64(EOI_EXIT_BITMAP0, eoi_exit_bitmap0),
>> -     FIELD64(EOI_EXIT_BITMAP1, eoi_exit_bitmap1),
>> -     FIELD64(EOI_EXIT_BITMAP2, eoi_exit_bitmap2),
>> -     FIELD64(EOI_EXIT_BITMAP3, eoi_exit_bitmap3),
>> -     FIELD64(EPTP_LIST_ADDRESS, eptp_list_address),
>> -     FIELD64(XSS_EXIT_BITMAP, xss_exit_bitmap),
>> -     FIELD64(GUEST_PHYSICAL_ADDRESS, guest_physical_address),
>> -     FIELD64(VMCS_LINK_POINTER, vmcs_link_pointer),
>> -     FIELD64(PML_ADDRESS, pml_address),
>> -     FIELD64(GUEST_IA32_DEBUGCTL, guest_ia32_debugctl),
>> -     FIELD64(GUEST_IA32_PAT, guest_ia32_pat),
>> -     FIELD64(GUEST_IA32_EFER, guest_ia32_efer),
>> -     FIELD64(GUEST_IA32_PERF_GLOBAL_CTRL, guest_ia32_perf_global_ctrl),
>> -     FIELD64(GUEST_PDPTR0, guest_pdptr0),
>> -     FIELD64(GUEST_PDPTR1, guest_pdptr1),
>> -     FIELD64(GUEST_PDPTR2, guest_pdptr2),
>> -     FIELD64(GUEST_PDPTR3, guest_pdptr3),
>> -     FIELD64(GUEST_BNDCFGS, guest_bndcfgs),
>> -     FIELD64(HOST_IA32_PAT, host_ia32_pat),
>> -     FIELD64(HOST_IA32_EFER, host_ia32_efer),
>> -     FIELD64(HOST_IA32_PERF_GLOBAL_CTRL, host_ia32_perf_global_ctrl),
>> +     FIELD(IO_BITMAP_A, io_bitmap_a),
>> +     FIELD(IO_BITMAP_B, io_bitmap_b),
>> +     FIELD(MSR_BITMAP, msr_bitmap),
>> +     FIELD(VM_EXIT_MSR_STORE_ADDR, vm_exit_msr_store_addr),
>> +     FIELD(VM_EXIT_MSR_LOAD_ADDR, vm_exit_msr_load_addr),
>> +     FIELD(VM_ENTRY_MSR_LOAD_ADDR, vm_entry_msr_load_addr),
>> +     FIELD(TSC_OFFSET, tsc_offset),
>> +     FIELD(VIRTUAL_APIC_PAGE_ADDR, virtual_apic_page_addr),
>> +     FIELD(APIC_ACCESS_ADDR, apic_access_addr),
>> +     FIELD(POSTED_INTR_DESC_ADDR, posted_intr_desc_addr),
>> +     FIELD(VM_FUNCTION_CONTROL, vm_function_control),
>> +     FIELD(EPT_POINTER, ept_pointer),
>> +     FIELD(EOI_EXIT_BITMAP0, eoi_exit_bitmap0),
>> +     FIELD(EOI_EXIT_BITMAP1, eoi_exit_bitmap1),
>> +     FIELD(EOI_EXIT_BITMAP2, eoi_exit_bitmap2),
>> +     FIELD(EOI_EXIT_BITMAP3, eoi_exit_bitmap3),
>> +     FIELD(EPTP_LIST_ADDRESS, eptp_list_address),
>> +     FIELD(XSS_EXIT_BITMAP, xss_exit_bitmap),
>> +     FIELD(GUEST_PHYSICAL_ADDRESS, guest_physical_address),
>> +     FIELD(VMCS_LINK_POINTER, vmcs_link_pointer),
>> +     FIELD(PML_ADDRESS, pml_address),
>> +     FIELD(GUEST_IA32_DEBUGCTL, guest_ia32_debugctl),
>> +     FIELD(GUEST_IA32_PAT, guest_ia32_pat),
>> +     FIELD(GUEST_IA32_EFER, guest_ia32_efer),
>> +     FIELD(GUEST_IA32_PERF_GLOBAL_CTRL, guest_ia32_perf_global_ctrl),
>> +     FIELD(GUEST_PDPTR0, guest_pdptr0),
>> +     FIELD(GUEST_PDPTR1, guest_pdptr1),
>> +     FIELD(GUEST_PDPTR2, guest_pdptr2),
>> +     FIELD(GUEST_PDPTR3, guest_pdptr3),
>> +     FIELD(GUEST_BNDCFGS, guest_bndcfgs),
>> +     FIELD(HOST_IA32_PAT, host_ia32_pat),
>> +     FIELD(HOST_IA32_EFER, host_ia32_efer),
>> +     FIELD(HOST_IA32_PERF_GLOBAL_CTRL, host_ia32_perf_global_ctrl),
>>       FIELD(PIN_BASED_VM_EXEC_CONTROL, pin_based_vm_exec_control),
>>       FIELD(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control),
>>       FIELD(EXCEPTION_BITMAP, exception_bitmap),
>> @@ -923,15 +927,56 @@ static const unsigned short vmcs_field_to_offset_table[] = {
>>       FIELD(HOST_RIP, host_rip),
>>  };
>>
>> +enum vmcs_field_width {
>> +     VMCS_FIELD_WIDTH_U16 = 0,
>> +     VMCS_FIELD_WIDTH_U64 = 1,
>> +     VMCS_FIELD_WIDTH_U32 = 2,
>> +     VMCS_FIELD_WIDTH_NATURAL_WIDTH = 3
>> +};
>> +
>> +static inline int vmcs_field_width(unsigned long field)
>> +{
>> +     if (VMCS_FIELD_HIGH(field))     /* the *_HIGH fields are all 32 bit */
>> +             return VMCS_FIELD_WIDTH_U32;
>> +     return VMCS_FIELD_WIDTH(field);
>> +}
>> +
>> +enum vmcs_field_type {
>> +     VMCS_FIELD_TYPE_CONTROL = 0,
>> +     VMCS_FIELD_TYPE_INFO = 1,
>> +     VMCS_FIELD_TYPE_GUEST = 2,
>> +     VMCS_FIELD_TYPE_HOST = 3
>> +};
>> +
>> +static inline int vmcs_field_type(unsigned long field)
>> +{
>> +     return VMCS_FIELD_TYPE(field);
>> +}
>> +
>>  static inline short vmcs_field_to_offset(unsigned long field)
>>  {
>> -     BUILD_BUG_ON(ARRAY_SIZE(vmcs_field_to_offset_table) > SHRT_MAX);
>> +     unsigned index = VMCS_FIELD_INDEX(field);
>> +     unsigned type = VMCS_FIELD_TYPE(field);
>> +     unsigned width = VMCS_FIELD_WIDTH(field);
>> +     short offset;
>>
>> -     if (field >= ARRAY_SIZE(vmcs_field_to_offset_table) ||
>> -         vmcs_field_to_offset_table[field] == 0)
>> +     BUILD_BUG_ON(ARRAY_SIZE(vmcs_field_to_offset_table) >
>> +                  VMCS12_MAX_FIELD_INDEX + 1);
>> +     if (VMCS_FIELD_HIGH(field) && width != VMCS_FIELD_WIDTH_U64)
>>               return -ENOENT;
>>
>> -     return vmcs_field_to_offset_table[field];
>> +     if (index >= ARRAY_SIZE(vmcs_field_to_offset_table))
>> +             return -ENOENT;
>> +
>> +     offset = vmcs_field_to_offset_table[index][type][width];
>> +
>> +     if (offset == 0)
>> +             return -ENOENT;
>> +
>> +     if (VMCS_FIELD_HIGH(field))
>> +             offset += sizeof(u32);
>> +
>> +     return offset;
>>  }
>>
>>  static inline struct vmcs12 *get_vmcs12(struct kvm_vcpu *vcpu)
>> @@ -4050,23 +4095,9 @@ static void free_kvm_area(void)
>>       }
>>  }
>>
>> -enum vmcs_field_width {
>> -     VMCS_FIELD_WIDTH_U16 = 0,
>> -     VMCS_FIELD_WIDTH_U64 = 1,
>> -     VMCS_FIELD_WIDTH_U32 = 2,
>> -     VMCS_FIELD_WIDTH_NATURAL_WIDTH = 3
>> -};
>> -
>> -static inline int vmcs_field_width(unsigned long field)
>> -{
>> -     if (0x1 & field)        /* the *_HIGH fields are all 32 bit */
>> -             return VMCS_FIELD_WIDTH_U32;
>> -     return (field >> 13) & 0x3 ;
>> -}
>> -
>>  static inline int vmcs_field_readonly(unsigned long field)
>>  {
>> -     return (((field >> 10) & 0x3) == 1);
>> +     return VMCS_FIELD_TYPE(field) == VMCS_FIELD_TYPE_INFO;
>>  }
>>
>>  static void init_vmcs_shadow_fields(void)
>>
>



[Index of Archives]     [KVM ARM]     [KVM ia64]     [KVM ppc]     [Virtualization Tools]     [Spice Development]     [Libvirt]     [Libvirt Users]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite Questions]     [Linux Kernel]     [Linux SCSI]     [XFree86]

  Powered by Linux