2015-01-28 10:54+0800, Kai Huang:
> This patch adds PML support in VMX. A new module parameter 'enable_pml'
> (+module_param_named(pml, enable_pml, bool, S_IRUGO);) is added to
> allow the user to enable/disable it manually.
>
> Signed-off-by: Kai Huang <kai.huang@xxxxxxxxxxxxxxx>
> ---
> diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h
> index 587149b..a139977 100644
> --- a/arch/x86/kvm/trace.h
> +++ b/arch/x86/kvm/trace.h
> @@ -846,6 +846,24 @@ TRACE_EVENT(kvm_track_tsc,
> +TRACE_EVENT(kvm_pml_full,
> +        TP_PROTO(unsigned int vcpu_id),
> +        TP_ARGS(vcpu_id),
> +
> +        TP_STRUCT__entry(
> +                __field( unsigned int, vcpu_id )
> +        ),
> +
> +        TP_fast_assign(
> +                __entry->vcpu_id = vcpu_id;
> +        ),
> +
> +        TP_printk("vcpu %d: PML full", __entry->vcpu_id)
> +);
> +
>  #endif /* CONFIG_X86_64 */

You have the tracepoint protected by CONFIG_X86_64, but use it
unconditionally.

(Also, we can get all of this information from the kvm_exit tracepoint;
 I'd just drop it.)

> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index c987374..de5ce82 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -516,6 +519,10 @@ struct vcpu_vmx {
> +        /* Support for PML */
> +#define PML_ENTITY_NUM 512

(Readers of this struct likely aren't interested in this number, so it
 would be nicer to define it outside.  I thought it was here to hint at
 the number of allocated pages, but PML_ENTITY_NUM / 512 isn't obvious.)

> +        struct page *pml_pg;

> @@ -4355,6 +4368,9 @@ static u32 vmx_secondary_exec_control(struct vcpu_vmx *vmx)
>            a current VMCS12
>         */
>         exec_control &= ~SECONDARY_EXEC_SHADOW_VMCS;
> +        /* PML is enabled/disabled in creating/destroying vcpu */
> +        exec_control &= ~SECONDARY_EXEC_ENABLE_PML;

What is the harm of enabling it here?
(SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES seems similar and does it.)

> +
>         return exec_control;
>  }

> @@ -6971,6 +7001,31 @@ static bool vmx_test_pir(struct kvm_vcpu *vcpu, int vector)
> +static int handle_pml_full(struct kvm_vcpu *vcpu)
> +{
> +        unsigned long exit_qualification;
> +
> +        trace_kvm_pml_full(vcpu->vcpu_id);
> +
> +        exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
> +
> +        /*
> +         * PML buffer FULL happened while executing iret from NMI,
> +         * "blocked by NMI" bit has to be set before next VM entry.
> +         */
> +        if (!(to_vmx(vcpu)->idt_vectoring_info & VECTORING_INFO_VALID_MASK) &&
> +                        cpu_has_virtual_nmis() &&
> +                        (exit_qualification & INTR_INFO_UNBLOCK_NMI))
> +                vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
> +                                GUEST_INTR_STATE_NMI);

The relevant part of the specification seems pasted from 27.2.2
(Information for VM Exits Due to Vectored Events), which makes me think
that this bit is mirrored in the IDT-vectoring information field ...
isn't vmx_recover_nmi_blocking() enough?

(I see the same code in handle_ept_violation(), but wasn't that needed
 just because of a hardware erratum?)

> +
> +        /*
> +         * PML buffer already flushed at beginning of VMEXIT. Nothing to
> +         * do here, and there's no userspace involvement needed for PML.
> +         */
> +        return 1;
> +}

> @@ -7325,6 +7381,89 @@ static void vmx_get_exit_info(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2)
> +static int vmx_enable_pml(struct vcpu_vmx *vmx)
> +{
> +        struct page *pml_pg;
> +        u32 exec_control;
> +
> +        pml_pg = alloc_page(GFP_KERNEL | __GFP_ZERO);
> +        if (!pml_pg)
> +                return -ENOMEM;
> +
> +        vmx->pml_pg = pml_pg;

(It's safe to use vmx->pml_pg directly; the local variable and the extra
 assignment aren't needed.)
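To illustrate, a minimal untested sketch of the whole function with
vmx->pml_pg used directly (same fields and helpers as in your patch):

        static int vmx_enable_pml(struct vcpu_vmx *vmx)
        {
                u32 exec_control;

                /* One zeroed page holds all PML_ENTITY_NUM (512) u64 entries. */
                vmx->pml_pg = alloc_page(GFP_KERNEL | __GFP_ZERO);
                if (!vmx->pml_pg)
                        return -ENOMEM;

                /*
                 * Point the CPU at the log and start at the last slot;
                 * hardware decrements the index as it logs dirty GPAs.
                 */
                vmcs_write64(PML_ADDRESS, page_to_phys(vmx->pml_pg));
                vmcs_write16(GUEST_PML_INDEX, PML_ENTITY_NUM - 1);

                exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL);
                vmcs_write32(SECONDARY_VM_EXEC_CONTROL,
                             exec_control | SECONDARY_EXEC_ENABLE_PML);

                return 0;
        }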
> +
> +        vmcs_write64(PML_ADDRESS, page_to_phys(vmx->pml_pg));
> +        vmcs_write16(GUEST_PML_INDEX, PML_ENTITY_NUM - 1);
> +
> +        exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL);
> +        exec_control |= SECONDARY_EXEC_ENABLE_PML;
> +        vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control);
> +
> +        return 0;
> +}

> +static void vmx_flush_pml_buffer(struct vcpu_vmx *vmx)
> +{
> +        struct kvm *kvm = vmx->vcpu.kvm;
> +        u64 *pml_buf;
> +        u16 pml_idx;
> +
> +        pml_idx = vmcs_read16(GUEST_PML_INDEX);
> +
> +        /* Do nothing if PML buffer is empty */
> +        if (pml_idx == (PML_ENTITY_NUM - 1))
> +                return;
> +
> +        /* PML index always points to next available PML buffer entity */
> +        if (pml_idx >= PML_ENTITY_NUM)
> +                pml_idx = 0;
> +        else
> +                pml_idx++;

If pml_idx is >= PML_ENTITY_NUM and < 0xffff, the log is empty, so it
would be better to use just 'pml_idx++' and rely on the unsigned modulo.
(Could also 'assert(pml_idx < PML_ENTITY_NUM)' then.)

> +
> +        pml_buf = page_address(vmx->pml_pg);
> +        for (; pml_idx < PML_ENTITY_NUM; pml_idx++) {
> +                u64 gpa;
> +
> +                gpa = pml_buf[pml_idx];
> +                WARN_ON(gpa & (PAGE_SIZE - 1));
> +                mark_page_dirty(kvm, gpa >> PAGE_SHIFT);
> +        }
> +
> +        /* reset PML index */
> +        vmcs_write16(GUEST_PML_INDEX, PML_ENTITY_NUM - 1);
> +}

> @@ -9492,6 +9655,31 @@ static void vmx_sched_in(struct kvm_vcpu *vcpu, int cpu)
> +static void vmx_slot_enable_log_dirty(struct kvm *kvm,
> +                                      struct kvm_memory_slot *slot)
> +{
> +        kvm_mmu_slot_leaf_clear_dirty(kvm, slot);

(Can a new slot contain dirty pages?)

> +        kvm_mmu_slot_largepage_remove_write_access(kvm, slot);
> +}

Thanks.
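PS: to make the index suggestion concrete, a rough and untested sketch
of the flush path using the u16 wraparound, with WARN_ON standing in for
the 'assert' above (everything else as in your patch):

        static void vmx_flush_pml_buffer(struct vcpu_vmx *vmx)
        {
                struct kvm *kvm = vmx->vcpu.kvm;
                u64 *pml_buf;
                u16 pml_idx;

                pml_idx = vmcs_read16(GUEST_PML_INDEX);

                /* Do nothing if the PML buffer is empty. */
                if (pml_idx == PML_ENTITY_NUM - 1)
                        return;

                /*
                 * The index points to the next available entry, so the
                 * valid entries are pml_idx + 1 .. PML_ENTITY_NUM - 1.
                 * A full buffer leaves the index at 0xffff, and the u16
                 * increment wraps it to 0, covering the whole buffer.
                 */
                pml_idx++;
                WARN_ON(pml_idx >= PML_ENTITY_NUM);

                pml_buf = page_address(vmx->pml_pg);
                for (; pml_idx < PML_ENTITY_NUM; pml_idx++) {
                        u64 gpa = pml_buf[pml_idx];

                        /* Entries are page-aligned guest physical addresses. */
                        WARN_ON(gpa & (PAGE_SIZE - 1));
                        mark_page_dirty(kvm, gpa >> PAGE_SHIFT);
                }

                /* Reset the index so logging starts at the top again. */
                vmcs_write16(GUEST_PML_INDEX, PML_ENTITY_NUM - 1);
        }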