On 01/08/2017 23:00, David Matlack wrote: > The host physical addresses of L1's Virtual APIC Page and Posted > Interrupt descriptor are loaded into the VMCS02. The CPU may write > to these pages via their host physical address while L2 is running, > bypassing address-translation-based dirty tracking (e.g. EPT write > protection). Mark them dirty on every exit from L2 to prevent them > from getting out of sync with dirty tracking. > > Also mark the virtual APIC page and the posted interrupt descriptor > dirty when KVM is virtualizing posted interrupt processing. > > Signed-off-by: David Matlack <dmatlack@xxxxxxxxxx> Reviewed-by: Paolo Bonzini <pbonzini@xxxxxxxxxx> > --- > arch/x86/kvm/vmx.c | 53 +++++++++++++++++++++++++++++++++++++++++++---------- > 1 file changed, 43 insertions(+), 10 deletions(-) > > diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c > index 07d2198db225..b277a0409563 100644 > --- a/arch/x86/kvm/vmx.c > +++ b/arch/x86/kvm/vmx.c > @@ -4952,6 +4952,28 @@ static bool vmx_get_enable_apicv(void) > return enable_apicv; > } > > +static void nested_mark_vmcs12_pages_dirty(struct kvm_vcpu *vcpu) > +{ > + struct vmcs12 *vmcs12 = get_vmcs12(vcpu); > + gfn_t gfn; > + > + /* > + * Don't need to mark the APIC access page dirty; it is never > + * written to by the CPU during APIC virtualization. > + */ > + > + if (nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW)) { > + gfn = vmcs12->virtual_apic_page_addr >> PAGE_SHIFT; > + kvm_vcpu_mark_page_dirty(vcpu, gfn); > + } > + > + if (nested_cpu_has_posted_intr(vmcs12)) { > + gfn = vmcs12->posted_intr_desc_addr >> PAGE_SHIFT; > + kvm_vcpu_mark_page_dirty(vcpu, gfn); > + } > +} > + > + > static void vmx_complete_nested_posted_interrupt(struct kvm_vcpu *vcpu) > { > struct vcpu_vmx *vmx = to_vmx(vcpu); > @@ -4959,18 +4981,15 @@ static void vmx_complete_nested_posted_interrupt(struct kvm_vcpu *vcpu) > void *vapic_page; > u16 status; > > - if (vmx->nested.pi_desc && > - vmx->nested.pi_pending) { > - vmx->nested.pi_pending = false; > - if (!pi_test_and_clear_on(vmx->nested.pi_desc)) > - return; > - > - max_irr = find_last_bit( > - (unsigned long *)vmx->nested.pi_desc->pir, 256); > + if (!vmx->nested.pi_desc || !vmx->nested.pi_pending) > + return; > > - if (max_irr == 256) > - return; > + vmx->nested.pi_pending = false; > + if (!pi_test_and_clear_on(vmx->nested.pi_desc)) > + return; > > + max_irr = find_last_bit((unsigned long *)vmx->nested.pi_desc->pir, 256); > + if (max_irr != 256) { > vapic_page = kmap(vmx->nested.virtual_apic_page); > __kvm_apic_update_irr(vmx->nested.pi_desc->pir, vapic_page); > kunmap(vmx->nested.virtual_apic_page); > @@ -4982,6 +5001,8 @@ static void vmx_complete_nested_posted_interrupt(struct kvm_vcpu *vcpu) > vmcs_write16(GUEST_INTR_STATUS, status); > } > } > + > + nested_mark_vmcs12_pages_dirty(vcpu); > } > > static inline bool kvm_vcpu_trigger_posted_interrupt(struct kvm_vcpu *vcpu, > @@ -8029,6 +8050,18 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu) > vmcs_read32(VM_EXIT_INTR_ERROR_CODE), > KVM_ISA_VMX); > > + /* > + * The host physical addresses of some pages of guest memory > + * are loaded into VMCS02 (e.g. L1's Virtual APIC Page). The CPU > + * may write to these pages via their host physical address while > + * L2 is running, bypassing any address-translation-based dirty > + * tracking (e.g. EPT write protection). > + * > + * Mark them dirty on every exit from L2 to prevent them from > + * getting out of sync with dirty tracking. > + */ > + nested_mark_vmcs12_pages_dirty(vcpu); > + > if (vmx->nested.nested_run_pending) > return false; > >