On Tue, 2021-11-16 at 16:49 +0100, Paolo Bonzini wrote:
> On 11/16/21 16:09, David Woodhouse wrote:
> > On Tue, 2021-11-16 at 15:57 +0100, Paolo Bonzini wrote:
> > > This should not be needed, should it? As long as the gfn-to-pfn
> > > cache's vcpu field is handled properly, the request will just cause
> > > the vCPU not to enter.
> >
> > If the MMU mappings never change, the request never happens. But the
> > memslots *can* change, so it does need to be revalidated each time
> > through I think?
>
> That needs to be done on KVM_SET_USER_MEMORY_REGION, using the same
> request (or even the same list walking code) as the MMU notifiers.

Hm.... kvm_arch_memslots_updated() is already kicking every vCPU after
the update, and although that was asynchronous it was actually OK
because unlike in the MMU notifier case, that page wasn't actually
going away -- and if that HVA *did* subsequently go away, our HVA-based
notifier check would still catch that and kill it synchronously.

But yes, we *could* come up with a wakeup mechanism which does it that
way.

> > > It would have to take the gpc->lock around
> > > changes to gpc->vcpu though (meaning: it's probably best to add a
> > > function gfn_to_pfn_cache_set_vcpu).
> >
> > Hm, in my head that was never going to *change* for a given gpc; it
> > *belongs* to that vCPU for ever (and was even part of vmx->nested. for
> > that vCPU, to replace e.g. vmx->nested.pi_desc_map).
>
> Ah okay, I thought it would be set in nested vmentry and cleared in
> nested vmexit.

I don't think it needs to be proactively cleared; we just don't
*refresh* it until we need it again.

> > +static void nested_vmx_check_guest_maps(struct kvm_vcpu *vcpu)
> > +{
> > +        struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
> > +        struct vcpu_vmx *vmx = to_vmx(vcpu);
> > +        struct gfn_to_pfn_cache *gpc;
> > +
> > +        bool valid;
> > +
> > +        if (nested_cpu_has_posted_intr(vmcs12)) {
> > +                gpc = &vmx->nested.pi_desc_cache;
> > +
> > +                read_lock(&gpc->lock);
> > +                valid = kvm_gfn_to_pfn_cache_check(vcpu->kvm, gpc,
> > +                                                   vmcs12->posted_intr_desc_addr,
> > +                                                   PAGE_SIZE);
> > +                read_unlock(&gpc->lock);
> > +                if (!valid) {
> > +                        /* XX: This isn't idempotent. Make it so, or use a different
> > +                         * req for the 'refresh'. */
> > +                        kvm_make_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu);
> > +                        return;
> > +                }
> > +        }
> > +}
> That's really slow to do on every vmentry.

It probably doesn't have to be. Ultimately, all it *has* to check is
that kvm->memslots->generation hasn't changed since the caches were
valid. That can surely be made fast enough? If we *know* the GPA and
size haven't changed, and if we know that gpc->valid becoming false
would have been handled differently, then we could optimise that whole
thing away quite effectively to a single check on ->generations?

> > So nested_get_vmcs12_pages() certainly isn't idempotent right now
> > because of all the kvm_vcpu_map() calls, which would just end up
> > leaking -- but I suppose the point is to kill all those, and then maybe
> > it will be?
>
> Yes, exactly. That might be a larger than normal patch, but it should
> not be one too hard to review. Once there's something that works, we
> can think of how to split (if it's worth it).

This one actually compiles. Not sure we have any test cases that will
actually exercise it though, do we?
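(To illustrate the ->generations point above: the fast path I have in
mind is roughly the sketch below. It's only a sketch, not part of the
patch -- the helper name is made up, the locking is ignored for
brevity, and it assumes the gfn_to_pfn_cache grows a 'generation'
field recording kvm->memslots->generation at the last successful
refresh; no such field exists yet.)

static bool gpc_fast_check(struct kvm_vcpu *vcpu,
                           struct gfn_to_pfn_cache *gpc, gpa_t gpa)
{
        /* L1 moved the page (different GPA in vmcs12): take the slow path. */
        if (gpc->gpa != gpa)
                return false;

        /* Invalidated by the MMU notifier: take the slow path. */
        if (!gpc->valid)
                return false;

        /*
         * No memslot update since the last refresh, so the gpa->pfn
         * translation cannot have changed behind our back.
         * (gpc->generation is hypothetical here.)
         */
        return gpc->generation == kvm_memslots(vcpu->kvm)->generation;
}

With something like that, the check on every vmentry only falls back to
the full kvm_gfn_to_pfn_cache_check() / KVM_REQ_GET_NESTED_STATE_PAGES
dance when one of those conditions actually fails.

Anyway, the patch: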
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 465455334c0c..9f279d08e570 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1510,6 +1510,7 @@ struct kvm_x86_nested_ops {
 	int (*enable_evmcs)(struct kvm_vcpu *vcpu,
 			    uint16_t *vmcs_version);
 	uint16_t (*get_evmcs_version)(struct kvm_vcpu *vcpu);
+	void (*check_guest_maps)(struct kvm_vcpu *vcpu);
 };
 
 struct kvm_x86_init_ops {
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index 280f34ea02c3..f67751112633 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -309,7 +309,7 @@ static void free_nested(struct kvm_vcpu *vcpu)
 		kvm_release_page_clean(vmx->nested.apic_access_page);
 		vmx->nested.apic_access_page = NULL;
 	}
-	kvm_vcpu_unmap(vcpu, &vmx->nested.virtual_apic_map, true);
+	kvm_gfn_to_pfn_cache_destroy(vcpu->kvm, &vmx->nested.virtual_apic_cache);
 	kvm_vcpu_unmap(vcpu, &vmx->nested.pi_desc_map, true);
 	vmx->nested.pi_desc = NULL;
 
@@ -3170,10 +3170,12 @@ static bool nested_get_vmcs12_pages(struct kvm_vcpu *vcpu)
 	}
 
 	if (nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW)) {
-		map = &vmx->nested.virtual_apic_map;
+		struct gfn_to_pfn_cache *gpc = &vmx->nested.virtual_apic_cache;
 
-		if (!kvm_vcpu_map(vcpu, gpa_to_gfn(vmcs12->virtual_apic_page_addr), map)) {
-			vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, pfn_to_hpa(map->pfn));
+		if (!kvm_gfn_to_pfn_cache_init(vcpu->kvm, gpc, vcpu, true,
+					       vmcs12->virtual_apic_page_addr,
+					       PAGE_SIZE, true)) {
+			vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, pfn_to_hpa(gpc->pfn));
 		} else if (nested_cpu_has(vmcs12, CPU_BASED_CR8_LOAD_EXITING) &&
 			   nested_cpu_has(vmcs12, CPU_BASED_CR8_STORE_EXITING) &&
 			   !nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) {
@@ -3198,6 +3200,9 @@ static bool nested_get_vmcs12_pages(struct kvm_vcpu *vcpu)
 	if (nested_cpu_has_posted_intr(vmcs12)) {
 		map = &vmx->nested.pi_desc_map;
 
+		if (kvm_vcpu_mapped(map))
+			kvm_vcpu_unmap(vcpu, map, true);
+
 		if (!kvm_vcpu_map(vcpu, gpa_to_gfn(vmcs12->posted_intr_desc_addr), map)) {
 			vmx->nested.pi_desc =
 				(struct pi_desc *)(((void *)map->hva) +
@@ -3242,6 +3247,29 @@ static bool vmx_get_nested_state_pages(struct kvm_vcpu *vcpu)
 	return true;
 }
 
+static void nested_vmx_check_guest_maps(struct kvm_vcpu *vcpu)
+{
+	struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
+	struct vcpu_vmx *vmx = to_vmx(vcpu);
+	struct gfn_to_pfn_cache *gpc;
+
+	int valid;
+
+	if (nested_cpu_has_posted_intr(vmcs12)) {
+		gpc = &vmx->nested.virtual_apic_cache;
+
+		read_lock(&gpc->lock);
+		valid = kvm_gfn_to_pfn_cache_check(vcpu->kvm, gpc,
+						   vmcs12->virtual_apic_page_addr,
+						   PAGE_SIZE);
+		read_unlock(&gpc->lock);
+		if (!valid) {
+			kvm_make_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu);
+			return;
+		}
+	}
+}
+
 static int nested_vmx_write_pml_buffer(struct kvm_vcpu *vcpu, gpa_t gpa)
 {
 	struct vmcs12 *vmcs12;
@@ -3737,9 +3765,15 @@ static int vmx_complete_nested_posted_interrupt(struct kvm_vcpu *vcpu)
 
 	max_irr = find_last_bit((unsigned long *)vmx->nested.pi_desc->pir, 256);
 	if (max_irr != 256) {
-		vapic_page = vmx->nested.virtual_apic_map.hva;
-		if (!vapic_page)
+		struct gfn_to_pfn_cache *gpc = &vmx->nested.virtual_apic_cache;
+
+		read_lock(&gpc->lock);
+		if (!kvm_gfn_to_pfn_cache_check(vcpu->kvm, gpc, gpc->gpa, PAGE_SIZE)) {
+			read_unlock(&gpc->lock);
 			goto mmio_needed;
+		}
+
+		vapic_page = gpc->khva;
 
 		__kvm_apic_update_irr(vmx->nested.pi_desc->pir,
 			vapic_page, &max_irr);
@@ -3749,6 +3783,7 @@ static int vmx_complete_nested_posted_interrupt(struct kvm_vcpu *vcpu)
 			status |= (u8)max_irr;
 			vmcs_write16(GUEST_INTR_STATUS, status);
 		}
+		read_unlock(&gpc->lock);
 	}
 
 	nested_mark_vmcs12_pages_dirty(vcpu);
@@ -4569,7 +4604,7 @@ void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 vm_exit_reason,
 		kvm_release_page_clean(vmx->nested.apic_access_page);
 		vmx->nested.apic_access_page = NULL;
 	}
-	kvm_vcpu_unmap(vcpu, &vmx->nested.virtual_apic_map, true);
+	kvm_gfn_to_pfn_cache_destroy(vcpu->kvm, &vmx->nested.virtual_apic_cache);
 	kvm_vcpu_unmap(vcpu, &vmx->nested.pi_desc_map, true);
 	vmx->nested.pi_desc = NULL;
 
@@ -6744,4 +6779,5 @@ struct kvm_x86_nested_ops vmx_nested_ops = {
 	.write_log_dirty = nested_vmx_write_pml_buffer,
 	.enable_evmcs = nested_enable_evmcs,
 	.get_evmcs_version = nested_get_evmcs_version,
+	.check_guest_maps = nested_vmx_check_guest_maps,
 };
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index ba66c171d951..6c61faef86d3 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -3839,19 +3839,23 @@ void pt_update_intercept_for_msr(struct kvm_vcpu *vcpu)
 static bool vmx_guest_apic_has_interrupt(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
-	void *vapic_page;
+	struct gfn_to_pfn_cache *gpc = &vmx->nested.virtual_apic_cache;
 	u32 vppr;
 	int rvi;
 
 	if (WARN_ON_ONCE(!is_guest_mode(vcpu)) ||
 	    !nested_cpu_has_vid(get_vmcs12(vcpu)) ||
-	    WARN_ON_ONCE(!vmx->nested.virtual_apic_map.gfn))
+	    WARN_ON_ONCE(gpc->gpa == GPA_INVALID))
 		return false;
 
 	rvi = vmx_get_rvi();
 
-	vapic_page = vmx->nested.virtual_apic_map.hva;
-	vppr = *((u32 *)(vapic_page + APIC_PROCPRI));
+	read_lock(&gpc->lock);
+	if (kvm_gfn_to_pfn_cache_check(vcpu->kvm, gpc, gpc->gpa, PAGE_SIZE))
+		vppr = *((u32 *)(gpc->khva + APIC_PROCPRI));
+	else
+		vppr = 0xff;
+	read_unlock(&gpc->lock);
 
 	return ((rvi & 0xf0) > (vppr & 0xf0));
 }
diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
index 4df2ac24ffc1..8364e7fc92a0 100644
--- a/arch/x86/kvm/vmx/vmx.h
+++ b/arch/x86/kvm/vmx/vmx.h
@@ -195,7 +195,7 @@ struct nested_vmx {
 	 * pointers, so we must keep them pinned while L2 runs.
 	 */
 	struct page *apic_access_page;
-	struct kvm_host_map virtual_apic_map;
+	struct gfn_to_pfn_cache virtual_apic_cache;
 	struct kvm_host_map pi_desc_map;
 
 	struct kvm_host_map msr_bitmap_map;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 0a689bb62e9e..a879e4d08758 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -9735,6 +9735,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 		if (kvm_check_request(KVM_REQ_UPDATE_CPU_DIRTY_LOGGING, vcpu))
 			static_call(kvm_x86_update_cpu_dirty_logging)(vcpu);
+		if (kvm_check_request(KVM_REQ_GPC_INVALIDATE, vcpu))
+			; /* Nothing to do. It just wanted to wake us */
 	}
 
 	if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win ||
@@ -9781,6 +9783,14 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 	local_irq_disable();
 	vcpu->mode = IN_GUEST_MODE;
 
+	/*
+	 * If the guest requires direct access to mapped L1 pages, check
+	 * the caches are valid. Will raise KVM_REQ_GET_NESTED_STATE_PAGES
+	 * to go and revalidate them, if necessary.
+	 */
+	if (is_guest_mode(vcpu) && kvm_x86_ops.nested_ops->check_guest_maps)
+		kvm_x86_ops.nested_ops->check_guest_maps(vcpu);
+
 	srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
 
 	/*
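FWIW the pattern that consumers of the cache are expected to follow,
as I understand the locking, is spelled out below. This is a standalone
sketch, not part of the patch, and the helper is made up purely for
illustration:

/*
 * Read a 32-bit field from the cached virtual-APIC page. The pfn
 * cannot be invalidated while the read lock is held, because the
 * invalidation paths take gpc->lock for write first.
 */
static int nested_read_vapic_u32(struct kvm_vcpu *vcpu, unsigned int off,
                                 u32 *val)
{
        struct gfn_to_pfn_cache *gpc = &to_vmx(vcpu)->nested.virtual_apic_cache;
        int ret = 0;

        read_lock(&gpc->lock);
        if (kvm_gfn_to_pfn_cache_check(vcpu->kvm, gpc, gpc->gpa, PAGE_SIZE))
                *val = *(u32 *)(gpc->khva + off);
        else
                ret = -EAGAIN; /* stale; caller punts to KVM_REQ_GET_NESTED_STATE_PAGES */
        read_unlock(&gpc->lock);

        return ret;
}

i.e. gpc->khva is only ever dereferenced under the read lock, and the
refresh never happens in that context; it happens from
vmx_get_nested_state_pages() before the next vmentry, which is why that
path needs to end up idempotent.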