On Mon, 2022-11-14 at 11:27 +0000, Durrant, Paul wrote:
> > On Sun, 2022-11-13 at 13:32 +0000, David Woodhouse wrote:
> > > For the runstate area, I think we can live with using a gfn_to_hva
> > > cache instead, and writing via the userspace address (with
> > > appropriate atomicity for the RUNSTATE_runnable case as we have at
> > > the moment gating the refresh).
> > 
> > Which mostly involves just reverting commit a795cd43c5b5 I think?
> > 
> > IIRC the reason for that commit was mostly consistency with other
> > things that really *did* want to be switched to gpc.
> 
> A straight reversion would re-introduce the page-spanning check in
> kvm_xen_vcpu_set_attr() but I think we can just leave that out.

That check matches the behaviour of kvm_gfn_to_hva_cache_init(), which
deliberately clears ghc->memslot if the region crosses a page.
(Although I don't see why; it only really needs to do that if it
crosses over onto a second *memslot*, surely?)

So we'd then hit this in kvm_xen_update_runstate_guest():

	/* We made sure it fits in a single page */
	BUG_ON(!ghc->memslot);

We'd need a fallback code path for the page-crossing (or memslot-
crossing) case, and the natural way to do that is to just use
kvm_write_guest().

In the RUNSTATE_runnable case where we can't sleep, can we get away
with using pagefault_disable() around a call to kvm_write_guest()?

The worst case here is kind of awful: a compat guest with the first
half of its ->state_entry_time on the first page, and the second half
of it on the second page.

I'm playing with using a second GPC for the overrun onto the second
page. Debating if it's already too ugly to live before I even fix up
the actual copying part...
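To be concrete about the pagefault_disable() idea, I mean something
like the sketch below. It's entirely untested, and the 'gpa', 'rs' and
'ret' locals are invented for illustration; the premise is that with
page faults disabled, kvm_write_guest() fails with -EFAULT instead of
sleeping when the target page isn't resident:

	/*
	 * Hypothetical fallback for the page/memslot-crossing case.
	 * When invoked from kvm_sched_out() (RUNSTATE_runnable) we
	 * cannot sleep, so disable page faults and bail out if the
	 * write would have needed to fault a page in.
	 */
	if (state == RUNSTATE_runnable) {
		pagefault_disable();
		ret = kvm_write_guest(v->kvm, gpa, &rs, user_len);
		pagefault_enable();
		if (ret)
			return;
	} else {
		kvm_write_guest(v->kvm, gpa, &rs, user_len);
	}

Anyway, here's where the two-GPC experiment stands so far: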
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 81114a376c4e..3fc08f416aa3 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -647,6 +647,7 @@ struct kvm_vcpu_xen {
 	struct gfn_to_pfn_cache vcpu_info_cache;
 	struct gfn_to_pfn_cache vcpu_time_info_cache;
 	struct gfn_to_pfn_cache runstate_cache;
+	struct gfn_to_pfn_cache runstate2_cache;
 	u64 last_steal;
 	u64 runstate_entry_time;
 	u64 runstate_times[4];
diff --git a/arch/x86/kvm/xen.c b/arch/x86/kvm/xen.c
index 4b8e9628fbf5..e297569bbc8d 100644
--- a/arch/x86/kvm/xen.c
+++ b/arch/x86/kvm/xen.c
@@ -201,35 +201,59 @@ static void kvm_xen_update_runstate(struct kvm_vcpu *v, int state)
 void kvm_xen_update_runstate_guest(struct kvm_vcpu *v, int state)
 {
 	struct kvm_vcpu_xen *vx = &v->arch.xen;
-	struct gfn_to_pfn_cache *gpc = &vx->runstate_cache;
+	struct gfn_to_pfn_cache *gpc1 = &vx->runstate_cache;
+	struct gfn_to_pfn_cache *gpc2 = &vx->runstate2_cache;
 	uint64_t *user_times;
 	unsigned long flags;
-	size_t user_len;
+	size_t user_len1, user_len2 = 0;
 	int *user_state;
 
 	kvm_xen_update_runstate(v, state);
 
-	if (!vx->runstate_cache.active)
+	if (!gpc1->active)
 		return;
 
 	if (IS_ENABLED(CONFIG_64BIT) && v->kvm->arch.xen.long_mode)
-		user_len = sizeof(struct vcpu_runstate_info);
+		user_len1 = sizeof(struct vcpu_runstate_info);
 	else
-		user_len = sizeof(struct compat_vcpu_runstate_info);
+		user_len1 = sizeof(struct compat_vcpu_runstate_info);
 
-	read_lock_irqsave(&gpc->lock, flags);
-	while (!kvm_gfn_to_pfn_cache_check(v->kvm, gpc, gpc->gpa,
-					   user_len)) {
-		read_unlock_irqrestore(&gpc->lock, flags);
+	if ((gpc1->gpa & ~PAGE_MASK) + user_len1 >= PAGE_SIZE) {
+		user_len2 = user_len1;
+		user_len1 = PAGE_SIZE - (gpc1->gpa & ~PAGE_MASK);
+		user_len2 -= user_len1;
+	}
+
+ retry:
+	read_lock_irqsave(&gpc1->lock, flags);
+	while (!kvm_gfn_to_pfn_cache_check(v->kvm, gpc1, gpc1->gpa,
+					   user_len1)) {
+		read_unlock_irqrestore(&gpc1->lock, flags);
 
 		/* When invoked from kvm_sched_out() we cannot sleep */
 		if (state == RUNSTATE_runnable)
 			return;
 
-		if (kvm_gfn_to_pfn_cache_refresh(v->kvm, gpc, gpc->gpa, user_len))
+		if (kvm_gfn_to_pfn_cache_refresh(v->kvm, gpc1, gpc1->gpa, user_len1))
 			return;
 
-		read_lock_irqsave(&gpc->lock, flags);
+		read_lock_irqsave(&gpc1->lock, flags);
+	}
+
+	if (user_len2) {
+		read_lock(&gpc2->lock);
+		if (!kvm_gfn_to_pfn_cache_check(v->kvm, gpc2, gpc2->gpa, user_len2)) {
+			read_unlock(&gpc2->lock);
+			read_unlock_irqrestore(&gpc1->lock, flags);
+
+			if (state == RUNSTATE_runnable)
+				return;
+
+			if (kvm_gfn_to_pfn_cache_refresh(v->kvm, gpc2,
+							 gpc2->gpa, user_len2))
+				return;
+
+			goto retry;
+		}
 	}
 
 	/*
@@ -584,23 +608,52 @@ int kvm_xen_vcpu_set_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data)
 		kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
 		break;
 
-	case KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR:
+	case KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR: {
+		size_t sz;
+
 		if (!sched_info_on()) {
 			r = -EOPNOTSUPP;
 			break;
 		}
 		if (data->u.gpa == GPA_INVALID) {
+			r = 0;
+	deactivate_out:
 			kvm_gpc_deactivate(vcpu->kvm,
 					   &vcpu->arch.xen.runstate_cache);
-			r = 0;
+	deactivate2_out:
+			kvm_gpc_deactivate(vcpu->kvm,
+					   &vcpu->arch.xen.runstate2_cache);
 			break;
 		}
 
-		r = kvm_gpc_activate(vcpu->kvm, &vcpu->arch.xen.runstate_cache,
-				     NULL, KVM_HOST_USES_PFN, data->u.gpa,
-				     sizeof(struct vcpu_runstate_info));
+		if (IS_ENABLED(CONFIG_64BIT) && vcpu->kvm->arch.xen.long_mode)
+			sz = sizeof(struct vcpu_runstate_info);
+		else
+			sz = sizeof(struct compat_vcpu_runstate_info);
+
+		/* Handle structures which cross a page boundary by using two GPCs */
+		if ((data->u.gpa & ~PAGE_MASK) + sz <= PAGE_SIZE) {
+			r = kvm_gpc_activate(vcpu->kvm, &vcpu->arch.xen.runstate_cache,
+					     NULL, KVM_HOST_USES_PFN, data->u.gpa,
+					     sizeof(struct vcpu_runstate_info));
+			goto deactivate2_out;
+		} else {
+			/* Map the end of the first page... */
+			r = kvm_gpc_activate(vcpu->kvm, &vcpu->arch.xen.runstate_cache,
+					     NULL, KVM_HOST_USES_PFN, data->u.gpa,
+					     PAGE_SIZE - (data->u.gpa & ~PAGE_MASK));
+			if (r)
+				goto deactivate2_out;
+			/* ... and the start of the second. */
+			sz -= PAGE_SIZE - (data->u.gpa & ~PAGE_MASK);
+			r = kvm_gpc_activate(vcpu->kvm, &vcpu->arch.xen.runstate2_cache,
+					     NULL, KVM_HOST_USES_PFN,
+					     (data->u.gpa + PAGE_SIZE) & PAGE_MASK, sz);
+			if (r)
+				goto deactivate_out;
+		}
 		break;
-
+	}
 	case KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_CURRENT:
 		if (!sched_info_on()) {
 			r = -EOPNOTSUPP;
@@ -1834,6 +1887,7 @@ void kvm_xen_init_vcpu(struct kvm_vcpu *vcpu)
 	timer_setup(&vcpu->arch.xen.poll_timer, cancel_evtchn_poll, 0);
 
 	kvm_gpc_init(&vcpu->arch.xen.runstate_cache);
+	kvm_gpc_init(&vcpu->arch.xen.runstate2_cache);
 	kvm_gpc_init(&vcpu->arch.xen.vcpu_info_cache);
 	kvm_gpc_init(&vcpu->arch.xen.vcpu_time_info_cache);
 }
@@ -1844,6 +1898,7 @@ void kvm_xen_destroy_vcpu(struct kvm_vcpu *vcpu)
 		kvm_xen_stop_timer(vcpu);
 
 	kvm_gpc_deactivate(vcpu->kvm, &vcpu->arch.xen.runstate_cache);
+	kvm_gpc_deactivate(vcpu->kvm, &vcpu->arch.xen.runstate2_cache);
 	kvm_gpc_deactivate(vcpu->kvm, &vcpu->arch.xen.vcpu_info_cache);
 	kvm_gpc_deactivate(vcpu->kvm, &vcpu->arch.xen.vcpu_time_info_cache);
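The actual copying isn't shown above, of course. I imagine it ends up
looking something like the sketch below: build the structure in a
local buffer, then split the copy across the two mappings. The 'rs'
buffer is invented for illustration, and this ignores the question of
making the ->state_entry_time update look atomic to the guest:

	/*
	 * Sketch of the split copy, with both gpc locks held and
	 * both caches known to be valid: assemble the runstate
	 * info locally, write the first user_len1 bytes through
	 * the first mapping and any overrun through the second.
	 */
	u8 rs[sizeof(struct vcpu_runstate_info)];

	/* ... fill rs in from vx->current_runstate et al. ... */

	memcpy(gpc1->khva, rs, user_len1);
	if (user_len2)
		memcpy(gpc2->khva, rs + user_len1, user_len2);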