On Mon, 2022-11-14 at 16:16 -0800, David Woodhouse wrote:
>
> I'm playing with using a second GPC for the overrun onto the second
> page. Debating if it's already too ugly to live before I even fix up
> the actual copying part...

Well it certainly didn't get any *prettier*. Utterly untested other
than building it, so it's certainly going to be broken, but as an
illustration.

I can't see a sane way to get the two pages vmapped consecutively,
given that they might be IOMEM. So I can't see how to make a single
GPC do this "nicely", and I think we have to declare that the runstate
area is the only case that actually needs this, then do it this way as
a special case... even though it's fugly?

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 81114a376c4e..3fc08f416aa3 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -647,6 +647,7 @@ struct kvm_vcpu_xen {
 	struct gfn_to_pfn_cache vcpu_info_cache;
 	struct gfn_to_pfn_cache vcpu_time_info_cache;
 	struct gfn_to_pfn_cache runstate_cache;
+	struct gfn_to_pfn_cache runstate2_cache;
 	u64 last_steal;
 	u64 runstate_entry_time;
 	u64 runstate_times[4];
diff --git a/arch/x86/kvm/xen.c b/arch/x86/kvm/xen.c
index 4b8e9628fbf5..14ba45b541bf 100644
--- a/arch/x86/kvm/xen.c
+++ b/arch/x86/kvm/xen.c
@@ -198,38 +198,101 @@ static void kvm_xen_update_runstate(struct kvm_vcpu *v, int state)
 	vx->runstate_entry_time = now;
 }
 
+/*
+ * The guest region is arbitrarily aligned, and could be split across
+ * two pages.
+ *
+ * d1: Pointer to kernel map of first byte of region.
+ * d2: Pointer to kernel map of first byte of second page.
+ * l1: length of first range [ == PAGE_SIZE - (d1 & ~PAGE_MASK) ]
+ * src: Source pointer.
+ * len: Source length to be copied.
+ * dst_ofs: Destination offset within the guest region.
+ */
+static inline void memcpy_to_runstate(void *d1, void *d2, size_t l1,
+				      void *src, size_t len, size_t dst_ofs)
+{
+	size_t copylen;
+
+	if (dst_ofs < l1) {
+		copylen = min(l1 - dst_ofs, len);
+		memcpy(d1 + dst_ofs, src, copylen);
+		if (copylen == len)
+			return;
+
+		src += copylen;
+		dst_ofs += copylen;
+		len -= copylen;
+	}
+
+	BUG_ON(dst_ofs < l1);
+	memcpy(d2 + dst_ofs - l1, src, len);
+}
+
 void kvm_xen_update_runstate_guest(struct kvm_vcpu *v, int state)
 {
 	struct kvm_vcpu_xen *vx = &v->arch.xen;
-	struct gfn_to_pfn_cache *gpc = &vx->runstate_cache;
-	uint64_t *user_times;
+	struct gfn_to_pfn_cache *gpc1 = &vx->runstate_cache;
+	struct gfn_to_pfn_cache *gpc2 = &vx->runstate2_cache;
 	unsigned long flags;
-	size_t user_len;
-	int *user_state;
+	size_t user_len, user_len1, user_len2;
+	size_t times_ofs;
+	u8 *update_bit;
 
 	kvm_xen_update_runstate(v, state);
 
-	if (!vx->runstate_cache.active)
+	if (!gpc1->active)
 		return;
 
-	if (IS_ENABLED(CONFIG_64BIT) && v->kvm->arch.xen.long_mode)
+	if (IS_ENABLED(CONFIG_64BIT) && v->kvm->arch.xen.long_mode) {
 		user_len = sizeof(struct vcpu_runstate_info);
-	else
+		times_ofs = offsetof(struct vcpu_runstate_info,
+				     state_entry_time);
+	} else {
 		user_len = sizeof(struct compat_vcpu_runstate_info);
+		times_ofs = offsetof(struct compat_vcpu_runstate_info,
+				     state_entry_time);
+	}
 
-	read_lock_irqsave(&gpc->lock, flags);
-	while (!kvm_gfn_to_pfn_cache_check(v->kvm, gpc, gpc->gpa,
-					   user_len)) {
-		read_unlock_irqrestore(&gpc->lock, flags);
+	if ((gpc1->gpa & ~PAGE_MASK) + user_len >= PAGE_SIZE) {
+		user_len1 = PAGE_SIZE - (gpc1->gpa & ~PAGE_MASK);
+		user_len2 = user_len - user_len1;
+	} else {
+		user_len1 = user_len;
+		user_len2 = 0;
+	}
+	BUG_ON(user_len1 + user_len2 != user_len);
+
+ retry:
+	read_lock_irqsave(&gpc1->lock, flags);
+	while (!kvm_gfn_to_pfn_cache_check(v->kvm, gpc1, gpc1->gpa,
+					   user_len1)) {
+		read_unlock_irqrestore(&gpc1->lock, flags);
 
 		/* When invoked from kvm_sched_out() we cannot sleep */
 		if (state == RUNSTATE_runnable)
 			return;
 
-		if (kvm_gfn_to_pfn_cache_refresh(v->kvm, gpc, gpc->gpa, user_len))
+		if (kvm_gfn_to_pfn_cache_refresh(v->kvm, gpc1, gpc1->gpa, user_len1))
 			return;
 
-		read_lock_irqsave(&gpc->lock, flags);
+		read_lock_irqsave(&gpc1->lock, flags);
+	}
+
+	if (user_len2) {
+		read_lock(&gpc2->lock);
+		if (!kvm_gfn_to_pfn_cache_check(v->kvm, gpc2, gpc2->gpa, user_len2)) {
+			read_unlock(&gpc2->lock);
+			read_unlock_irqrestore(&gpc1->lock, flags);
+
+			if (state == RUNSTATE_runnable)
+				return;
+
+			if (kvm_gfn_to_pfn_cache_refresh(v->kvm, gpc2,
+							 gpc2->gpa, user_len2))
+				return;
+
+			goto retry;
+		}
 	}
 
 	/*
@@ -252,25 +315,23 @@ void kvm_xen_update_runstate_guest(struct kvm_vcpu *v, int state)
 		     offsetof(struct compat_vcpu_runstate_info, time) + 4);
 #endif
 
-	user_state = gpc->khva;
-
-	if (IS_ENABLED(CONFIG_64BIT) && v->kvm->arch.xen.long_mode)
-		user_times = gpc->khva + offsetof(struct vcpu_runstate_info,
-						  state_entry_time);
-	else
-		user_times = gpc->khva + offsetof(struct compat_vcpu_runstate_info,
-						  state_entry_time);
-
 	/*
-	 * First write the updated state_entry_time at the appropriate
-	 * location determined by 'offset'.
+	 * The XEN_RUNSTATE_UPDATE bit is the top bit of the state_entry_time
+	 * field. We need to set it (and write-barrier) before the rest.
 	 */
 	BUILD_BUG_ON(sizeof_field(struct vcpu_runstate_info, state_entry_time) !=
-		     sizeof(user_times[0]));
+		     sizeof(uint64_t));
 	BUILD_BUG_ON(sizeof_field(struct compat_vcpu_runstate_info, state_entry_time) !=
-		     sizeof(user_times[0]));
+		     sizeof(uint64_t));
+	BUILD_BUG_ON((XEN_RUNSTATE_UPDATE >> 56) != 0x80);
 
-	user_times[0] = vx->runstate_entry_time | XEN_RUNSTATE_UPDATE;
+	if (user_len1 >= times_ofs + sizeof(uint64_t))
+		update_bit = ((u8 *)gpc1->khva) + times_ofs + sizeof(u64) - 1;
+	else
+		update_bit = ((u8 *)gpc2->khva) + times_ofs + sizeof(u64) - 1 -
+			user_len1;
+
+	*update_bit |= (XEN_RUNSTATE_UPDATE >> 56);
 	smp_wmb();
 
 	/*
@@ -284,7 +345,9 @@ void kvm_xen_update_runstate_guest(struct kvm_vcpu *v, int state)
 	BUILD_BUG_ON(sizeof_field(struct compat_vcpu_runstate_info, state) !=
 		     sizeof(vx->current_runstate));
 
-	*user_state = vx->current_runstate;
+	memcpy_to_runstate(gpc1->khva, gpc2->khva, user_len1,
+			   &vx->current_runstate, sizeof(vx->current_runstate),
+			   offsetof(struct vcpu_runstate_info, state));
 
 	/*
 	 * Write the actual runstate times immediately after the
@@ -299,19 +362,28 @@ void kvm_xen_update_runstate_guest(struct kvm_vcpu *v, int state)
 	BUILD_BUG_ON(sizeof_field(struct vcpu_runstate_info, time) !=
 		     sizeof(vx->runstate_times));
 
-	memcpy(user_times + 1, vx->runstate_times, sizeof(vx->runstate_times));
-	smp_wmb();
+	memcpy_to_runstate(gpc1->khva, gpc2->khva, user_len1,
+			   vx->runstate_times, sizeof(vx->runstate_times),
+			   times_ofs + sizeof(u64));
+	memcpy_to_runstate(gpc1->khva, gpc2->khva, user_len1,
+			   &vx->runstate_entry_time, sizeof(vx->runstate_entry_time) - 1,
+			   times_ofs);
+	smp_wmb();
 
 	/*
 	 * Finally, clear the XEN_RUNSTATE_UPDATE bit in the guest's
 	 * runstate_entry_time field.
 	 */
-	user_times[0] &= ~XEN_RUNSTATE_UPDATE;
+	*update_bit = vx->runstate_entry_time >> 56;
 	smp_wmb();
 
-	read_unlock_irqrestore(&gpc->lock, flags);
+	if (user_len2)
+		read_unlock_irqrestore(&gpc2->lock, flags);
+	read_unlock_irqrestore(&gpc1->lock, flags);
 
-	mark_page_dirty_in_slot(v->kvm, gpc->memslot, gpc->gpa >> PAGE_SHIFT);
+	mark_page_dirty_in_slot(v->kvm, gpc1->memslot, gpc1->gpa >> PAGE_SHIFT);
+	if (user_len2)
+		mark_page_dirty_in_slot(v->kvm, gpc2->memslot, gpc2->gpa >> PAGE_SHIFT);
 }
 
 static void kvm_xen_inject_vcpu_vector(struct kvm_vcpu *v)
@@ -584,23 +656,52 @@ int kvm_xen_vcpu_set_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data)
 		kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
 		break;
 
-	case KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR:
+	case KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR: {
+		size_t sz;
+
 		if (!sched_info_on()) {
 			r = -EOPNOTSUPP;
 			break;
 		}
 		if (data->u.gpa == GPA_INVALID) {
+			r = 0;
+	deactivate_out:
 			kvm_gpc_deactivate(vcpu->kvm,
 					   &vcpu->arch.xen.runstate_cache);
-			r = 0;
+	deactivate2_out:
+			kvm_gpc_deactivate(vcpu->kvm,
+					   &vcpu->arch.xen.runstate2_cache);
 			break;
 		}
 
-		r = kvm_gpc_activate(vcpu->kvm, &vcpu->arch.xen.runstate_cache,
-				     NULL, KVM_HOST_USES_PFN, data->u.gpa,
-				     sizeof(struct vcpu_runstate_info));
+		if (IS_ENABLED(CONFIG_64BIT) && vcpu->kvm->arch.xen.long_mode)
+			sz = sizeof(struct vcpu_runstate_info);
+		else
+			sz = sizeof(struct compat_vcpu_runstate_info);
+
+		/* Handle structures which cross a page boundary by using two GPCs */
+		if ((data->u.gpa & ~PAGE_MASK) + sz <= PAGE_SIZE) {
+			r = kvm_gpc_activate(vcpu->kvm, &vcpu->arch.xen.runstate_cache,
+					     NULL, KVM_HOST_USES_PFN, data->u.gpa,
+					     sizeof(struct vcpu_runstate_info));
+			goto deactivate2_out;
+		} else {
+			/* Map the end of the first page... */
+			r = kvm_gpc_activate(vcpu->kvm, &vcpu->arch.xen.runstate_cache,
+					     NULL, KVM_HOST_USES_PFN, data->u.gpa,
+					     PAGE_SIZE - (data->u.gpa & ~PAGE_MASK));
+			if (r)
+				goto deactivate2_out;
+			/* ... and the start of the second. */
+			sz -= PAGE_SIZE - (data->u.gpa & ~PAGE_MASK);
+			r = kvm_gpc_activate(vcpu->kvm, &vcpu->arch.xen.runstate2_cache,
+					     NULL, KVM_HOST_USES_PFN,
+					     (data->u.gpa + PAGE_SIZE) & PAGE_MASK, sz);
+			if (r)
+				goto deactivate_out;
+		}
 		break;
-
+	}
 	case KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_CURRENT:
 		if (!sched_info_on()) {
 			r = -EOPNOTSUPP;
@@ -1834,6 +1935,7 @@ void kvm_xen_init_vcpu(struct kvm_vcpu *vcpu)
 	timer_setup(&vcpu->arch.xen.poll_timer, cancel_evtchn_poll, 0);
 
 	kvm_gpc_init(&vcpu->arch.xen.runstate_cache);
+	kvm_gpc_init(&vcpu->arch.xen.runstate2_cache);
 	kvm_gpc_init(&vcpu->arch.xen.vcpu_info_cache);
 	kvm_gpc_init(&vcpu->arch.xen.vcpu_time_info_cache);
 }
@@ -1844,6 +1946,7 @@ void kvm_xen_destroy_vcpu(struct kvm_vcpu *vcpu)
 		kvm_xen_stop_timer(vcpu);
 
 	kvm_gpc_deactivate(vcpu->kvm, &vcpu->arch.xen.runstate_cache);
+	kvm_gpc_deactivate(vcpu->kvm, &vcpu->arch.xen.runstate2_cache);
 	kvm_gpc_deactivate(vcpu->kvm, &vcpu->arch.xen.vcpu_info_cache);
 	kvm_gpc_deactivate(vcpu->kvm, &vcpu->arch.xen.vcpu_time_info_cache);
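For anyone who wants to poke at the split-copy logic in isolation, here is a
rough userspace sketch (not part of the patch, and purely illustrative):
split_copy() mirrors memcpy_to_runstate() above, page1/page2 are calloc'd
stand-ins for the two GPC kernel mappings, FAKE_PAGE_SIZE stands in for
PAGE_SIZE, and assert() replaces BUG_ON(). All of those names are invented
for this example.

/*
 * Userspace sketch: the "guest region" is backed by two separate
 * mappings, and a write at an arbitrary offset into the region is
 * split at the boundary between them.
 */
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define FAKE_PAGE_SIZE 4096

static void split_copy(void *d1, void *d2, size_t l1,
		       const void *src, size_t len, size_t dst_ofs)
{
	if (dst_ofs < l1) {
		size_t copylen = (len < l1 - dst_ofs) ? len : l1 - dst_ofs;

		/* Part (or all) of the write fits in the first mapping */
		memcpy((char *)d1 + dst_ofs, src, copylen);
		if (copylen == len)
			return;

		src = (const char *)src + copylen;
		dst_ofs += copylen;
		len -= copylen;
	}

	/* Remainder goes into the second mapping (kernel uses BUG_ON) */
	assert(dst_ofs >= l1);
	memcpy((char *)d2 + (dst_ofs - l1), src, len);
}

int main(void)
{
	/* The region begins 20 bytes before the end of the first page... */
	size_t l1 = 20;
	char *page1 = calloc(1, FAKE_PAGE_SIZE);
	char *page2 = calloc(1, FAKE_PAGE_SIZE);
	char src[32];
	size_t i;

	for (i = 0; i < sizeof(src); i++)
		src[i] = 'A' + i;

	/*
	 * ...so a 32-byte write at region offset 8 straddles the boundary:
	 * src[0..11] ('A'..'L') lands at the end of page1 and
	 * src[12..31] at the start of page2.
	 */
	split_copy(page1 + (FAKE_PAGE_SIZE - l1), page2, l1,
		   src, sizeof(src), 8);

	printf("last byte of page1: %c, first byte of page2: %c\n",
	       page1[FAKE_PAGE_SIZE - 1], page2[0]);	/* L, M */

	free(page1);
	free(page2);
	return 0;
}

With a region starting 20 bytes before the end of the first page, the
32-byte write splits as 12 bytes on the first page and 20 on the second,
which is the same split the runstate update above relies on for the state,
state_entry_time and time[] fields.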