On Tue, 2021-11-16 at 16:22 +0000, David Woodhouse wrote:
>
> I suppose we could make it a KVM_XEN_VCPU_SET_ATTR instead, and thus
> associate it with a particular CPU at least for the initial wallclock
> write?
>

We'd end up needing to do all this too, to plumb that 'vcpu' through to
the actual mark_page_dirty_in_slot(). And I might end up wanting to kill
kvm_write_guest() et al completely. If we're never supposed to be writing
without a vCPU associated with the write, then we should always use
kvm_vcpu_write_guest(), shouldn't we?

Paolo, what do you think? Want me to finish and test this and submit it,
along with changing the shinfo address to a per-vCPU thing?

diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
index 326cdfec74a1..d8411ce4db4b 100644
--- a/arch/arm64/kvm/mmu.c
+++ b/arch/arm64/kvm/mmu.c
@@ -1143,7 +1143,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 	/* Mark the page dirty only if the fault is handled successfully */
 	if (writable && !ret) {
 		kvm_set_pfn_dirty(pfn);
-		mark_page_dirty_in_slot(kvm, memslot, gfn);
+		mark_page_dirty_in_slot(kvm, vcpu, memslot, gfn);
 	}
 
 out_unlock:
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 33794379949e..4fd2ad5327b6 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -3090,7 +3090,7 @@ fast_pf_fix_direct_spte(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault,
 		return false;
 
 	if (is_writable_pte(new_spte) && !is_writable_pte(old_spte))
-		mark_page_dirty_in_slot(vcpu->kvm, fault->slot, fault->gfn);
+		mark_page_dirty_in_slot(vcpu->kvm, vcpu, fault->slot, fault->gfn);
 
 	return true;
 }
diff --git a/arch/x86/kvm/mmu/spte.c b/arch/x86/kvm/mmu/spte.c
index 0c76c45fdb68..0598515f3ae2 100644
--- a/arch/x86/kvm/mmu/spte.c
+++ b/arch/x86/kvm/mmu/spte.c
@@ -184,7 +184,7 @@ bool make_spte(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
 	if ((spte & PT_WRITABLE_MASK) && kvm_slot_dirty_track_enabled(slot)) {
 		/* Enforced by kvm_mmu_hugepage_adjust. */
 		WARN_ON(level > PG_LEVEL_4K);
-		mark_page_dirty_in_slot(vcpu->kvm, slot, gfn);
+		mark_page_dirty_in_slot(vcpu->kvm, vcpu, slot, gfn);
 	}
 
 	*new_spte = spte;
diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
index a54c3491af42..c5669c9918a4 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.c
+++ b/arch/x86/kvm/mmu/tdp_mmu.c
@@ -247,7 +247,7 @@ static void handle_changed_spte_dirty_log(struct kvm *kvm, int as_id, gfn_t gfn,
 	if ((!is_writable_pte(old_spte) || pfn_changed) &&
 	    is_writable_pte(new_spte)) {
 		slot = __gfn_to_memslot(__kvm_memslots(kvm, as_id), gfn);
-		mark_page_dirty_in_slot(kvm, slot, gfn);
+		mark_page_dirty_in_slot(kvm, NULL, slot, gfn);
 	}
 }
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index a879e4d08758..c14ce545fae9 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2118,7 +2118,7 @@ static s64 get_kvmclock_base_ns(void)
 }
 #endif
 
-void kvm_write_wall_clock(struct kvm *kvm, gpa_t wall_clock, int sec_hi_ofs)
+void kvm_write_wall_clock(struct kvm_vcpu *vcpu, gpa_t wall_clock, int sec_hi_ofs)
 {
 	int version;
 	int r;
@@ -2129,7 +2129,7 @@ void kvm_write_wall_clock(struct kvm *kvm, gpa_t wall_clock, int sec_hi_ofs)
 	if (!wall_clock)
 		return;
 
-	r = kvm_read_guest(kvm, wall_clock, &version, sizeof(version));
+	r = kvm_vcpu_read_guest(vcpu, wall_clock, &version, sizeof(version));
 	if (r)
 		return;
 
@@ -2138,7 +2138,7 @@ void kvm_write_wall_clock(struct kvm *kvm, gpa_t wall_clock, int sec_hi_ofs)
 
 	++version;
 
-	if (kvm_write_guest(kvm, wall_clock, &version, sizeof(version)))
+	if (kvm_vcpu_write_guest(vcpu, wall_clock, &version, sizeof(version)))
 		return;
 
 	/*
@@ -2146,22 +2146,22 @@ void kvm_write_wall_clock(struct kvm *kvm, gpa_t wall_clock, int sec_hi_ofs)
 	 * system time (updated by kvm_guest_time_update below) to the
 	 * wall clock specified here.  We do the reverse here.
 	 */
-	wall_nsec = ktime_get_real_ns() - get_kvmclock_ns(kvm);
+	wall_nsec = ktime_get_real_ns() - get_kvmclock_ns(vcpu->kvm);
 
 	wc.nsec = do_div(wall_nsec, 1000000000);
 	wc.sec = (u32)wall_nsec; /* overflow in 2106 guest time */
 	wc.version = version;
 
-	kvm_write_guest(kvm, wall_clock, &wc, sizeof(wc));
+	kvm_vcpu_write_guest(vcpu, wall_clock, &wc, sizeof(wc));
 
 	if (sec_hi_ofs) {
 		wc_sec_hi = wall_nsec >> 32;
-		kvm_write_guest(kvm, wall_clock + sec_hi_ofs,
-				&wc_sec_hi, sizeof(wc_sec_hi));
+		kvm_vcpu_write_guest(vcpu, wall_clock + sec_hi_ofs,
+				     &wc_sec_hi, sizeof(wc_sec_hi));
 	}
 
 	version++;
-	kvm_write_guest(kvm, wall_clock, &version, sizeof(version));
+	kvm_vcpu_write_guest(vcpu, wall_clock, &version, sizeof(version));
 }
 
 static void kvm_write_system_time(struct kvm_vcpu *vcpu, gpa_t system_time,
@@ -3353,7 +3353,7 @@ static void record_steal_time(struct kvm_vcpu *vcpu)
 out:
 	user_access_end();
 dirty:
-	mark_page_dirty_in_slot(vcpu->kvm, ghc->memslot, gpa_to_gfn(ghc->gpa));
+	mark_page_dirty_in_slot(vcpu->kvm, vcpu, ghc->memslot, gpa_to_gfn(ghc->gpa));
 }
 
 int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
@@ -3494,14 +3494,14 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 			return 1;
 
 		vcpu->kvm->arch.wall_clock = data;
-		kvm_write_wall_clock(vcpu->kvm, data, 0);
+		kvm_write_wall_clock(vcpu, data, 0);
 		break;
 	case MSR_KVM_WALL_CLOCK:
 		if (!guest_pv_has(vcpu, KVM_FEATURE_CLOCKSOURCE))
 			return 1;
 
 		vcpu->kvm->arch.wall_clock = data;
-		kvm_write_wall_clock(vcpu->kvm, data, 0);
+		kvm_write_wall_clock(vcpu, data, 0);
 		break;
 	case MSR_KVM_SYSTEM_TIME_NEW:
 		if (!guest_pv_has(vcpu, KVM_FEATURE_CLOCKSOURCE2))
@@ -4421,7 +4421,7 @@ static void kvm_steal_time_set_preempted(struct kvm_vcpu *vcpu)
 	if (!copy_to_user_nofault(&st->preempted, &preempted, sizeof(preempted)))
 		vcpu->arch.st.preempted = KVM_VCPU_PREEMPTED;
 
-	mark_page_dirty_in_slot(vcpu->kvm, ghc->memslot, gpa_to_gfn(ghc->gpa));
+	mark_page_dirty_in_slot(vcpu->kvm, vcpu, ghc->memslot, gpa_to_gfn(ghc->gpa));
 }
 
 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index ea264c4502e4..f1dab3413fc8 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -294,7 +294,7 @@ static inline bool kvm_vcpu_latch_init(struct kvm_vcpu *vcpu)
 	return is_smm(vcpu) || static_call(kvm_x86_apic_init_signal_blocked)(vcpu);
 }
 
-void kvm_write_wall_clock(struct kvm *kvm, gpa_t wall_clock, int sec_hi_ofs);
+void kvm_write_wall_clock(struct kvm_vcpu *vcpu, gpa_t wall_clock, int sec_hi_ofs);
 void kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq, int inc_eip);
 u64 get_kvmclock_ns(struct kvm *kvm);
 
diff --git a/arch/x86/kvm/xen.c b/arch/x86/kvm/xen.c
index 7a58df25c9b2..e7b0c0af807d 100644
--- a/arch/x86/kvm/xen.c
+++ b/arch/x86/kvm/xen.c
@@ -37,7 +37,7 @@ static int kvm_xen_shared_info_init(struct kvm *kvm, gfn_t gfn)
 
 	ret = kvm_gfn_to_pfn_cache_init(kvm, gpc, NULL, true, gpa,
 					PAGE_SIZE, true);
-	if (ret)
+	if (ret || !kvm->vcpus[0])
 		goto out;
 
 	/* Paranoia checks on the 32-bit struct layout */
@@ -60,7 +60,7 @@ static int kvm_xen_shared_info_init(struct kvm *kvm, gfn_t gfn)
 	}
 #endif
 
-	kvm_write_wall_clock(kvm, gpa + wc_ofs, sec_hi_ofs - wc_ofs);
+	kvm_write_wall_clock(kvm->vcpus[0], gpa + wc_ofs, sec_hi_ofs - wc_ofs);
 	kvm_make_all_cpus_request(kvm, KVM_REQ_MASTERCLOCK_UPDATE);
 
 out:
@@ -316,7 +316,7 @@ int __kvm_xen_has_interrupt(struct kvm_vcpu *v)
 		err:
 			user_access_end();
 
-			mark_page_dirty_in_slot(v->kvm, ghc->memslot, ghc->gpa >> PAGE_SHIFT);
+			mark_page_dirty_in_slot(v->kvm, v, ghc->memslot, ghc->gpa >> PAGE_SHIFT);
 		} else {
 			__get_user(rc, (u8 __user *)ghc->hva + offset);
 		}
diff --git a/include/linux/kvm_dirty_ring.h b/include/linux/kvm_dirty_ring.h
index 4da8d4a4140b..f3be974f9c5a 100644
--- a/include/linux/kvm_dirty_ring.h
+++ b/include/linux/kvm_dirty_ring.h
@@ -43,7 +43,8 @@ static inline int kvm_dirty_ring_alloc(struct kvm_dirty_ring *ring,
 	return 0;
 }
 
-static inline struct kvm_dirty_ring *kvm_dirty_ring_get(struct kvm *kvm)
+static inline struct kvm_dirty_ring *kvm_dirty_ring_get(struct kvm *kvm,
+							 struct kvm_vcpu *vcpu)
 {
 	return NULL;
 }
@@ -78,7 +79,8 @@ static inline bool kvm_dirty_ring_soft_full(struct kvm_dirty_ring *ring)
 u32 kvm_dirty_ring_get_rsvd_entries(void);
 int kvm_dirty_ring_alloc(struct kvm_dirty_ring *ring, int index, u32 size);
 
-struct kvm_dirty_ring *kvm_dirty_ring_get(struct kvm *kvm);
+struct kvm_dirty_ring *kvm_dirty_ring_get(struct kvm *kvm,
+					  struct kvm_vcpu *vcpu);
 
 /*
  * called with kvm->slots_lock held, returns the number of
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index f145a58e37b0..59a92a82e3a3 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -954,7 +954,8 @@ struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn);
 bool kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn);
 bool kvm_vcpu_is_visible_gfn(struct kvm_vcpu *vcpu, gfn_t gfn);
 unsigned long kvm_host_page_size(struct kvm_vcpu *vcpu, gfn_t gfn);
-void mark_page_dirty_in_slot(struct kvm *kvm, struct kvm_memory_slot *memslot, gfn_t gfn);
+void mark_page_dirty_in_slot(struct kvm *kvm, struct kvm_vcpu *vcpu,
+			     struct kvm_memory_slot *memslot, gfn_t gfn);
 void mark_page_dirty(struct kvm *kvm, gfn_t gfn);
 
 struct kvm_memslots *kvm_vcpu_memslots(struct kvm_vcpu *vcpu);
diff --git a/virt/kvm/dirty_ring.c b/virt/kvm/dirty_ring.c
index 2b4474387895..879d454eef71 100644
--- a/virt/kvm/dirty_ring.c
+++ b/virt/kvm/dirty_ring.c
@@ -36,12 +36,16 @@ static bool kvm_dirty_ring_full(struct kvm_dirty_ring *ring)
 	return kvm_dirty_ring_used(ring) >= ring->size;
 }
 
-struct kvm_dirty_ring *kvm_dirty_ring_get(struct kvm *kvm)
+struct kvm_dirty_ring *kvm_dirty_ring_get(struct kvm *kvm, struct kvm_vcpu *vcpu)
 {
-	struct kvm_vcpu *vcpu = kvm_get_running_vcpu();
+	struct kvm_vcpu *running_vcpu = kvm_get_running_vcpu();
 
+	WARN_ON_ONCE(vcpu && vcpu != running_vcpu);
+	if (!vcpu)
+		vcpu = running_vcpu;
+
 	WARN_ON_ONCE(vcpu->kvm != kvm);
 
 	return &vcpu->dirty_ring;
 }
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 4065fd32271a..24f300e5fa96 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -2787,7 +2787,7 @@ int kvm_vcpu_read_guest_atomic(struct kvm_vcpu *vcpu, gpa_t gpa,
 }
 EXPORT_SYMBOL_GPL(kvm_vcpu_read_guest_atomic);
 
-static int __kvm_write_guest_page(struct kvm *kvm,
+static int __kvm_write_guest_page(struct kvm *kvm, struct kvm_vcpu *vcpu,
 				  struct kvm_memory_slot *memslot, gfn_t gfn,
 				  const void *data, int offset, int len)
 {
@@ -2800,7 +2800,7 @@ static int __kvm_write_guest_page(struct kvm *kvm,
 	r = __copy_to_user((void __user *)addr + offset, data, len);
 	if (r)
 		return -EFAULT;
-	mark_page_dirty_in_slot(kvm, memslot, gfn);
+	mark_page_dirty_in_slot(kvm, vcpu, memslot, gfn);
 	return 0;
 }
 
@@ -2809,7 +2809,7 @@ int kvm_write_guest_page(struct kvm *kvm, gfn_t gfn,
 {
 	struct kvm_memory_slot *slot = gfn_to_memslot(kvm, gfn);
 
-	return __kvm_write_guest_page(kvm, slot, gfn, data, offset, len);
+	return __kvm_write_guest_page(kvm, NULL, slot, gfn, data, offset, len);
 }
 EXPORT_SYMBOL_GPL(kvm_write_guest_page);
 
@@ -2818,7 +2818,7 @@ int kvm_vcpu_write_guest_page(struct kvm_vcpu *vcpu, gfn_t gfn,
 {
 	struct kvm_memory_slot *slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn);
 
-	return __kvm_write_guest_page(vcpu->kvm, slot, gfn, data, offset, len);
+	return __kvm_write_guest_page(vcpu->kvm, vcpu, slot, gfn, data, offset, len);
 }
 EXPORT_SYMBOL_GPL(kvm_vcpu_write_guest_page);
 
@@ -2937,7 +2937,7 @@ int kvm_write_guest_offset_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc,
 	r = __copy_to_user((void __user *)ghc->hva + offset, data, len);
 	if (r)
 		return -EFAULT;
-	mark_page_dirty_in_slot(kvm, ghc->memslot, gpa >> PAGE_SHIFT);
+	mark_page_dirty_in_slot(kvm, NULL, ghc->memslot, gpa >> PAGE_SHIFT);
 
 	return 0;
 }
@@ -3006,7 +3006,7 @@ int kvm_clear_guest(struct kvm *kvm, gpa_t gpa, unsigned long len)
 }
 EXPORT_SYMBOL_GPL(kvm_clear_guest);
 
-void mark_page_dirty_in_slot(struct kvm *kvm,
+void mark_page_dirty_in_slot(struct kvm *kvm, struct kvm_vcpu *vcpu,
 			     struct kvm_memory_slot *memslot, gfn_t gfn)
 {
@@ -3015,7 +3015,7 @@ void mark_page_dirty_in_slot(struct kvm *kvm,
 		u32 slot = (memslot->as_id << 16) | memslot->id;
 
 		if (kvm->dirty_ring_size)
-			kvm_dirty_ring_push(kvm_dirty_ring_get(kvm),
+			kvm_dirty_ring_push(kvm_dirty_ring_get(kvm, vcpu),
 					    slot, rel_gfn);
 		else
 			set_bit_le(rel_gfn, memslot->dirty_bitmap);
@@ -3028,7 +3028,7 @@ void mark_page_dirty(struct kvm *kvm, gfn_t gfn)
 	struct kvm_memory_slot *memslot;
 
 	memslot = gfn_to_memslot(kvm, gfn);
-	mark_page_dirty_in_slot(kvm, memslot, gfn);
+	mark_page_dirty_in_slot(kvm, NULL, memslot, gfn);
 }
 EXPORT_SYMBOL_GPL(mark_page_dirty);
 
@@ -3037,7 +3037,7 @@ void kvm_vcpu_mark_page_dirty(struct kvm_vcpu *vcpu, gfn_t gfn)
 	struct kvm_memory_slot *memslot;
 
 	memslot = kvm_vcpu_gfn_to_memslot(vcpu, gfn);
-	mark_page_dirty_in_slot(vcpu->kvm, memslot, gfn);
+	mark_page_dirty_in_slot(vcpu->kvm, vcpu, memslot, gfn);
 }
 EXPORT_SYMBOL_GPL(kvm_vcpu_mark_page_dirty);
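
To spell out the semantics the dirty_ring.c hunk above implements: prefer
an explicitly passed vCPU, fall back to the currently-running one, and
complain if an explicit vCPU disagrees with the running one. A minimal
standalone sketch of that rule follows (plain userspace C, not kernel
code; struct vcpu, get_running_vcpu() and resolve_vcpu() are hypothetical
stand-ins for the real KVM types and kvm_get_running_vcpu()):

#include <assert.h>
#include <stdio.h>

struct vcpu { int id; };

static struct vcpu vcpu0 = { .id = 0 };

/* Stand-in for kvm_get_running_vcpu(): the vCPU in guest context now. */
static struct vcpu *get_running_vcpu(void)
{
	return &vcpu0;
}

static struct vcpu *resolve_vcpu(struct vcpu *vcpu)
{
	struct vcpu *running = get_running_vcpu();

	/* An explicitly passed vCPU must be the one actually running. */
	assert(!vcpu || vcpu == running);

	/* Callers with no vCPU context (e.g. mark_page_dirty()) pass NULL. */
	return vcpu ? vcpu : running;
}

int main(void)
{
	printf("NULL resolves to vCPU %d\n", resolve_vcpu(NULL)->id);
	printf("explicit resolves to vCPU %d\n", resolve_vcpu(&vcpu0)->id);
	return 0;
}

The fallback keeps legacy callers like mark_page_dirty() working without
plumbing a vCPU through every path, while new callers can be explicit
about which vCPU's dirty ring the write gets accounted to.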