On Sat, Jul 6, 2024 at 2:59 PM maobibo <maobibo@xxxxxxxxxxx> wrote:
>
> Huacai,
>
> On 2024/7/6 11:00 AM, Huacai Chen wrote:
> > Hi, Bibo,
> >
> > On Fri, May 24, 2024 at 3:38 PM Bibo Mao <maobibo@xxxxxxxxxxx> wrote:
> >>
> >> Add the steal time feature on the KVM side: the VM can query the
> >> features provided by the KVM hypervisor, and feature
> >> KVM_FEATURE_STEAL_TIME is added here. Like on x86, the steal time
> >> structure is saved in guest memory, and one hypercall function,
> >> KVM_HCALL_FUNC_NOTIFY, is added for the guest to notify KVM to
> >> enable the feature.
> >>
> >> One vCPU attr ioctl command, KVM_LOONGARCH_VCPU_PVTIME_CTRL, is added
> >> to save and restore the base address of the steal time structure when
> >> the VM is migrated.
> >>
> >> Signed-off-by: Bibo Mao <maobibo@xxxxxxxxxxx>
> >> ---
> >>  arch/loongarch/include/asm/kvm_host.h  |   7 ++
> >>  arch/loongarch/include/asm/kvm_para.h  |  10 ++
> >>  arch/loongarch/include/asm/kvm_vcpu.h  |   4 +
> >>  arch/loongarch/include/asm/loongarch.h |   1 +
> >>  arch/loongarch/include/uapi/asm/kvm.h  |   4 +
> >>  arch/loongarch/kvm/Kconfig             |   1 +
> >>  arch/loongarch/kvm/exit.c              |  38 +++++++-
> >>  arch/loongarch/kvm/vcpu.c              | 124 +++++++++++++++++++++++++
> >>  8 files changed, 187 insertions(+), 2 deletions(-)
> >>
> >> diff --git a/arch/loongarch/include/asm/kvm_host.h b/arch/loongarch/include/asm/kvm_host.h
> >> index c87b6ea0ec47..2eb2f7572023 100644
> >> --- a/arch/loongarch/include/asm/kvm_host.h
> >> +++ b/arch/loongarch/include/asm/kvm_host.h
> >> @@ -30,6 +30,7 @@
> >>  #define KVM_PRIVATE_MEM_SLOTS          0
> >>
> >>  #define KVM_HALT_POLL_NS_DEFAULT       500000
> >> +#define KVM_REQ_STEAL_UPDATE           KVM_ARCH_REQ(1)
> >>
> >>  #define KVM_GUESTDBG_SW_BP_MASK        \
> >>         (KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP)
> >> @@ -201,6 +202,12 @@ struct kvm_vcpu_arch {
> >>         struct kvm_mp_state mp_state;
> >>         /* cpucfg */
> >>         u32 cpucfg[KVM_MAX_CPUCFG_REGS];
> >> +       /* paravirt steal time */
> >> +       struct {
> >> +               u64 guest_addr;
> >> +               u64 last_steal;
> >> +               struct gfn_to_hva_cache cache;
> >> +       } st;
> >>  };
> >>
> >>  static inline unsigned long readl_sw_gcsr(struct loongarch_csrs *csr, int reg)
> >> diff --git a/arch/loongarch/include/asm/kvm_para.h b/arch/loongarch/include/asm/kvm_para.h
> >> index 4ba2312e5f8c..a9ba8185d4af 100644
> >> --- a/arch/loongarch/include/asm/kvm_para.h
> >> +++ b/arch/loongarch/include/asm/kvm_para.h
> >> @@ -14,6 +14,7 @@
> >>
> >>  #define KVM_HCALL_SERVICE      HYPERCALL_ENCODE(HYPERVISOR_KVM, KVM_HCALL_CODE_SERVICE)
> >>  #define KVM_HCALL_FUNC_IPI     1
> >> +#define KVM_HCALL_FUNC_NOTIFY  2
> >>
> >>  #define KVM_HCALL_SWDBG        HYPERCALL_ENCODE(HYPERVISOR_KVM, KVM_HCALL_CODE_SWDBG)
> >>
> >> @@ -24,6 +25,15 @@
> >>  #define KVM_HCALL_INVALID_CODE         -1UL
> >>  #define KVM_HCALL_INVALID_PARAMETER    -2UL
> >>
> >> +#define KVM_STEAL_PHYS_VALID   BIT_ULL(0)
> >> +#define KVM_STEAL_PHYS_MASK    GENMASK_ULL(63, 6)
> >> +struct kvm_steal_time {
> >> +       __u64 steal;
> >> +       __u32 version;
> >> +       __u32 flags;
> > I found that x86 has a preempted field here, and in our internal repo
> > the LoongArch version also has this field. Moreover,
> > kvm_steal_time_set_preempted() and kvm_steal_time_clear_preempted()
> > seem needed.
> By my understanding, the macro vcpu_is_preempted() is used together
> with pv spinlock, and pv spinlock depends on pv stealtime. So I think
> the preempted flag is not part of pv stealtime; it is part of pv
> spinlock.
>
> We are going to add the preempted field when pv spinlock is added.
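For reference, when pv spinlock lands, the guest side would consume such
a field roughly the way x86 does. A minimal sketch, assuming a future
"__u8 preempted" member in struct kvm_steal_time and a KVM_VCPU_PREEMPTED
flag bit, neither of which exists in this patch:

        /*
         * Illustrative only: "preempted" and KVM_VCPU_PREEMPTED are
         * assumptions modeled on the x86 layout, not part of this patch.
         * The per-cpu steal_time area is the one registered with the host.
         */
        static DEFINE_PER_CPU(struct kvm_steal_time, steal_time) __aligned(64);

        static bool pv_vcpu_is_preempted(int cpu)
        {
                struct kvm_steal_time *src = &per_cpu(steal_time, cpu);

                return !!(READ_ONCE(src->preempted) & KVM_VCPU_PREEMPTED);
        }
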
> >
> >> +       __u32 pad[12];
> >> +};
> >> +
> >>  /*
> >>   * Hypercall interface for KVM hypervisor
> >>   *
> >> diff --git a/arch/loongarch/include/asm/kvm_vcpu.h b/arch/loongarch/include/asm/kvm_vcpu.h
> >> index 590a92cb5416..d7e51300a89f 100644
> >> --- a/arch/loongarch/include/asm/kvm_vcpu.h
> >> +++ b/arch/loongarch/include/asm/kvm_vcpu.h
> >> @@ -120,4 +120,8 @@ static inline void kvm_write_reg(struct kvm_vcpu *vcpu, int num, unsigned long v
> >>         vcpu->arch.gprs[num] = val;
> >>  }
> >>
> >> +static inline bool kvm_pvtime_supported(void)
> >> +{
> >> +       return !!sched_info_on();
> >> +}
> >>  #endif /* __ASM_LOONGARCH_KVM_VCPU_H__ */
> >> diff --git a/arch/loongarch/include/asm/loongarch.h b/arch/loongarch/include/asm/loongarch.h
> >> index eb09adda54b7..7a4633ef284b 100644
> >> --- a/arch/loongarch/include/asm/loongarch.h
> >> +++ b/arch/loongarch/include/asm/loongarch.h
> >> @@ -169,6 +169,7 @@
> >>  #define KVM_SIGNATURE                  "KVM\0"
> >>  #define CPUCFG_KVM_FEATURE             (CPUCFG_KVM_BASE + 4)
> >>  #define KVM_FEATURE_IPI                BIT(1)
> >> +#define KVM_FEATURE_STEAL_TIME         BIT(2)
> >>
> >>  #ifndef __ASSEMBLY__
> >>
> >> diff --git a/arch/loongarch/include/uapi/asm/kvm.h b/arch/loongarch/include/uapi/asm/kvm.h
> >> index f9abef382317..ddc5cab0ffd0 100644
> >> --- a/arch/loongarch/include/uapi/asm/kvm.h
> >> +++ b/arch/loongarch/include/uapi/asm/kvm.h
> >> @@ -81,7 +81,11 @@ struct kvm_fpu {
> >>  #define LOONGARCH_REG_64(TYPE, REG)    (TYPE | KVM_REG_SIZE_U64 | (REG << LOONGARCH_REG_SHIFT))
> >>  #define KVM_IOC_CSRID(REG)             LOONGARCH_REG_64(KVM_REG_LOONGARCH_CSR, REG)
> >>  #define KVM_IOC_CPUCFG(REG)            LOONGARCH_REG_64(KVM_REG_LOONGARCH_CPUCFG, REG)
> >> +
> >> +/* Device Control API on vcpu fd */
> >>  #define KVM_LOONGARCH_VCPU_CPUCFG      0
> >> +#define KVM_LOONGARCH_VCPU_PVTIME_CTRL 1
> >> +#define KVM_LOONGARCH_VCPU_PVTIME_GPA  0
> >>
> >>  struct kvm_debug_exit_arch {
> >>  };
> >> diff --git a/arch/loongarch/kvm/Kconfig b/arch/loongarch/kvm/Kconfig
> >> index c4ef2b4d9797..248744b4d086 100644
> >> --- a/arch/loongarch/kvm/Kconfig
> >> +++ b/arch/loongarch/kvm/Kconfig
> >> @@ -29,6 +29,7 @@ config KVM
> >>         select KVM_MMIO
> >>         select HAVE_KVM_READONLY_MEM
> >>         select KVM_XFER_TO_GUEST_WORK
> >> +       select SCHED_INFO
> >>         help
> >>           Support hosting virtualized guest machines using
> >>           hardware virtualization extensions. You will need
> >> diff --git a/arch/loongarch/kvm/exit.c b/arch/loongarch/kvm/exit.c
> >> index c86e099af5ca..e2abd97fb13f 100644
> >> --- a/arch/loongarch/kvm/exit.c
> >> +++ b/arch/loongarch/kvm/exit.c
> >> @@ -24,7 +24,7 @@
> >>  static int kvm_emu_cpucfg(struct kvm_vcpu *vcpu, larch_inst inst)
> >>  {
> >>         int rd, rj;
> >> -       unsigned int index;
> >> +       unsigned int index, ret;
> >>
> >>         if (inst.reg2_format.opcode != cpucfg_op)
> >>                 return EMULATE_FAIL;
> >> @@ -50,7 +50,10 @@ static int kvm_emu_cpucfg(struct kvm_vcpu *vcpu, larch_inst inst)
> >>                 vcpu->arch.gprs[rd] = *(unsigned int *)KVM_SIGNATURE;
> >>                 break;
> >>         case CPUCFG_KVM_FEATURE:
> >> -               vcpu->arch.gprs[rd] = KVM_FEATURE_IPI;
> >> +               ret = KVM_FEATURE_IPI;
> >> +               if (sched_info_on())
> > What about replacing it with your helper function kvm_pvtime_supported()?
> Sure, I will replace it with the helper function kvm_pvtime_supported().
If you are sure this is the only issue, then you needn't submit a new version.
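For the record, a guest would detect the new bit through CPUCFG roughly
like this; a sketch, assuming the existing read_cpucfg() helper and that
the "KVM\0" signature word is readable at CPUCFG_KVM_BASE (the
kvm_para_has_feature() name is illustrative):

        static bool kvm_para_has_feature(unsigned int feature)
        {
                /* Bail out if we are not running on a KVM hypervisor */
                if (read_cpucfg(CPUCFG_KVM_BASE) != *(unsigned int *)KVM_SIGNATURE)
                        return false;

                return !!(read_cpucfg(CPUCFG_KVM_FEATURE) & feature);
        }

With that, enabling steal time in the guest becomes a simple feature
check before issuing the registration hypercall.
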
Huacai

> Regards
> Bibo Mao
> >
> > Huacai
> >
> >> +                       ret |= KVM_FEATURE_STEAL_TIME;
> >> +               vcpu->arch.gprs[rd] = ret;
> >>                 break;
> >>         default:
> >>                 vcpu->arch.gprs[rd] = 0;
> >> @@ -687,6 +690,34 @@ static int kvm_handle_fpu_disabled(struct kvm_vcpu *vcpu)
> >>         return RESUME_GUEST;
> >>  }
> >>
> >> +static long kvm_save_notify(struct kvm_vcpu *vcpu)
> >> +{
> >> +       unsigned long id, data;
> >> +
> >> +       id = kvm_read_reg(vcpu, LOONGARCH_GPR_A1);
> >> +       data = kvm_read_reg(vcpu, LOONGARCH_GPR_A2);
> >> +       switch (id) {
> >> +       case KVM_FEATURE_STEAL_TIME:
> >> +               if (!kvm_pvtime_supported())
> >> +                       return KVM_HCALL_INVALID_CODE;
> >> +
> >> +               if (data & ~(KVM_STEAL_PHYS_MASK | KVM_STEAL_PHYS_VALID))
> >> +                       return KVM_HCALL_INVALID_PARAMETER;
> >> +
> >> +               vcpu->arch.st.guest_addr = data;
> >> +               if (!(data & KVM_STEAL_PHYS_VALID))
> >> +                       break;
> >> +
> >> +               vcpu->arch.st.last_steal = current->sched_info.run_delay;
> >> +               kvm_make_request(KVM_REQ_STEAL_UPDATE, vcpu);
> >> +               break;
> >> +       default:
> >> +               break;
> >> +       };
> >> +
> >> +       return 0;
> >> +};
> >> +
> >>  /*
> >>   * kvm_handle_lsx_disabled() - Guest used LSX while disabled in root.
> >>   * @vcpu:      Virtual CPU context.
> >> @@ -758,6 +789,9 @@ static void kvm_handle_service(struct kvm_vcpu *vcpu)
> >>                 kvm_send_pv_ipi(vcpu);
> >>                 ret = KVM_HCALL_SUCCESS;
> >>                 break;
> >> +       case KVM_HCALL_FUNC_NOTIFY:
> >> +               ret = kvm_save_notify(vcpu);
> >> +               break;
> >>         default:
> >>                 ret = KVM_HCALL_INVALID_CODE;
> >>                 break;
> >> diff --git a/arch/loongarch/kvm/vcpu.c b/arch/loongarch/kvm/vcpu.c
> >> index 9e8030d45129..382796f1d3e6 100644
> >> --- a/arch/loongarch/kvm/vcpu.c
> >> +++ b/arch/loongarch/kvm/vcpu.c
> >> @@ -31,6 +31,117 @@ const struct kvm_stats_header kvm_vcpu_stats_header = {
> >>         sizeof(kvm_vcpu_stats_desc),
> >>  };
> >>
> >> +static void kvm_update_stolen_time(struct kvm_vcpu *vcpu)
> >> +{
> >> +       struct kvm_steal_time __user *st;
> >> +       struct gfn_to_hva_cache *ghc;
> >> +       struct kvm_memslots *slots;
> >> +       gpa_t gpa;
> >> +       u64 steal;
> >> +       u32 version;
> >> +
> >> +       ghc = &vcpu->arch.st.cache;
> >> +       gpa = vcpu->arch.st.guest_addr;
> >> +       if (!(gpa & KVM_STEAL_PHYS_VALID))
> >> +               return;
> >> +
> >> +       gpa &= KVM_STEAL_PHYS_MASK;
> >> +       slots = kvm_memslots(vcpu->kvm);
> >> +       if (slots->generation != ghc->generation || gpa != ghc->gpa) {
> >> +               if (kvm_gfn_to_hva_cache_init(vcpu->kvm, ghc, gpa,
> >> +                                             sizeof(*st))) {
> >> +                       ghc->gpa = INVALID_GPA;
> >> +                       return;
> >> +               }
> >> +       }
> >> +
> >> +       st = (struct kvm_steal_time __user *)ghc->hva;
> >> +       unsafe_get_user(version, &st->version, out);
> >> +       if (version & 1)
> >> +               version += 1;
> >> +       version += 1;
> >> +       unsafe_put_user(version, &st->version, out);
> >> +       smp_wmb();
> >> +
> >> +       unsafe_get_user(steal, &st->steal, out);
> >> +       steal += current->sched_info.run_delay -
> >> +                vcpu->arch.st.last_steal;
> >> +       vcpu->arch.st.last_steal = current->sched_info.run_delay;
> >> +       unsafe_put_user(steal, &st->steal, out);
> >> +
> >> +       smp_wmb();
> >> +       version += 1;
> >> +       unsafe_put_user(version, &st->version, out);
> >> +out:
> >> +       mark_page_dirty_in_slot(vcpu->kvm, ghc->memslot, gpa_to_gfn(ghc->gpa));
> >> +}
> >> +
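The version field here acts as a seqcount, so the guest reader has to
pair with it. A minimal sketch of the consuming side, assuming a per-cpu
steal_time area already registered through KVM_HCALL_FUNC_NOTIFY (the
function name follows the generic paravirt steal-clock hook, but is
illustrative here):

        static u64 pv_steal_clock(int cpu)
        {
                unsigned int version;
                u64 steal;
                struct kvm_steal_time *src;

                src = &per_cpu(steal_time, cpu);
                do {
                        version = src->version;
                        virt_rmb();     /* read version before steal */
                        steal = src->steal;
                        virt_rmb();     /* read steal before re-checking version */
                } while ((version & 1) || (version != src->version));

                return steal;
        }

An odd version means the host is mid-update, and a changed version means
the read raced with an update; either way the guest retries.
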
> >> +static int kvm_loongarch_pvtime_has_attr(struct kvm_vcpu *vcpu,
> >> +                                         struct kvm_device_attr *attr)
> >> +{
> >> +       if (!kvm_pvtime_supported() ||
> >> +           attr->attr != KVM_LOONGARCH_VCPU_PVTIME_GPA)
> >> +               return -ENXIO;
> >> +
> >> +       return 0;
> >> +}
> >> +
> >> +static int kvm_loongarch_pvtime_get_attr(struct kvm_vcpu *vcpu,
> >> +                                         struct kvm_device_attr *attr)
> >> +{
> >> +       u64 __user *user = (u64 __user *)attr->addr;
> >> +       u64 gpa;
> >> +
> >> +       if (!kvm_pvtime_supported() ||
> >> +           attr->attr != KVM_LOONGARCH_VCPU_PVTIME_GPA)
> >> +               return -ENXIO;
> >> +
> >> +       gpa = vcpu->arch.st.guest_addr;
> >> +       if (put_user(gpa, user))
> >> +               return -EFAULT;
> >> +
> >> +       return 0;
> >> +}
> >> +
> >> +static int kvm_loongarch_pvtime_set_attr(struct kvm_vcpu *vcpu,
> >> +                                         struct kvm_device_attr *attr)
> >> +{
> >> +       u64 __user *user = (u64 __user *)attr->addr;
> >> +       struct kvm *kvm = vcpu->kvm;
> >> +       u64 gpa;
> >> +       int ret = 0;
> >> +       int idx;
> >> +
> >> +       if (!kvm_pvtime_supported() ||
> >> +           attr->attr != KVM_LOONGARCH_VCPU_PVTIME_GPA)
> >> +               return -ENXIO;
> >> +
> >> +       if (get_user(gpa, user))
> >> +               return -EFAULT;
> >> +
> >> +       if (gpa & ~(KVM_STEAL_PHYS_MASK | KVM_STEAL_PHYS_VALID))
> >> +               return -EINVAL;
> >> +
> >> +       if (!(gpa & KVM_STEAL_PHYS_VALID)) {
> >> +               vcpu->arch.st.guest_addr = gpa;
> >> +               return 0;
> >> +       }
> >> +
> >> +       /* Check the address is in a valid memslot */
> >> +       idx = srcu_read_lock(&kvm->srcu);
> >> +       if (kvm_is_error_hva(gfn_to_hva(kvm, gpa >> PAGE_SHIFT)))
> >> +               ret = -EINVAL;
> >> +       srcu_read_unlock(&kvm->srcu, idx);
> >> +
> >> +       if (!ret) {
> >> +               vcpu->arch.st.guest_addr = gpa;
> >> +               vcpu->arch.st.last_steal = current->sched_info.run_delay;
> >> +               kvm_make_request(KVM_REQ_STEAL_UPDATE, vcpu);
> >> +       }
> >> +
> >> +       return ret;
> >> +}
> >> +
> >>  /*
> >>   * kvm_check_requests - check and handle pending vCPU requests
> >>   *
> >> @@ -48,6 +159,9 @@ static int kvm_check_requests(struct kvm_vcpu *vcpu)
> >>         if (kvm_dirty_ring_check_request(vcpu))
> >>                 return RESUME_HOST;
> >>
> >> +       if (kvm_check_request(KVM_REQ_STEAL_UPDATE, vcpu))
> >> +               kvm_update_stolen_time(vcpu);
> >> +
> >>         return RESUME_GUEST;
> >>  }
> >>
> >> @@ -671,6 +785,9 @@ static int kvm_loongarch_vcpu_has_attr(struct kvm_vcpu *vcpu,
> >>         case KVM_LOONGARCH_VCPU_CPUCFG:
> >>                 ret = kvm_loongarch_cpucfg_has_attr(vcpu, attr);
> >>                 break;
> >> +       case KVM_LOONGARCH_VCPU_PVTIME_CTRL:
> >> +               ret = kvm_loongarch_pvtime_has_attr(vcpu, attr);
> >> +               break;
> >>         default:
> >>                 break;
> >>         }
> >> @@ -703,6 +820,9 @@ static int kvm_loongarch_vcpu_get_attr(struct kvm_vcpu *vcpu,
> >>         case KVM_LOONGARCH_VCPU_CPUCFG:
> >>                 ret = kvm_loongarch_get_cpucfg_attr(vcpu, attr);
> >>                 break;
> >> +       case KVM_LOONGARCH_VCPU_PVTIME_CTRL:
> >> +               ret = kvm_loongarch_pvtime_get_attr(vcpu, attr);
> >> +               break;
> >>         default:
> >>                 break;
> >>         }
> >> @@ -725,6 +845,9 @@ static int kvm_loongarch_vcpu_set_attr(struct kvm_vcpu *vcpu,
> >>         case KVM_LOONGARCH_VCPU_CPUCFG:
> >>                 ret = kvm_loongarch_cpucfg_set_attr(vcpu, attr);
> >>                 break;
> >> +       case KVM_LOONGARCH_VCPU_PVTIME_CTRL:
> >> +               ret = kvm_loongarch_pvtime_set_attr(vcpu, attr);
> >> +               break;
> >>         default:
> >>                 break;
> >>         }
> >> @@ -1084,6 +1207,7 @@ static int _kvm_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
> >>
> >>         /* Control guest page CCA attribute */
> >>         change_csr_gcfg(CSR_GCFG_MATC_MASK, CSR_GCFG_MATC_ROOT);
> >> +       kvm_make_request(KVM_REQ_STEAL_UPDATE, vcpu);
> >>
> >>         /* Don't bother restoring registers multiple times unless necessary */
> >>         if (vcpu->arch.aux_inuse & KVM_LARCH_HWCSR_USABLE)
> >> --
> >> 2.39.3
> >>
>
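For completeness, userspace migration support then boils down to a pair
of vcpu-fd ioctls on the new attribute group. A sketch, using the
standard KVM device-attr ioctls (error handling and vcpu-fd plumbing
omitted):

        #include <linux/kvm.h>
        #include <sys/ioctl.h>

        /* Save (source side) or restore (destination side) the steal-time GPA. */
        static int pvtime_gpa_ioctl(int vcpu_fd, unsigned long req, __u64 *gpa)
        {
                struct kvm_device_attr attr = {
                        .group = KVM_LOONGARCH_VCPU_PVTIME_CTRL,
                        .attr  = KVM_LOONGARCH_VCPU_PVTIME_GPA,
                        .addr  = (__u64)(unsigned long)gpa,
                };

                /* req is KVM_GET_DEVICE_ATTR on save, KVM_SET_DEVICE_ATTR on restore */
                return ioctl(vcpu_fd, req, &attr);
        }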