On Wed, Jun 7, 2023 at 7:09 PM Nicholas Piggin <npiggin@xxxxxxxxx> wrote: [snip] > > You lost your comments. Thanks > > > diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h > > index 0ca2d8b37b42..c5c57552b447 100644 > > --- a/arch/powerpc/include/asm/kvm_book3s.h > > +++ b/arch/powerpc/include/asm/kvm_book3s.h > > @@ -12,6 +12,7 @@ > > #include <linux/types.h> > > #include <linux/kvm_host.h> > > #include <asm/kvm_book3s_asm.h> > > +#include <asm/guest-state-buffer.h> > > > > struct kvmppc_bat { > > u64 raw; > > @@ -316,6 +317,57 @@ long int kvmhv_nested_page_fault(struct kvm_vcpu *vcpu); > > > > void kvmppc_giveup_fac(struct kvm_vcpu *vcpu, ulong fac); > > > > + > > +#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE > > + > > +extern bool __kvmhv_on_papr; > > + > > +static inline bool kvmhv_on_papr(void) > > +{ > > + return __kvmhv_on_papr; > > +} > > It's a nitpick, but kvmhv_on_pseries() is because we're runnning KVM-HV > on a pseries guest kernel. Which is a papr guest kernel. So this kind of > doesn't make sense if you read it the same way. > > kvmhv_nested_using_papr() or something like that might read a bit > better. Will we go with kvmhv_using_nested_v2()? > > This could be a static key too. Will do. > > > @@ -575,6 +593,7 @@ struct kvm_vcpu_arch { > > ulong dscr; > > ulong amr; > > ulong uamor; > > + ulong amor; > > ulong iamr; > > u32 ctrl; > > u32 dabrx; > > This belongs somewhere else. It can be dropped. > > > @@ -829,6 +848,8 @@ struct kvm_vcpu_arch { > > u64 nested_hfscr; /* HFSCR that the L1 requested for the nested guest */ > > u32 nested_vcpu_id; > > gpa_t nested_io_gpr; > > + /* For nested APIv2 guests*/ > > + struct kvmhv_papr_host papr_host; > > #endif > > This is not exactly a papr host. Might have to come up with a better > name especially if we implement a L0 things could get confusing. Any name ideas? nestedv2_state? > > > @@ -342,6 +343,203 @@ static inline long plpar_get_cpu_characteristics(struct h_cpu_char_result *p) > > return rc; > > } > > > > +static inline long plpar_guest_create(unsigned long flags, unsigned long *guest_id) > > +{ > > + unsigned long retbuf[PLPAR_HCALL_BUFSIZE]; > > + unsigned long token; > > + long rc; > > + > > + token = -1UL; > > + while (true) { > > + rc = plpar_hcall(H_GUEST_CREATE, retbuf, flags, token); > > + if (rc == H_SUCCESS) { > > + *guest_id = retbuf[0]; > > + break; > > + } > > + > > + if (rc == H_BUSY) { > > + token = retbuf[0]; > > + cpu_relax(); > > + continue; > > + } > > + > > + if (H_IS_LONG_BUSY(rc)) { > > + token = retbuf[0]; > > + mdelay(get_longbusy_msecs(rc)); > > All of these things need a non-sleeping delay? Can we sleep instead? > Or if not, might have to think about going back to the caller and it > can retry. > > get/set state might be a bit inconvenient, although I don't expect > that should potentially take so long as guest and vcpu create/delete, > so at least those ones would be good if they're called while > preemptable. Yeah no reason not to sleep except for get/set, let me try it out. > > > diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c > > index 521d84621422..f22ee582e209 100644 > > --- a/arch/powerpc/kvm/book3s_hv.c > > +++ b/arch/powerpc/kvm/book3s_hv.c > > @@ -383,6 +383,11 @@ static void kvmppc_core_vcpu_put_hv(struct kvm_vcpu *vcpu) > > spin_unlock_irqrestore(&vcpu->arch.tbacct_lock, flags); > > } > > > > +static void kvmppc_set_pvr_hv(struct kvm_vcpu *vcpu, u32 pvr) > > +{ > > + vcpu->arch.pvr = pvr; > > +} > > Didn't you lose this in a previous patch? I thought it must have moved > to a header but it reappears. Yes, that was meant to stay put. > > > + > > /* Dummy value used in computing PCR value below */ > > #define PCR_ARCH_31 (PCR_ARCH_300 << 1) > > > > @@ -1262,13 +1267,14 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu) > > return RESUME_HOST; > > break; > > #endif > > - case H_RANDOM: > > + case H_RANDOM: { > > unsigned long rand; > > > > if (!arch_get_random_seed_longs(&rand, 1)) > > ret = H_HARDWARE; > > kvmppc_set_gpr(vcpu, 4, rand); > > break; > > + } > > case H_RPT_INVALIDATE: > > ret = kvmppc_h_rpt_invalidate(vcpu, kvmppc_get_gpr(vcpu, 4), > > kvmppc_get_gpr(vcpu, 5), > > Compile fix for a previous patch. Thanks. > > > @@ -2921,14 +2927,21 @@ static int kvmppc_core_vcpu_create_hv(struct kvm_vcpu *vcpu) > > vcpu->arch.shared_big_endian = false; > > #endif > > #endif > > - kvmppc_set_mmcr_hv(vcpu, 0, MMCR0_FC); > > > > + if (kvmhv_on_papr()) { > > + err = kvmhv_papr_vcpu_create(vcpu, &vcpu->arch.papr_host); > > + if (err < 0) > > + return err; > > + } > > + > > + kvmppc_set_mmcr_hv(vcpu, 0, MMCR0_FC); > > if (cpu_has_feature(CPU_FTR_ARCH_31)) { > > kvmppc_set_mmcr_hv(vcpu, 0, kvmppc_get_mmcr_hv(vcpu, 0) | MMCR0_PMCCEXT); > > kvmppc_set_mmcra_hv(vcpu, MMCRA_BHRB_DISABLE); > > } > > > > kvmppc_set_ctrl_hv(vcpu, CTRL_RUNLATCH); > > + kvmppc_set_amor_hv(vcpu, ~0); > > This AMOR thing should go somewhere else. Not actually sure why it needs > to be added to the vcpu since it's always ~0. Maybe just put that in a > #define somewhere and use that. Yes, you are right, just can get rid of it from the vcpu entirely. > > > @@ -4042,6 +4059,50 @@ static void vcpu_vpa_increment_dispatch(struct kvm_vcpu *vcpu) > > } > > } > > > > +static int kvmhv_vcpu_entry_papr(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpcr, u64 *tb) > > +{ > > + struct kvmhv_papr_host *ph; > > + unsigned long msr, i; > > + int trap; > > + long rc; > > + > > + ph = &vcpu->arch.papr_host; > > + > > + msr = mfmsr(); > > + kvmppc_msr_hard_disable_set_facilities(vcpu, msr); > > + if (lazy_irq_pending()) > > + return 0; > > + > > + kvmhv_papr_flush_vcpu(vcpu, time_limit); > > + > > + accumulate_time(vcpu, &vcpu->arch.in_guest); > > + rc = plpar_guest_run_vcpu(0, vcpu->kvm->arch.lpid, vcpu->vcpu_id, > > + &trap, &i); > > + > > + if (rc != H_SUCCESS) { > > + pr_err("KVM Guest Run VCPU hcall failed\n"); > > + if (rc == H_INVALID_ELEMENT_ID) > > + pr_err("KVM: Guest Run VCPU invalid element id at %ld\n", i); > > + else if (rc == H_INVALID_ELEMENT_SIZE) > > + pr_err("KVM: Guest Run VCPU invalid element size at %ld\n", i); > > + else if (rc == H_INVALID_ELEMENT_VALUE) > > + pr_err("KVM: Guest Run VCPU invalid element value at %ld\n", i); > > + return 0; > > + } > > This needs the proper error handling. Were you going to wait until I > sent that up for existing code? Overall the unhappy paths need to be tightened up in the next revision. But yeah this hits the same thing as the v1 API. > > > @@ -5119,6 +5183,7 @@ static void kvmppc_core_commit_memory_region_hv(struct kvm *kvm, > > */ > > void kvmppc_update_lpcr(struct kvm *kvm, unsigned long lpcr, unsigned long mask) > > { > > + struct kvm_vcpu *vcpu; > > long int i; > > u32 cores_done = 0; > > > > @@ -5139,6 +5204,12 @@ void kvmppc_update_lpcr(struct kvm *kvm, unsigned long lpcr, unsigned long mask) > > if (++cores_done >= kvm->arch.online_vcores) > > break; > > } > > + > > + if (kvmhv_on_papr()) { > > + kvm_for_each_vcpu(i, vcpu, kvm) { > > + kvmppc_set_lpcr_hv(vcpu, vcpu->arch.vcore->lpcr); > > + } > > + } > > } > > vcpu define could go in that scope I guess. True. > > > @@ -5405,15 +5476,43 @@ static int kvmppc_core_init_vm_hv(struct kvm *kvm) > > > > /* Allocate the guest's logical partition ID */ > > > > - lpid = kvmppc_alloc_lpid(); > > - if ((long)lpid < 0) > > - return -ENOMEM; > > - kvm->arch.lpid = lpid; > > + if (!kvmhv_on_papr()) { > > + lpid = kvmppc_alloc_lpid(); > > + if ((long)lpid < 0) > > + return -ENOMEM; > > + kvm->arch.lpid = lpid; > > + } > > > > kvmppc_alloc_host_rm_ops(); > > > > kvmhv_vm_nested_init(kvm); > > > > + if (kvmhv_on_papr()) { > > + long rc; > > + unsigned long guest_id; > > + > > + rc = plpar_guest_create(0, &guest_id); > > + > > + if (rc != H_SUCCESS) > > + pr_err("KVM: Create Guest hcall failed, rc=%ld\n", rc); > > + > > + switch (rc) { > > + case H_PARAMETER: > > + case H_FUNCTION: > > + case H_STATE: > > + return -EINVAL; > > + case H_NOT_ENOUGH_RESOURCES: > > + case H_ABORTED: > > + return -ENOMEM; > > + case H_AUTHORITY: > > + return -EPERM; > > + case H_NOT_AVAILABLE: > > + return -EBUSY; > > + } > > + kvm->arch.lpid = guest_id; > > + } > > I wouldn't mind putting lpid/guest_id in different variables. guest_id > is 64-bit isn't it? LPIDR is 32. If nothing else that could cause > issues if the hypervisor does something clever with the token. I was trying to get rid of a difference between this API and the others, but I'd forgotten about the 64bit / 32bit difference. Will put it back in its own variable. > > > @@ -5573,10 +5675,14 @@ static void kvmppc_core_destroy_vm_hv(struct kvm *kvm) > > kvm->arch.process_table = 0; > > if (kvm->arch.secure_guest) > > uv_svm_terminate(kvm->arch.lpid); > > - kvmhv_set_ptbl_entry(kvm->arch.lpid, 0, 0); > > + if (!kvmhv_on_papr()) > > + kvmhv_set_ptbl_entry(kvm->arch.lpid, 0, 0); > > } > > Would be nice to have a +ve test for the "existing" API. All we have to > do is think of a name for it. Will we go with nestedv1? Thanks, Jordan > > Thanks, > Nick >