On 10/31/2024 10:54 PM, Sean Christopherson wrote:
My other idea was to have an out-param to separate the return code intended for KVM
from the return code intended for the guest. I generally dislike out-params, but
trying to juggle a return value that multiplexes guest and host values seems like
an even worse idea.
Also completely untested...
---
arch/x86/include/asm/kvm_host.h | 8 +++----
arch/x86/kvm/x86.c | 41 +++++++++++++++------------------
2 files changed, 23 insertions(+), 26 deletions(-)
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 6d9f763a7bb9..226df5c56811 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -2179,10 +2179,10 @@ static inline void kvm_clear_apicv_inhibit(struct kvm *kvm,
kvm_set_or_clear_apicv_inhibit(kvm, reason, false);
}
-unsigned long __kvm_emulate_hypercall(struct kvm_vcpu *vcpu, unsigned long nr,
- unsigned long a0, unsigned long a1,
- unsigned long a2, unsigned long a3,
- int op_64_bit, int cpl);
+int __kvm_emulate_hypercall(struct kvm_vcpu *vcpu, unsigned long nr,
+ unsigned long a0, unsigned long a1,
+ unsigned long a2, unsigned long a3,
+ int op_64_bit, int cpl, unsigned long *ret);
int kvm_emulate_hypercall(struct kvm_vcpu *vcpu);
int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, u64 error_code,
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index e09daa3b157c..e9ae09f1b45b 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -9998,13 +9998,11 @@ static int complete_hypercall_exit(struct kvm_vcpu *vcpu)
return kvm_skip_emulated_instruction(vcpu);
}
-unsigned long __kvm_emulate_hypercall(struct kvm_vcpu *vcpu, unsigned long nr,
- unsigned long a0, unsigned long a1,
- unsigned long a2, unsigned long a3,
- int op_64_bit, int cpl)
+int __kvm_emulate_hypercall(struct kvm_vcpu *vcpu, unsigned long nr,
+ unsigned long a0, unsigned long a1,
+ unsigned long a2, unsigned long a3,
+ int op_64_bit, int cpl, unsigned long *ret)
{
- unsigned long ret;
-
trace_kvm_hypercall(nr, a0, a1, a2, a3);
if (!op_64_bit) {
@@ -10016,15 +10014,15 @@ unsigned long __kvm_emulate_hypercall(struct kvm_vcpu *vcpu, unsigned long nr,
}
if (cpl) {
- ret = -KVM_EPERM;
+ *ret = -KVM_EPERM;
goto out;
}
- ret = -KVM_ENOSYS;
+ *ret = -KVM_ENOSYS;
switch (nr) {
case KVM_HC_VAPIC_POLL_IRQ:
- ret = 0;
+ *ret = 0;
break;
case KVM_HC_KICK_CPU:
if (!guest_pv_has(vcpu, KVM_FEATURE_PV_UNHALT))
@@ -10032,36 +10030,36 @@ unsigned long __kvm_emulate_hypercall(struct kvm_vcpu *vcpu, unsigned long nr,
kvm_pv_kick_cpu_op(vcpu->kvm, a1);
kvm_sched_yield(vcpu, a1);
- ret = 0;
+ *ret = 0;
break;
#ifdef CONFIG_X86_64
case KVM_HC_CLOCK_PAIRING:
- ret = kvm_pv_clock_pairing(vcpu, a0, a1);
+ *ret = kvm_pv_clock_pairing(vcpu, a0, a1);
break;
#endif
case KVM_HC_SEND_IPI:
if (!guest_pv_has(vcpu, KVM_FEATURE_PV_SEND_IPI))
break;
- ret = kvm_pv_send_ipi(vcpu->kvm, a0, a1, a2, a3, op_64_bit);
+ *ret = kvm_pv_send_ipi(vcpu->kvm, a0, a1, a2, a3, op_64_bit);
break;
case KVM_HC_SCHED_YIELD:
if (!guest_pv_has(vcpu, KVM_FEATURE_PV_SCHED_YIELD))
break;
kvm_sched_yield(vcpu, a0);
- ret = 0;
+ *ret = 0;
break;
case KVM_HC_MAP_GPA_RANGE: {
u64 gpa = a0, npages = a1, attrs = a2;
- ret = -KVM_ENOSYS;
+ *ret = -KVM_ENOSYS;
if (!user_exit_on_hypercall(vcpu->kvm, KVM_HC_MAP_GPA_RANGE))
break;
if (!PAGE_ALIGNED(gpa) || !npages ||
gpa_to_gfn(gpa) + npages <= gpa_to_gfn(gpa)) {
- ret = -KVM_EINVAL;
+ *ret = -KVM_EINVAL;
break;
}
Doesn't *ret need to be set to 0 in this case before returning 0 to the caller?
@@ -10080,13 +10078,13 @@ unsigned long __kvm_emulate_hypercall(struct kvm_vcpu *vcpu, unsigned long nr,
return 0;
}
default:
- ret = -KVM_ENOSYS;
+ *ret = -KVM_ENOSYS;
break;
}
out:
++vcpu->stat.hypercalls;
- return ret;
+ return 1;
}
EXPORT_SYMBOL_GPL(__kvm_emulate_hypercall);
@@ -10094,7 +10092,7 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
{
unsigned long nr, a0, a1, a2, a3, ret;
int op_64_bit;
- int cpl;
+ int cpl, r;
if (kvm_xen_hypercall_enabled(vcpu->kvm))
return kvm_xen_hypercall(vcpu);
@@ -10110,10 +10108,9 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
op_64_bit = is_64_bit_hypercall(vcpu);
cpl = kvm_x86_call(get_cpl)(vcpu);
- ret = __kvm_emulate_hypercall(vcpu, nr, a0, a1, a2, a3, op_64_bit, cpl);
- if (nr == KVM_HC_MAP_GPA_RANGE && !ret)
- /* MAP_GPA tosses the request to the user space. */
- return 0;
+ r = __kvm_emulate_hypercall(vcpu, nr, a0, a1, a2, a3, op_64_bit, cpl, &ret);
+ if (r <= r)
There is a typo here: "r <= r" is always true.
I guess it was meant to be "if (r <= ret)"?
If so, the combinations will be:
----------------------------------------------------------------------------
   |  r  |    ret    | r <= ret | result
---|-----|-----------|----------|-------------------------------------------
 1 |  0  |     0     |   true   | return r, which is 0, exit to userspace
---|-----|-----------|----------|-------------------------------------------
 2 |  1  |     0     |   false  | set vcpu's RAX and return back to guest
---|-----|-----------|----------|-------------------------------------------
 3 |  1  | -KVM_Exxx |   false  | set vcpu's RAX and return back to guest
---|-----|-----------|----------|-------------------------------------------
 4 |  1  | Positive  |   true   | return r, which is 1,
   |     |     N     |          | back to guest without setting vcpu's RAX
----------------------------------------------------------------------------
KVM_HC_SEND_IPI, which calls kvm_pv_send_ipi(), can hit case 4, which will
return to the guest without setting RAX. That differs from the current behavior.
r can be 0 only if no other error was detected during the pre-checks.
I think the code can just check whether r is 0 or not.
I.e.,
@@ -10094,7 +10092,7 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
{
unsigned long nr, a0, a1, a2, a3, ret;
int op_64_bit;
- int cpl;
+ int cpl, r;
if (kvm_xen_hypercall_enabled(vcpu->kvm))
return kvm_xen_hypercall(vcpu);
@@ -10110,10 +10108,9 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
op_64_bit = is_64_bit_hypercall(vcpu);
cpl = kvm_x86_call(get_cpl)(vcpu);
- ret = __kvm_emulate_hypercall(vcpu, nr, a0, a1, a2, a3, op_64_bit, cpl);
- if (nr == KVM_HC_MAP_GPA_RANGE && !ret)
- /* MAP_GPA tosses the request to the user space. */
- return 0;
+ r = __kvm_emulate_hypercall(vcpu, nr, a0, a1, a2, a3, op_64_bit, cpl, &ret);
+ if (!r)
+ return 0;
if (!op_64_bit)
ret = (u32)ret;
+ return r;
if (!op_64_bit)
ret = (u32)ret;
base-commit: 675248928970d33f7fc8ca9851a170c98f4f1c4f