On 2/25/2025 7:24 AM, Sean Christopherson wrote:
On Tue, Feb 18, 2025, Xin Li wrote:
On 9/30/2024 10:00 PM, Xin Li (Intel) wrote:
While I'm waiting for the CET patches for native Linux and KVM to be
upstreamed, do you think it would be worthwhile for you to take the cleanup
patches and some of the preparation patches first?
Yes, definitely. I'll go through the series and see what I can grab now.
I planned to do a rebase and fix the conflicts due to the reordering.
But I'm more than happy for you to do a first round.
BTW, if you plan to take
KVM: VMX: Virtualize nested exception tracking
then, as Chao Gao suggested, we also need a patch to save/restore the
nested flag of an exception (obviously a corresponding host patch is
needed). The following is the version that I have.
Thanks!
Xin
---
KVM: x86: Save/restore the nested flag of an exception
Save/restore the nested flag of an exception during VM save/restore
and live migration to ensure a correct event stack level is chosen
when a nested exception is injected through FRED event delivery.
Signed-off-by: Xin Li (Intel) <xin@xxxxxxxxx>
---
Changes since v3:
* Add live migration support for exception nested flag (Chao Gao).
---
diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
index 2b52eb77e29c..ed171fa6926f 100644
--- a/Documentation/virt/kvm/api.rst
+++ b/Documentation/virt/kvm/api.rst
@@ -1180,6 +1180,10 @@ The following bits are defined in the flags field:
fields contain a valid state. This bit will be set whenever
KVM_CAP_EXCEPTION_PAYLOAD is enabled.
+- KVM_VCPUEVENT_VALID_NESTED_FLAG may be set to signal that the
+ exception_is_nested field contains a valid state. This bit will be
+ set whenever KVM_CAP_EXCEPTION_NESTED_FLAG is enabled.
+
- KVM_VCPUEVENT_VALID_TRIPLE_FAULT may be set to signal that the
triple_fault_pending field contains a valid state. This bit will
be set whenever KVM_CAP_X86_TRIPLE_FAULT_EVENT is enabled.
@@ -1279,6 +1283,10 @@ can be set in the flags field to signal that the
exception_has_payload, exception_payload, and exception.pending fields
contain a valid state and shall be written into the VCPU.
+If KVM_CAP_EXCEPTION_NESTED_FLAG is enabled, KVM_VCPUEVENT_VALID_NESTED_FLAG
+can be set in the flags field to signal that the exception_is_nested field
+contains a valid state and shall be written into the VCPU.
+
If KVM_CAP_X86_TRIPLE_FAULT_EVENT is enabled, KVM_VCPUEVENT_VALID_TRIPLE_FAULT
can be set in flags field to signal that the triple_fault field contains
a valid state and shall be written into the VCPU.
@@ -8258,6 +8266,17 @@ KVM exits with the register state of either the L1 or L2 guest
depending on which executed at the time of an exit. Userspace must
take care to differentiate between these cases.
+7.37 KVM_CAP_EXCEPTION_NESTED_FLAG
+----------------------------------
+
+:Architectures: x86
+:Parameters: args[0] whether feature should be enabled or not
+
+With this capability enabled, an exception is saved/restored with the
+additional information of whether it was nested or not. FRED event
+delivery uses this information to ensure a correct event stack level
+is chosen when a VM entry injects a nested exception.
+
8. Other capabilities.
======================
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 4cfe1b8f4547..ede2319cee45 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1441,6 +1441,7 @@ struct kvm_arch {
bool guest_can_read_msr_platform_info;
bool exception_payload_enabled;
+ bool exception_nested_flag_enabled;
bool triple_fault_event;
diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h
index 9e75da97bce0..f5167e3a7d0f 100644
--- a/arch/x86/include/uapi/asm/kvm.h
+++ b/arch/x86/include/uapi/asm/kvm.h
@@ -326,6 +326,7 @@ struct kvm_reinject_control {
#define KVM_VCPUEVENT_VALID_SMM 0x00000008
#define KVM_VCPUEVENT_VALID_PAYLOAD 0x00000010
#define KVM_VCPUEVENT_VALID_TRIPLE_FAULT 0x00000020
+#define KVM_VCPUEVENT_VALID_NESTED_FLAG 0x00000040
/* Interrupt shadow states */
#define KVM_X86_SHADOW_INT_MOV_SS 0x01
@@ -363,7 +364,8 @@ struct kvm_vcpu_events {
struct {
__u8 pending;
} triple_fault;
- __u8 reserved[26];
+ __u8 reserved[25];
+ __u8 exception_is_nested;
__u8 exception_has_payload;
__u64 exception_payload;
};
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 01c945b27f01..80a9fa6ab720 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -4675,6 +4675,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_GET_MSR_FEATURES:
case KVM_CAP_MSR_PLATFORM_INFO:
case KVM_CAP_EXCEPTION_PAYLOAD:
+ case KVM_CAP_EXCEPTION_NESTED_FLAG:
case KVM_CAP_X86_TRIPLE_FAULT_EVENT:
case KVM_CAP_SET_GUEST_DEBUG:
case KVM_CAP_LAST_CPU:
@@ -5401,6 +5402,7 @@ static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu,
events->exception.error_code = ex->error_code;
events->exception_has_payload = ex->has_payload;
events->exception_payload = ex->payload;
+ events->exception_is_nested = ex->nested;
events->interrupt.injected =
vcpu->arch.interrupt.injected && !vcpu->arch.interrupt.soft;
@@ -5426,6 +5428,8 @@ static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu,
| KVM_VCPUEVENT_VALID_SMM);
if (vcpu->kvm->arch.exception_payload_enabled)
events->flags |= KVM_VCPUEVENT_VALID_PAYLOAD;
+ if (vcpu->kvm->arch.exception_nested_flag_enabled)
+ events->flags |= KVM_VCPUEVENT_VALID_NESTED_FLAG;
if (vcpu->kvm->arch.triple_fault_event) {
events->triple_fault.pending =
kvm_test_request(KVM_REQ_TRIPLE_FAULT, vcpu);
events->flags |= KVM_VCPUEVENT_VALID_TRIPLE_FAULT;
@@ -5440,7 +5444,8 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu,
| KVM_VCPUEVENT_VALID_SHADOW
| KVM_VCPUEVENT_VALID_SMM
| KVM_VCPUEVENT_VALID_PAYLOAD
- | KVM_VCPUEVENT_VALID_TRIPLE_FAULT))
+ | KVM_VCPUEVENT_VALID_TRIPLE_FAULT
+ | KVM_VCPUEVENT_VALID_NESTED_FLAG))
return -EINVAL;
if (events->flags & KVM_VCPUEVENT_VALID_PAYLOAD) {
@@ -5455,6 +5460,13 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu,
events->exception_has_payload = 0;
}
+ if (events->flags & KVM_VCPUEVENT_VALID_NESTED_FLAG) {
+ if (!vcpu->kvm->arch.exception_nested_flag_enabled)
+ return -EINVAL;
+ } else {
+ events->exception_is_nested = 0;
+ }
+
if ((events->exception.injected || events->exception.pending) &&
(events->exception.nr > 31 || events->exception.nr == NMI_VECTOR))
return -EINVAL;
@@ -5486,6 +5498,7 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu,
vcpu->arch.exception.error_code = events->exception.error_code;
vcpu->arch.exception.has_payload = events->exception_has_payload;
vcpu->arch.exception.payload = events->exception_payload;
+ vcpu->arch.exception.nested = events->exception_is_nested;
vcpu->arch.interrupt.injected = events->interrupt.injected;
vcpu->arch.interrupt.nr = events->interrupt.nr;
@@ -6609,6 +6622,10 @@ int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
kvm->arch.exception_payload_enabled = cap->args[0];
r = 0;
break;
+ case KVM_CAP_EXCEPTION_NESTED_FLAG:
+ kvm->arch.exception_nested_flag_enabled = cap->args[0];
+ r = 0;
+ break;
case KVM_CAP_X86_TRIPLE_FAULT_EVENT:
kvm->arch.triple_fault_event = cap->args[0];
r = 0;
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 45e6d8fca9b9..b79f3c10a887 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -929,6 +929,7 @@ struct kvm_enable_cap {
#define KVM_CAP_PRE_FAULT_MEMORY 236
#define KVM_CAP_X86_APIC_BUS_CYCLES_NS 237
#define KVM_CAP_X86_GUEST_MODE 238
+#define KVM_CAP_EXCEPTION_NESTED_FLAG 239
struct kvm_irq_routing_irqchip {
__u32 irqchip;
Thanks!
The ones at the top of my mind are:
KVM: x86: Use a dedicated flow for queueing re-injected exceptions
KVM: VMX: Don't modify guest XFD_ERR if CR0.TS=1
KVM: VMX: Pass XFD_ERR as pseudo-payload when injecting #NM
KVM: nVMX: Add a prerequisite to existence of VMCS fields
KVM: nVMX: Add a prerequisite to SHADOW_FIELD_R[OW] macros
Then, in particular, the nested exception tracking patch seems like a good
candidate, since Chao Gao suggested decoupling nested exception tracking from FRED:
KVM: VMX: Virtualize nested exception tracking
Lastly, the patches that add support for the secondary VM exit controls could
go in early as well:
KVM: VMX: Add support for the secondary VM exit controls
KVM: nVMX: Add support for the secondary VM exit controls
But if you don't like the idea, please just let me know.
Thanks!
Xin