This event is sent by the vCPU thread as a response to the KVMI_VCPU_PAUSE command, but it has a lower priority, being sent after any other introspection event and when no other introspection command is queued. The number of KVMI_EVENT_PAUSE_VCPU will match the number of successful KVMI_VCPU_PAUSE commands. Signed-off-by: Adalbert Lazăr <alazar@xxxxxxxxxxxxxxx> --- Documentation/virt/kvm/kvmi.rst | 22 ++- arch/x86/kvm/kvmi.c | 81 +++++++++ include/linux/kvmi_host.h | 11 ++ include/uapi/linux/kvmi.h | 13 ++ .../testing/selftests/kvm/x86_64/kvmi_test.c | 46 ++++++ virt/kvm/introspection/kvmi.c | 24 ++- virt/kvm/introspection/kvmi_int.h | 3 + virt/kvm/introspection/kvmi_msg.c | 155 +++++++++++++++++- 8 files changed, 351 insertions(+), 4 deletions(-) diff --git a/Documentation/virt/kvm/kvmi.rst b/Documentation/virt/kvm/kvmi.rst index 502ee06d5e77..06c1cb34209e 100644 --- a/Documentation/virt/kvm/kvmi.rst +++ b/Documentation/virt/kvm/kvmi.rst @@ -563,6 +563,25 @@ On x86 the structure looks like this:: It contains information about the vCPU state at the time of the event. +An event reply begins with two common structures:: + + struct kvmi_vcpu_hdr; + struct kvmi_event_reply { + __u8 action; + __u8 event; + __u16 padding1; + __u32 padding2; + }; + +All events accept the KVMI_EVENT_ACTION_CRASH action, which stops the +guest ungracefully, but as soon as possible. + +Most of the events accept the KVMI_EVENT_ACTION_CONTINUE action, which +lets the instruction that caused the event to continue. + +Some of the events accept the KVMI_EVENT_ACTION_RETRY action, to continue +by re-entering in guest. + Specific event data can follow these common structures. 1. KVMI_EVENT_UNHOOK @@ -604,7 +623,8 @@ operation can proceed). struct kvmi_vcpu_hdr; struct kvmi_event_reply; -This event is sent in response to a *KVMI_VCPU_PAUSE* command. +This event is sent in response to a *KVMI_VCPU_PAUSE* command and +cannot be controlled with *KVMI_VCPU_CONTROL_EVENTS*. Because it has a low priority, it will be sent after any other vCPU introspection event and when no other vCPU introspection command is queued. diff --git a/arch/x86/kvm/kvmi.c b/arch/x86/kvm/kvmi.c index cf7bfff6c8c5..ce7e2d5f2ab4 100644 --- a/arch/x86/kvm/kvmi.c +++ b/arch/x86/kvm/kvmi.c @@ -5,8 +5,89 @@ * Copyright (C) 2019-2020 Bitdefender S.R.L. */ +#include "linux/kvm_host.h" +#include "x86.h" #include "../../../virt/kvm/introspection/kvmi_int.h" +static unsigned int kvmi_vcpu_mode(const struct kvm_vcpu *vcpu, + const struct kvm_sregs *sregs) +{ + unsigned int mode = 0; + + if (is_long_mode((struct kvm_vcpu *) vcpu)) { + if (sregs->cs.l) + mode = 8; + else if (!sregs->cs.db) + mode = 2; + else + mode = 4; + } else if (sregs->cr0 & X86_CR0_PE) { + if (!sregs->cs.db) + mode = 2; + else + mode = 4; + } else if (!sregs->cs.db) { + mode = 2; + } else { + mode = 4; + } + + return mode; +} + +static void kvmi_get_msrs(struct kvm_vcpu *vcpu, struct kvmi_event_arch *event) +{ + struct msr_data msr; + + msr.host_initiated = true; + + msr.index = MSR_IA32_SYSENTER_CS; + kvm_x86_ops.get_msr(vcpu, &msr); + event->msrs.sysenter_cs = msr.data; + + msr.index = MSR_IA32_SYSENTER_ESP; + kvm_x86_ops.get_msr(vcpu, &msr); + event->msrs.sysenter_esp = msr.data; + + msr.index = MSR_IA32_SYSENTER_EIP; + kvm_x86_ops.get_msr(vcpu, &msr); + event->msrs.sysenter_eip = msr.data; + + msr.index = MSR_EFER; + kvm_x86_ops.get_msr(vcpu, &msr); + event->msrs.efer = msr.data; + + msr.index = MSR_STAR; + kvm_x86_ops.get_msr(vcpu, &msr); + event->msrs.star = msr.data; + + msr.index = MSR_LSTAR; + kvm_x86_ops.get_msr(vcpu, &msr); + event->msrs.lstar = msr.data; + + msr.index = MSR_CSTAR; + kvm_x86_ops.get_msr(vcpu, &msr); + event->msrs.cstar = msr.data; + + msr.index = MSR_IA32_CR_PAT; + kvm_x86_ops.get_msr(vcpu, &msr); + event->msrs.pat = msr.data; + + msr.index = MSR_KERNEL_GS_BASE; + kvm_x86_ops.get_msr(vcpu, &msr); + event->msrs.shadow_gs = msr.data; +} + +void kvmi_arch_setup_event(struct kvm_vcpu *vcpu, struct kvmi_event *ev) +{ + struct kvmi_event_arch *event = &ev->arch; + + kvm_arch_vcpu_get_regs(vcpu, &event->regs); + kvm_arch_vcpu_get_sregs(vcpu, &event->sregs); + ev->arch.mode = kvmi_vcpu_mode(vcpu, &event->sregs); + kvmi_get_msrs(vcpu, event); +} + int kvmi_arch_cmd_vcpu_get_info(struct kvm_vcpu *vcpu, struct kvmi_vcpu_get_info_reply *rpl) { diff --git a/include/linux/kvmi_host.h b/include/linux/kvmi_host.h index fdb8ce6fe6a5..a87f0322c584 100644 --- a/include/linux/kvmi_host.h +++ b/include/linux/kvmi_host.h @@ -6,6 +6,14 @@ #include <asm/kvmi_host.h> +struct kvmi_vcpu_reply { + int error; + int action; + u32 seq; + void *data; + size_t size; +}; + struct kvmi_job { struct list_head link; void *ctx; @@ -20,6 +28,9 @@ struct kvm_vcpu_introspection { spinlock_t job_lock; atomic_t pause_requests; + + struct kvmi_vcpu_reply reply; + bool waiting_for_reply; }; struct kvm_introspection { diff --git a/include/uapi/linux/kvmi.h b/include/uapi/linux/kvmi.h index 3ded22020bef..5a5b01df7e3e 100644 --- a/include/uapi/linux/kvmi.h +++ b/include/uapi/linux/kvmi.h @@ -38,6 +38,12 @@ enum { KVMI_NUM_EVENTS }; +enum { + KVMI_EVENT_ACTION_CONTINUE = 0, + KVMI_EVENT_ACTION_RETRY = 1, + KVMI_EVENT_ACTION_CRASH = 2, +}; + struct kvmi_msg_hdr { __u16 id; __u16 size; @@ -124,4 +130,11 @@ struct kvmi_event { struct kvmi_event_arch arch; }; +struct kvmi_event_reply { + __u8 action; + __u8 event; + __u16 padding1; + __u32 padding2; +}; + #endif /* _UAPI__LINUX_KVMI_H */ diff --git a/tools/testing/selftests/kvm/x86_64/kvmi_test.c b/tools/testing/selftests/kvm/x86_64/kvmi_test.c index 0df890b4b440..5c5c5018832d 100644 --- a/tools/testing/selftests/kvm/x86_64/kvmi_test.c +++ b/tools/testing/selftests/kvm/x86_64/kvmi_test.c @@ -34,6 +34,12 @@ static vm_paddr_t test_gpa; static uint8_t test_write_pattern; static int page_size; +struct vcpu_reply { + struct kvmi_msg_hdr hdr; + struct kvmi_vcpu_hdr vcpu_hdr; + struct kvmi_event_reply reply; +}; + struct vcpu_worker_data { struct kvm_vm *vm; int vcpu_id; @@ -772,14 +778,54 @@ static void pause_vcpu(void) cmd_vcpu_pause(1, 0, 0); } +static void reply_to_event(struct kvmi_msg_hdr *ev_hdr, struct kvmi_event *ev, + __u8 action, struct vcpu_reply *rpl, size_t rpl_size) +{ + ssize_t r; + + rpl->hdr.id = ev_hdr->id; + rpl->hdr.seq = ev_hdr->seq; + rpl->hdr.size = rpl_size - sizeof(rpl->hdr); + + rpl->vcpu_hdr.vcpu = ev->vcpu; + + rpl->reply.action = action; + rpl->reply.event = ev->event; + + r = send(Userspace_socket, rpl, rpl_size, 0); + TEST_ASSERT(r == rpl_size, + "send() failed, sending %zd, result %zd, errno %d (%s)\n", + rpl_size, r, errno, strerror(errno)); +} + +static void discard_pause_event(struct kvm_vm *vm) +{ + struct vcpu_worker_data data = {.vm = vm, .vcpu_id = VCPU_ID}; + struct vcpu_reply rpl = {}; + struct kvmi_msg_hdr hdr; + pthread_t vcpu_thread; + struct kvmi_event ev; + + vcpu_thread = start_vcpu_worker(&data); + + receive_event(&hdr, &ev, sizeof(ev), KVMI_EVENT_PAUSE_VCPU); + + reply_to_event(&hdr, &ev, KVMI_EVENT_ACTION_CONTINUE, + &rpl, sizeof(rpl)); + + stop_vcpu_worker(vcpu_thread, &data); +} + static void test_pause(struct kvm_vm *vm) { __u8 no_wait = 0, wait = 1, wait_inval = 2; __u8 padding = 1, no_padding = 0; pause_vcpu(); + discard_pause_event(vm); cmd_vcpu_pause(wait, no_padding, 0); + discard_pause_event(vm); cmd_vcpu_pause(wait_inval, no_padding, -KVM_EINVAL); cmd_vcpu_pause(no_wait, padding, -KVM_EINVAL); diff --git a/virt/kvm/introspection/kvmi.c b/virt/kvm/introspection/kvmi.c index a704e05b3184..02a866ca8d8c 100644 --- a/virt/kvm/introspection/kvmi.c +++ b/virt/kvm/introspection/kvmi.c @@ -358,6 +358,7 @@ static void kvmi_job_release_vcpu(struct kvm_vcpu *vcpu, void *ctx) struct kvm_vcpu_introspection *vcpui = VCPUI(vcpu); atomic_set(&vcpui->pause_requests, 0); + vcpui->waiting_for_reply = false; } static void kvmi_release_vcpus(struct kvm *kvm) @@ -743,12 +744,33 @@ void kvmi_run_jobs(struct kvm_vcpu *vcpu) } } +static void kvmi_handle_unsupported_event_action(struct kvm *kvm) +{ + kvmi_sock_shutdown(KVMI(kvm)); +} + +void kvmi_handle_common_event_actions(struct kvm *kvm, u32 action) +{ + switch (action) { + default: + kvmi_handle_unsupported_event_action(kvm); + } +} + static void kvmi_vcpu_pause_event(struct kvm_vcpu *vcpu) { struct kvm_vcpu_introspection *vcpui = VCPUI(vcpu); + u32 action; atomic_dec(&vcpui->pause_requests); - /* to be implemented */ + + action = kvmi_msg_send_vcpu_pause(vcpu); + switch (action) { + case KVMI_EVENT_ACTION_CONTINUE: + break; + default: + kvmi_handle_common_event_actions(vcpu->kvm, action); + } } void kvmi_handle_requests(struct kvm_vcpu *vcpu) diff --git a/virt/kvm/introspection/kvmi_int.h b/virt/kvm/introspection/kvmi_int.h index cb99cb3db396..f73596032883 100644 --- a/virt/kvm/introspection/kvmi_int.h +++ b/virt/kvm/introspection/kvmi_int.h @@ -27,6 +27,7 @@ void kvmi_sock_shutdown(struct kvm_introspection *kvmi); void kvmi_sock_put(struct kvm_introspection *kvmi); bool kvmi_msg_process(struct kvm_introspection *kvmi); int kvmi_msg_send_unhook(struct kvm_introspection *kvmi); +u32 kvmi_msg_send_vcpu_pause(struct kvm_vcpu *vcpu); /* kvmi.c */ void *kvmi_msg_alloc(void); @@ -37,6 +38,7 @@ bool kvmi_is_known_vm_event(u8 id); int kvmi_add_job(struct kvm_vcpu *vcpu, void (*fct)(struct kvm_vcpu *vcpu, void *ctx), void *ctx, void (*free_fct)(void *ctx)); +void kvmi_run_jobs(struct kvm_vcpu *vcpu); int kvmi_cmd_vm_control_events(struct kvm_introspection *kvmi, unsigned int event_id, bool enable); int kvmi_cmd_read_physical(struct kvm *kvm, u64 gpa, size_t size, @@ -51,5 +53,6 @@ int kvmi_cmd_vcpu_pause(struct kvm_vcpu *vcpu, bool wait); /* arch */ int kvmi_arch_cmd_vcpu_get_info(struct kvm_vcpu *vcpu, struct kvmi_vcpu_get_info_reply *rpl); +void kvmi_arch_setup_event(struct kvm_vcpu *vcpu, struct kvmi_event *ev); #endif diff --git a/virt/kvm/introspection/kvmi_msg.c b/virt/kvm/introspection/kvmi_msg.c index 1adec838cddd..1dcd3db75ff1 100644 --- a/virt/kvm/introspection/kvmi_msg.c +++ b/virt/kvm/introspection/kvmi_msg.c @@ -337,6 +337,66 @@ static int handle_vcpu_get_info(const struct kvmi_vcpu_msg_job *job, return kvmi_msg_vcpu_reply(job, msg, 0, &rpl, sizeof(rpl)); } +static int check_event_reply(const struct kvmi_msg_hdr *msg, + const struct kvmi_event_reply *reply, + const struct kvmi_vcpu_reply *expected, + u8 *action, size_t *received) +{ + size_t msg_size, common, event_size; + int err = -EINVAL; + + if (unlikely(msg->seq != expected->seq)) + return err; + + msg_size = msg->size; + common = sizeof(struct kvmi_vcpu_hdr) + sizeof(*reply); + + if (check_sub_overflow(msg_size, common, &event_size)) + return err; + + if (unlikely(event_size > expected->size)) + return err; + + if (unlikely(reply->padding1 || reply->padding2)) + return err; + + *received = event_size; + *action = reply->action; + return 0; +} + +static int handle_vcpu_event_reply(const struct kvmi_vcpu_msg_job *job, + const struct kvmi_msg_hdr *msg, + const void *rpl) +{ + struct kvm_vcpu_introspection *vcpui = VCPUI(job->vcpu); + struct kvmi_vcpu_reply *expected = &vcpui->reply; + const struct kvmi_event_reply *reply = rpl; + const void *reply_data = reply + 1; + size_t useful, received; + u8 action; + + expected->error = check_event_reply(msg, reply, expected, &action, + &received); + if (unlikely(expected->error)) + goto out; + + useful = min(received, expected->size); + if (useful) + memcpy(expected->data, reply_data, useful); + + if (expected->size > useful) + memset((char *)expected->data + useful, 0, + expected->size - useful); + + expected->action = action; + expected->error = 0; + +out: + vcpui->waiting_for_reply = false; + return expected->error; +} + /* * These functions are executed from the vCPU thread. The receiving thread * passes the messages using a newly allocated 'struct kvmi_vcpu_msg_job' @@ -345,6 +405,7 @@ static int handle_vcpu_get_info(const struct kvmi_vcpu_msg_job *job, */ static int(*const msg_vcpu[])(const struct kvmi_vcpu_msg_job *, const struct kvmi_msg_hdr *, const void *) = { + [KVMI_EVENT] = handle_vcpu_event_reply, [KVMI_VCPU_GET_INFO] = handle_vcpu_get_info, }; @@ -430,7 +491,7 @@ static int kvmi_msg_do_vm_cmd(struct kvm_introspection *kvmi, static bool is_message_allowed(struct kvm_introspection *kvmi, u16 id) { - return kvmi_is_command_allowed(kvmi, id); + return id == KVMI_EVENT || kvmi_is_command_allowed(kvmi, id); } static int kvmi_msg_vm_reply_ec(struct kvm_introspection *kvmi, @@ -450,7 +511,8 @@ static int kvmi_msg_handle_vm_cmd(struct kvm_introspection *kvmi, static bool vcpu_can_handle_messages(struct kvm_vcpu *vcpu) { - return vcpu->arch.mp_state != KVM_MP_STATE_UNINITIALIZED; + return VCPUI(vcpu)->waiting_for_reply + || vcpu->arch.mp_state != KVM_MP_STATE_UNINITIALIZED; } static int kvmi_get_vcpu_if_ready(struct kvm_introspection *kvmi, @@ -554,6 +616,13 @@ static void kvmi_setup_event_common(struct kvmi_event *ev, u32 ev_id, ev->size = sizeof(*ev); } +static void kvmi_setup_event(struct kvm_vcpu *vcpu, struct kvmi_event *ev, + u32 ev_id) +{ + kvmi_setup_event_common(ev, ev_id, kvm_vcpu_get_idx(vcpu)); + kvmi_arch_setup_event(vcpu, ev); +} + int kvmi_msg_send_unhook(struct kvm_introspection *kvmi) { struct kvmi_msg_hdr hdr; @@ -570,3 +639,85 @@ int kvmi_msg_send_unhook(struct kvm_introspection *kvmi) return kvmi_sock_write(kvmi, vec, n, msg_size); } + +static int kvmi_wait_for_reply(struct kvm_vcpu *vcpu) +{ + struct rcuwait *waitp = kvm_arch_vcpu_get_wait(vcpu); + struct kvm_vcpu_introspection *vcpui = VCPUI(vcpu); + int err = 0; + + while (vcpui->waiting_for_reply && !err) { + kvmi_run_jobs(vcpu); + + err = rcuwait_wait_event(waitp, + !vcpui->waiting_for_reply || + !list_empty(&vcpui->job_list), + TASK_KILLABLE); + } + + return err; +} + +static void kvmi_setup_vcpu_reply(struct kvm_vcpu_introspection *vcpui, + u32 event_seq, void *rpl, size_t rpl_size) +{ + memset(&vcpui->reply, 0, sizeof(vcpui->reply)); + + vcpui->reply.seq = event_seq; + vcpui->reply.data = rpl; + vcpui->reply.size = rpl_size; + vcpui->reply.error = -EINTR; + vcpui->waiting_for_reply = true; +} + +static int kvmi_send_event(struct kvm_vcpu *vcpu, u32 ev_id, + void *ev, size_t ev_size, + void *rpl, size_t rpl_size, int *action) +{ + struct kvmi_msg_hdr hdr; + struct kvmi_event common; + struct kvec vec[] = { + {.iov_base = &hdr, .iov_len = sizeof(hdr) }, + {.iov_base = &common, .iov_len = sizeof(common)}, + {.iov_base = ev, .iov_len = ev_size }, + }; + size_t msg_size = sizeof(hdr) + sizeof(common) + ev_size; + size_t n = ARRAY_SIZE(vec) - (ev_size == 0 ? 1 : 0); + struct kvm_vcpu_introspection *vcpui = VCPUI(vcpu); + struct kvm_introspection *kvmi = KVMI(vcpu->kvm); + int err; + + kvmi_setup_event_msg_hdr(kvmi, &hdr, msg_size); + kvmi_setup_event(vcpu, &common, ev_id); + kvmi_setup_vcpu_reply(vcpui, hdr.seq, rpl, rpl_size); + + err = kvmi_sock_write(kvmi, vec, n, msg_size); + if (err) + goto out; + + err = kvmi_wait_for_reply(vcpu); + if (err) + goto out; + + err = vcpui->reply.error; + + if (!err) + *action = vcpui->reply.action; + +out: + if (err) + kvmi_sock_shutdown(kvmi); + return err; +} + +u32 kvmi_msg_send_vcpu_pause(struct kvm_vcpu *vcpu) +{ + int err, action; + + err = kvmi_send_event(vcpu, KVMI_EVENT_PAUSE_VCPU, NULL, 0, + NULL, 0, &action); + if (err) + return KVMI_EVENT_ACTION_CONTINUE; + + return action; +}