From: Mihai Donțu <mdontu@xxxxxxxxxxxxxxx> This command sets the spte access bits (rwx) for an array of guest physical addresses (through the page tracking subsystem). These GPAs, with the requested access bits, are also kept in a radix tree in order to filter out the #PF events which are of no interest to the introspection tool. The access restrictions for pages that are not visible to the guest are silently ignored by default (the tool might set restrictions for the whole memory, based on KVMI_VM_GET_MAX_GFN). Signed-off-by: Mihai Donțu <mdontu@xxxxxxxxxxxxxxx> Co-developed-by: Adalbert Lazăr <alazar@xxxxxxxxxxxxxxx> Signed-off-by: Adalbert Lazăr <alazar@xxxxxxxxxxxxxxx> --- Documentation/virt/kvm/kvmi.rst | 60 ++++++ arch/x86/include/asm/kvm_host.h | 2 + arch/x86/include/asm/kvmi_host.h | 7 + arch/x86/kvm/kvmi.c | 40 ++++ include/linux/kvmi_host.h | 3 + include/uapi/linux/kvmi.h | 23 +++ .../testing/selftests/kvm/x86_64/kvmi_test.c | 52 ++++++ virt/kvm/introspection/kvmi.c | 174 +++++++++++++++++- virt/kvm/introspection/kvmi_int.h | 12 ++ virt/kvm/introspection/kvmi_msg.c | 12 ++ 10 files changed, 384 insertions(+), 1 deletion(-) diff --git a/Documentation/virt/kvm/kvmi.rst b/Documentation/virt/kvm/kvmi.rst index 536d6ecec026..123b2360d2e0 100644 --- a/Documentation/virt/kvm/kvmi.rst +++ b/Documentation/virt/kvm/kvmi.rst @@ -977,6 +977,66 @@ to control events for any other register will fail with -KVM_EINVAL:: * -KVM_EINVAL - the padding is not zero * -KVM_EAGAIN - the selected vCPU can't be introspected yet +23. KVMI_VM_SET_PAGE_ACCESS +--------------------------- + +:Architectures: x86 +:Versions: >= 1 +:Parameters: + +:: + + struct kvmi_vm_set_page_access { + __u16 count; + __u16 padding1; + __u32 padding2; + struct kvmi_page_access_entry entries[0]; + }; + +where:: + + struct kvmi_page_access_entry { + __u64 gpa; + __u8 access; + __u8 visible; + __u16 padding1; + __u32 padding2; + }; + + +:Returns: + +:: + + struct kvmi_error_code + +Sets the access bits (rwx) for an array of ``count`` guest physical +addresses. + +The valid access bits are:: + + KVMI_PAGE_ACCESS_R + KVMI_PAGE_ACCESS_W + KVMI_PAGE_ACCESS_X + + +The command will fail with -KVM_EINVAL if any of the specified combination +of access bits is not supported. Also, if ``visible`` is set to 1 but +the page is not visible. + +The command will try to apply all changes and return the first error if +some failed. The introspection tool should handle the rollback. + +In order to 'forget' an address, all three bits ('rwx') must be set. + +:Errors: + +* -KVM_EINVAL - the specified access bits combination is invalid +* -KVM_EINVAL - the padding is not zero +* -KVM_EINVAL - the message size is invalid +* -KVM_EAGAIN - the selected vCPU can't be introspected yet +* -KVM_ENOMEM - there is not enough memory to add the page tracking structures + Events ====== diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index acfcebce51dd..d96bf0e15ea2 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -45,6 +45,8 @@ #define KVM_PRIVATE_MEM_SLOTS 3 #define KVM_MEM_SLOTS_NUM (KVM_USER_MEM_SLOTS + KVM_PRIVATE_MEM_SLOTS) +#include <asm/kvmi_host.h> + #define KVM_HALT_POLL_NS_DEFAULT 200000 #define KVM_IRQCHIP_NUM_PINS KVM_IOAPIC_NUM_PINS diff --git a/arch/x86/include/asm/kvmi_host.h b/arch/x86/include/asm/kvmi_host.h index acc003403c95..98ea548c0b15 100644 --- a/arch/x86/include/asm/kvmi_host.h +++ b/arch/x86/include/asm/kvmi_host.h @@ -2,6 +2,7 @@ #ifndef _ASM_X86_KVMI_HOST_H #define _ASM_X86_KVMI_HOST_H +#include <asm/kvm_page_track.h> #include <asm/kvmi.h> struct msr_data; @@ -42,6 +43,12 @@ struct kvm_vcpu_arch_introspection { struct kvm_arch_introspection { }; +#define SLOTS_SIZE BITS_TO_LONGS(KVM_MEM_SLOTS_NUM) + +struct kvmi_arch_mem_access { + unsigned long active[KVM_PAGE_TRACK_MAX][SLOTS_SIZE]; +}; + #ifdef CONFIG_KVM_INTROSPECTION bool kvmi_monitor_bp_intercept(struct kvm_vcpu *vcpu, u32 dbg); diff --git a/arch/x86/kvm/kvmi.c b/arch/x86/kvm/kvmi.c index 0b1301ebafba..b233a3c5becb 100644 --- a/arch/x86/kvm/kvmi.c +++ b/arch/x86/kvm/kvmi.c @@ -1169,3 +1169,43 @@ bool kvmi_msr_event(struct kvm_vcpu *vcpu, struct msr_data *msr) return ret; } + +static const struct { + unsigned int allow_bit; + enum kvm_page_track_mode track_mode; +} track_modes[] = { + { KVMI_PAGE_ACCESS_R, KVM_PAGE_TRACK_PREREAD }, + { KVMI_PAGE_ACCESS_W, KVM_PAGE_TRACK_PREWRITE }, + { KVMI_PAGE_ACCESS_X, KVM_PAGE_TRACK_PREEXEC }, +}; + +void kvmi_arch_update_page_tracking(struct kvm *kvm, + struct kvm_memory_slot *slot, + struct kvmi_mem_access *m) +{ + struct kvmi_arch_mem_access *arch = &m->arch; + int i; + + if (!slot) { + slot = gfn_to_memslot(kvm, m->gfn); + if (!slot) + return; + } + + for (i = 0; i < ARRAY_SIZE(track_modes); i++) { + unsigned int allow_bit = track_modes[i].allow_bit; + enum kvm_page_track_mode mode = track_modes[i].track_mode; + bool slot_tracked = test_bit(slot->id, arch->active[mode]); + + if (m->access & allow_bit) { + if (slot_tracked) { + kvm_slot_page_track_remove_page(kvm, slot, + m->gfn, mode); + clear_bit(slot->id, arch->active[mode]); + } + } else if (!slot_tracked) { + kvm_slot_page_track_add_page(kvm, slot, m->gfn, mode); + set_bit(slot->id, arch->active[mode]); + } + } +} diff --git a/include/linux/kvmi_host.h b/include/linux/kvmi_host.h index 1fae589d9d35..11eb9b1c3c5e 100644 --- a/include/linux/kvmi_host.h +++ b/include/linux/kvmi_host.h @@ -64,6 +64,9 @@ struct kvm_introspection { atomic_t ev_seq; bool cleanup_on_unhook; + + struct radix_tree_root access_tree; + rwlock_t access_tree_lock; }; int kvmi_version(void); diff --git a/include/uapi/linux/kvmi.h b/include/uapi/linux/kvmi.h index 2872f90ff092..dc82f192534c 100644 --- a/include/uapi/linux/kvmi.h +++ b/include/uapi/linux/kvmi.h @@ -45,6 +45,8 @@ enum { KVMI_VCPU_GET_MTRR_TYPE = 21, KVMI_VCPU_CONTROL_MSR = 22, + KVMI_VM_SET_PAGE_ACCESS = 23, + KVMI_NUM_MESSAGES }; @@ -68,6 +70,12 @@ enum { KVMI_EVENT_ACTION_CRASH = 2, }; +enum { + KVMI_PAGE_ACCESS_R = 1 << 0, + KVMI_PAGE_ACCESS_W = 1 << 1, + KVMI_PAGE_ACCESS_X = 1 << 2, +}; + struct kvmi_msg_hdr { __u16 id; __u16 size; @@ -164,6 +172,21 @@ struct kvmi_vm_get_max_gfn_reply { __u64 gfn; }; +struct kvmi_page_access_entry { + __u64 gpa; + __u8 access; + __u8 visible; + __u16 padding1; + __u32 padding2; +}; + +struct kvmi_vm_set_page_access { + __u16 count; + __u16 padding1; + __u32 padding2; + struct kvmi_page_access_entry entries[0]; +}; + struct kvmi_event { __u16 size; __u16 vcpu; diff --git a/tools/testing/selftests/kvm/x86_64/kvmi_test.c b/tools/testing/selftests/kvm/x86_64/kvmi_test.c index 0ddc0029e3e5..4fb109cec1b4 100644 --- a/tools/testing/selftests/kvm/x86_64/kvmi_test.c +++ b/tools/testing/selftests/kvm/x86_64/kvmi_test.c @@ -1809,6 +1809,57 @@ static void test_cmd_vcpu_control_msr(struct kvm_vm *vm) test_invalid_control_msr(vm, msr); } +static int cmd_set_page_access(__u16 count, __u64 *gpa, __u8 *access) +{ + struct kvmi_page_access_entry *entry, *end; + struct kvmi_vm_set_page_access *cmd; + struct kvmi_msg_hdr *req; + size_t req_size; + int r; + + req_size = sizeof(*req) + sizeof(*cmd) + count * sizeof(*entry); + req = calloc(1, req_size); + + TEST_ASSERT(req, "Insufficient Memory\n"); + + cmd = (struct kvmi_vm_set_page_access *)(req + 1); + cmd->count = count; + + entry = cmd->entries; + end = cmd->entries + count; + for (; entry < end; entry++) { + entry->gpa = *gpa++; + entry->access = *access++; + } + + r = do_command(KVMI_VM_SET_PAGE_ACCESS, req, req_size, NULL, 0); + + free(req); + return r; +} + +static void set_page_access(__u64 gpa, __u8 access) +{ + int r; + + r = cmd_set_page_access(1, &gpa, &access); + TEST_ASSERT(r == 0, + "KVMI_VM_SET_PAGE_ACCESS failed, gpa 0x%llx, access 0x%x, error %d (%s)\n", + gpa, access, -r, kvm_strerror(-r)); +} + +static void test_cmd_vm_set_page_access(struct kvm_vm *vm) +{ + __u8 full_access = KVMI_PAGE_ACCESS_R | KVMI_PAGE_ACCESS_W + | KVMI_PAGE_ACCESS_X; + __u8 no_access = 0; + __u64 gpa = 0; + + set_page_access(gpa, no_access); + + set_page_access(gpa, full_access); +} + static void test_introspection(struct kvm_vm *vm) { srandom(time(0)); @@ -1841,6 +1892,7 @@ static void test_introspection(struct kvm_vm *vm) test_cmd_vcpu_get_mtrr_type(vm); test_event_descriptor(vm); test_cmd_vcpu_control_msr(vm); + test_cmd_vm_set_page_access(vm); unhook_introspection(vm); } diff --git a/virt/kvm/introspection/kvmi.c b/virt/kvm/introspection/kvmi.c index aa8a0708cc1c..b57ad490dd06 100644 --- a/virt/kvm/introspection/kvmi.c +++ b/virt/kvm/introspection/kvmi.c @@ -21,6 +21,11 @@ static DECLARE_BITMAP(Kvmi_known_vcpu_events, KVMI_NUM_EVENTS); static struct kmem_cache *msg_cache; static struct kmem_cache *job_cache; +static struct kmem_cache *radix_cache; + +static const u8 full_access = KVMI_PAGE_ACCESS_R | + KVMI_PAGE_ACCESS_W | + KVMI_PAGE_ACCESS_X; void *kvmi_msg_alloc(void) { @@ -39,6 +44,8 @@ static void kvmi_cache_destroy(void) msg_cache = NULL; kmem_cache_destroy(job_cache); job_cache = NULL; + kmem_cache_destroy(radix_cache); + radix_cache = NULL; } static int kvmi_cache_create(void) @@ -48,8 +55,11 @@ static int kvmi_cache_create(void) job_cache = kmem_cache_create("kvmi_job", sizeof(struct kvmi_job), 0, SLAB_ACCOUNT, NULL); + radix_cache = kmem_cache_create("kvmi_radix_tree", + sizeof(struct kvmi_mem_access), + 0, SLAB_ACCOUNT, NULL); - if (!msg_cache || !job_cache) { + if (!msg_cache || !job_cache || !radix_cache) { kvmi_cache_destroy(); return -1; @@ -241,12 +251,38 @@ static void free_vcpui(struct kvm_vcpu *vcpu, bool restore_interception) kvmi_make_request(vcpu, false); } +static void kvmi_clear_mem_access(struct kvm *kvm) +{ + struct kvm_introspection *kvmi = KVMI(kvm); + struct radix_tree_iter iter; + void **slot; + int idx; + + idx = srcu_read_lock(&kvm->srcu); + spin_lock(&kvm->mmu_lock); + + radix_tree_for_each_slot(slot, &kvmi->access_tree, &iter, 0) { + struct kvmi_mem_access *m = *slot; + + m->access = full_access; + kvmi_arch_update_page_tracking(kvm, NULL, m); + + radix_tree_iter_delete(&kvmi->access_tree, &iter, slot); + kmem_cache_free(radix_cache, m); + } + + spin_unlock(&kvm->mmu_lock); + srcu_read_unlock(&kvm->srcu, idx); +} + static void free_kvmi(struct kvm *kvm) { bool restore_interception = KVMI(kvm)->cleanup_on_unhook; struct kvm_vcpu *vcpu; int i; + kvmi_clear_mem_access(kvm); + kvm_for_each_vcpu(i, vcpu, kvm) free_vcpui(vcpu, restore_interception); @@ -297,6 +333,10 @@ alloc_kvmi(struct kvm *kvm, const struct kvm_introspection_hook *hook) atomic_set(&kvmi->ev_seq, 0); + INIT_RADIX_TREE(&kvmi->access_tree, + GFP_KERNEL & ~__GFP_DIRECT_RECLAIM); + rwlock_init(&kvmi->access_tree_lock); + kvm_for_each_vcpu(i, vcpu, kvm) { int err = create_vcpui(vcpu); @@ -1010,3 +1050,135 @@ bool kvmi_enter_guest(struct kvm_vcpu *vcpu) kvmi_put(vcpu->kvm); return r; } + +static struct kvmi_mem_access * +__kvmi_get_gfn_access(struct kvm_introspection *kvmi, const gfn_t gfn) +{ + return radix_tree_lookup(&kvmi->access_tree, gfn); +} + +static void kvmi_update_mem_access(struct kvm *kvm, struct kvmi_mem_access *m) +{ + struct kvm_introspection *kvmi = KVMI(kvm); + + kvmi_arch_update_page_tracking(kvm, NULL, m); + + if (m->access == full_access) { + radix_tree_delete(&kvmi->access_tree, m->gfn); + kmem_cache_free(radix_cache, m); + } +} + +static void kvmi_insert_mem_access(struct kvm *kvm, struct kvmi_mem_access *m) +{ + struct kvm_introspection *kvmi = KVMI(kvm); + + radix_tree_insert(&kvmi->access_tree, m->gfn, m); + kvmi_arch_update_page_tracking(kvm, NULL, m); +} + +static void kvmi_set_mem_access(struct kvm *kvm, struct kvmi_mem_access *m, + bool *used) +{ + struct kvm_introspection *kvmi = KVMI(kvm); + struct kvmi_mem_access *found; + int idx; + + idx = srcu_read_lock(&kvm->srcu); + spin_lock(&kvm->mmu_lock); + write_lock(&kvmi->access_tree_lock); + + found = __kvmi_get_gfn_access(kvmi, m->gfn); + if (found) { + found->access = m->access; + kvmi_update_mem_access(kvm, found); + } else if (m->access != full_access) { + kvmi_insert_mem_access(kvm, m); + *used = true; + } + + write_unlock(&kvmi->access_tree_lock); + spin_unlock(&kvm->mmu_lock); + srcu_read_unlock(&kvm->srcu, idx); +} + +static int kvmi_set_gfn_access(struct kvm *kvm, gfn_t gfn, u8 access) +{ + struct kvmi_mem_access *m; + bool used = false; + int err = 0; + + m = kmem_cache_zalloc(radix_cache, GFP_KERNEL); + if (!m) + return -KVM_ENOMEM; + + m->gfn = gfn; + m->access = access; + + if (radix_tree_preload(GFP_KERNEL)) + err = -KVM_ENOMEM; + else + kvmi_set_mem_access(kvm, m, &used); + + radix_tree_preload_end(); + + if (!used) + kmem_cache_free(radix_cache, m); + + return err; +} + +static bool kvmi_is_visible_gfn(struct kvm *kvm, gfn_t gfn) +{ + bool visible; + int idx; + + idx = srcu_read_lock(&kvm->srcu); + visible = kvm_is_visible_gfn(kvm, gfn); + srcu_read_unlock(&kvm->srcu, idx); + + return visible; +} + +static int set_page_access_entry(struct kvm_introspection *kvmi, + const struct kvmi_page_access_entry *entry) +{ + u8 unknown_bits = ~(KVMI_PAGE_ACCESS_R | KVMI_PAGE_ACCESS_W + | KVMI_PAGE_ACCESS_X); + gfn_t gfn = gpa_to_gfn(entry->gpa); + struct kvm *kvm = kvmi->kvm; + + if ((entry->access & unknown_bits) + || entry->padding1 || entry->padding2 + || entry->visible > 1) + return -KVM_EINVAL; + + if (!kvmi_is_visible_gfn(kvm, gfn)) + return entry->visible ? -KVM_EINVAL : 0; + + return kvmi_set_gfn_access(kvm, gfn, entry->access); +} + +int kvmi_cmd_set_page_access(struct kvm_introspection *kvmi, + const struct kvmi_msg_hdr *msg, + const struct kvmi_vm_set_page_access *req) +{ + const struct kvmi_page_access_entry *entry = req->entries; + const struct kvmi_page_access_entry *end = req->entries + req->count; + int ec = 0; + + if (req->padding1 || req->padding2) + return -KVM_EINVAL; + + if (msg->size < struct_size(req, entries, req->count)) + return -KVM_EINVAL; + + for (; entry < end; entry++) { + int r = set_page_access_entry(kvmi, entry); + + if (r && !ec) + ec = r; + } + + return ec; +} diff --git a/virt/kvm/introspection/kvmi_int.h b/virt/kvm/introspection/kvmi_int.h index 8f8557bd8ab0..024e7acf0dce 100644 --- a/virt/kvm/introspection/kvmi_int.h +++ b/virt/kvm/introspection/kvmi_int.h @@ -21,6 +21,12 @@ #define KVMI(kvm) ((kvm)->kvmi) #define VCPUI(vcpu) ((vcpu)->kvmi) +struct kvmi_mem_access { + gfn_t gfn; + u8 access; + struct kvmi_arch_mem_access arch; +}; + static inline bool is_event_enabled(struct kvm_vcpu *vcpu, int event) { return test_bit(event, VCPUI(vcpu)->ev_enable_mask); @@ -73,6 +79,9 @@ int kvmi_cmd_write_physical(struct kvm *kvm, u64 gpa, size_t size, int kvmi_cmd_vcpu_pause(struct kvm_vcpu *vcpu, bool wait); int kvmi_cmd_vcpu_set_registers(struct kvm_vcpu *vcpu, const struct kvm_regs *regs); +int kvmi_cmd_set_page_access(struct kvm_introspection *kvmi, + const struct kvmi_msg_hdr *msg, + const struct kvmi_vm_set_page_access *req); /* arch */ bool kvmi_arch_vcpu_alloc_interception(struct kvm_vcpu *vcpu); @@ -117,5 +126,8 @@ int kvmi_arch_cmd_vcpu_set_xsave(struct kvm_vcpu *vcpu, int kvmi_arch_cmd_vcpu_get_mtrr_type(struct kvm_vcpu *vcpu, u64 gpa, u8 *type); int kvmi_arch_cmd_vcpu_control_msr(struct kvm_vcpu *vcpu, const struct kvmi_vcpu_control_msr *req); +void kvmi_arch_update_page_tracking(struct kvm *kvm, + struct kvm_memory_slot *slot, + struct kvmi_mem_access *m); #endif diff --git a/virt/kvm/introspection/kvmi_msg.c b/virt/kvm/introspection/kvmi_msg.c index d9cbbefb9993..f7ffb971f1dc 100644 --- a/virt/kvm/introspection/kvmi_msg.c +++ b/virt/kvm/introspection/kvmi_msg.c @@ -334,6 +334,17 @@ static int handle_vm_get_max_gfn(struct kvm_introspection *kvmi, return kvmi_msg_vm_reply(kvmi, msg, 0, &rpl, sizeof(rpl)); } +static int handle_vm_set_page_access(struct kvm_introspection *kvmi, + const struct kvmi_msg_hdr *msg, + const void *req) +{ + int ec; + + ec = kvmi_cmd_set_page_access(kvmi, msg, req); + + return kvmi_msg_vm_reply(kvmi, msg, ec, NULL, 0); +} + /* * These commands are executed by the receiving thread. */ @@ -348,6 +359,7 @@ static int(*const msg_vm[])(struct kvm_introspection *, [KVMI_VM_GET_INFO] = handle_vm_get_info, [KVMI_VM_GET_MAX_GFN] = handle_vm_get_max_gfn, [KVMI_VM_READ_PHYSICAL] = handle_vm_read_physical, + [KVMI_VM_SET_PAGE_ACCESS] = handle_vm_set_page_access, [KVMI_VM_WRITE_PHYSICAL] = handle_vm_write_physical, };