From: Mihai DONTU <mdontu@xxxxxxxxxxxxxxx>

Notify KVMI on vCPU create/destroy and VM destroy events, on the new
ioctls (KVM_INTROSPECTION_HOOK, KVM_INTROSPECTION_UNHOOK), and when a
KVMI command is received (KVM_REQ_INTROSPECTION).

Change the x86 emulator and MMU behavior when KVMI is present (TODO:
do this only when it is active) or when dealing with a page of
interest to the introspection tool.

The EPT A/D bits feature is also disabled by this patch while KVMI is
present.

Signed-off-by: Adalbert Lazăr <alazar@xxxxxxxxxxxxxxx>
Signed-off-by: Mihai Donțu <mdontu@xxxxxxxxxxxxxxx>
Signed-off-by: Nicușor Cîțu <ncitu@xxxxxxxxxxxxxxx>
Signed-off-by: Mircea Cîrjaliu <mcirjaliu@xxxxxxxxxxxxxxx>
---
Note: a hedged userspace usage sketch for the new ioctls is appended
after the diff.

 arch/x86/kvm/mmu.c  | 13 +++++++++++--
 arch/x86/kvm/svm.c  |  2 ++
 arch/x86/kvm/vmx.c  |  4 +++-
 arch/x86/kvm/x86.c  | 33 ++++++++++++++++++++++++++++-----
 virt/kvm/kvm_main.c | 34 +++++++++++++++++++++++++++++++++-
 5 files changed, 77 insertions(+), 9 deletions(-)

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index ed4b3c593bf5..85a980e5cc27 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -40,6 +40,7 @@
 #include <linux/uaccess.h>
 #include <linux/hash.h>
 #include <linux/kern_levels.h>
+#include <linux/kvmi.h>
 
 #include <asm/page.h>
 #include <asm/pat.h>
@@ -2458,6 +2459,9 @@ static void clear_sp_write_flooding_count(u64 *spte)
 static unsigned int kvm_mmu_page_track_acc(struct kvm_vcpu *vcpu, gfn_t gfn,
 					   unsigned int acc)
 {
+	if (!kvmi_tracked_gfn(vcpu, gfn))
+		return acc;
+
 	if (kvm_page_track_is_active(vcpu, gfn, KVM_PAGE_TRACK_PREREAD))
 		acc &= ~ACC_USER_MASK;
 	if (kvm_page_track_is_active(vcpu, gfn, KVM_PAGE_TRACK_PREWRITE) ||
@@ -5433,8 +5437,13 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u64 error_code,
 	 */
 	if (vcpu->arch.mmu->direct_map &&
 	    (error_code & PFERR_NESTED_GUEST_PAGE) == PFERR_NESTED_GUEST_PAGE) {
-		kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(cr2));
-		return 1;
+		if (kvmi_tracked_gfn(vcpu, gpa_to_gfn(cr2))) {
+			if (kvmi_update_ad_flags(vcpu))
+				return 1;
+		} else {
+			kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(cr2));
+			return 1;
+		}
 	}
 
 	/*
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index f355860d845c..766578401d1c 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -18,6 +18,7 @@
 #define pr_fmt(fmt) "SVM: " fmt
 
 #include <linux/kvm_host.h>
+#include <linux/kvmi.h>
 
 #include "irq.h"
 #include "mmu.h"
@@ -50,6 +51,7 @@
 #include <asm/kvm_para.h>
 #include <asm/irq_remapping.h>
 #include <asm/spec-ctrl.h>
+#include <asm/kvmi.h>
 #include <asm/virtext.h>
 
 #include "trace.h"
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index fc0219336edb..e9de89bd7f1c 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -36,6 +36,7 @@
 #include <linux/hrtimer.h>
 #include <linux/frame.h>
 #include <linux/nospec.h>
+#include <linux/kvmi.h>
 
 #include "kvm_cache_regs.h"
 #include "x86.h"
@@ -55,6 +56,7 @@
 #include <asm/mmu_context.h>
 #include <asm/spec-ctrl.h>
 #include <asm/mshyperv.h>
+#include <asm/kvmi.h>
 
 #include "trace.h"
 #include "pmu.h"
@@ -7939,7 +7941,7 @@ static __init int hardware_setup(void)
 	    !cpu_has_vmx_invept_global())
 		enable_ept = 0;
 
-	if (!cpu_has_vmx_ept_ad_bits() || !enable_ept)
+	if (!cpu_has_vmx_ept_ad_bits() || !enable_ept || kvmi_is_present())
 		enable_ept_ad_bits = 0;
 
 	if (!cpu_has_vmx_unrestricted_guest() || !enable_ept)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index e413f81d6c46..f073fe596244 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -20,6 +20,8 @@
  */
 
 #include <linux/kvm_host.h>
+#include <linux/kvmi.h>
+#include <uapi/linux/kvmi.h>
 #include "irq.h"
"mmu.h" #include "i8254.h" @@ -3076,6 +3078,11 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) r = kvm_x86_ops->get_nested_state ? kvm_x86_ops->get_nested_state(NULL, 0, 0) : 0; break; +#ifdef CONFIG_KVM_INTROSPECTION + case KVM_CAP_INTROSPECTION: + r = KVMI_VERSION; + break; +#endif default: break; } @@ -5314,7 +5321,7 @@ static int emulator_read_write_onepage(unsigned long addr, void *val, * operation using rep will only have the initial GPA from the NPF * occurred. */ - if (vcpu->arch.gpa_available && + if (vcpu->arch.gpa_available && !kvmi_is_present() && emulator_can_use_gpa(ctxt) && (addr & ~PAGE_MASK) == (vcpu->arch.gpa_val & ~PAGE_MASK)) { gpa = vcpu->arch.gpa_val; @@ -6096,7 +6103,8 @@ static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t cr2, indirect_shadow_pages = vcpu->kvm->arch.indirect_shadow_pages; spin_unlock(&vcpu->kvm->mmu_lock); - if (indirect_shadow_pages) + if (indirect_shadow_pages + && !kvmi_tracked_gfn(vcpu, gpa_to_gfn(gpa))) kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(gpa)); return true; @@ -6107,7 +6115,8 @@ static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t cr2, * and it failed try to unshadow page and re-enter the * guest to let CPU execute the instruction. */ - kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(gpa)); + if (!kvmi_tracked_gfn(vcpu, gpa_to_gfn(gpa))) + kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(gpa)); /* * If the access faults on its page table, it can not @@ -6159,6 +6168,9 @@ static bool retry_instruction(struct x86_emulate_ctxt *ctxt, if (!vcpu->arch.mmu->direct_map) gpa = kvm_mmu_gva_to_gpa_write(vcpu, cr2, NULL); + if (kvmi_tracked_gfn(vcpu, gpa_to_gfn(gpa))) + return false; + kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(gpa)); return true; @@ -6324,6 +6336,8 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, vcpu->arch.l1tf_flush_l1d = true; + kvmi_init_emulate(vcpu); + /* * Clear write_fault_to_shadow_pgtable here to ensure it is * never reused. 
@@ -6360,6 +6374,8 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu,
 		if (r != EMULATION_OK) {
 			if (emulation_type & EMULTYPE_TRAP_UD)
 				return EMULATE_FAIL;
+			if (!kvmi_track_emul_unimplemented(vcpu, cr2))
+				return EMULATE_DONE;
 			if (reexecute_instruction(vcpu, cr2, write_fault_to_spt,
 						emulation_type))
 				return EMULATE_DONE;
@@ -6429,9 +6445,10 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu,
 		writeback = false;
 		r = EMULATE_USER_EXIT;
 		vcpu->arch.complete_userspace_io = complete_emulated_mmio;
-	} else if (r == EMULATION_RESTART)
+	} else if (r == EMULATION_RESTART) {
+		kvmi_activate_rep_complete(vcpu);
 		goto restart;
-	else
+	} else
 		r = EMULATE_DONE;
 
 	if (writeback) {
@@ -7677,6 +7694,9 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 		 */
 		if (kvm_check_request(KVM_REQ_HV_STIMER, vcpu))
 			kvm_hv_process_stimers(vcpu);
+
+		if (kvm_check_request(KVM_REQ_INTROSPECTION, vcpu))
+			kvmi_handle_requests(vcpu);
 	}
 
 	if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) {
@@ -9492,6 +9512,9 @@ static inline bool kvm_vcpu_has_events(struct kvm_vcpu *vcpu)
 	    (vcpu->arch.smi_pending && !is_smm(vcpu)))
 		return true;
 
+	if (kvm_test_request(KVM_REQ_INTROSPECTION, vcpu))
+		return true;
+
 	if (kvm_arch_interrupt_allowed(vcpu) &&
 	    (kvm_cpu_has_interrupt(vcpu) ||
 	     kvm_guest_apic_has_interrupt(vcpu)))
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 2679e476b6c3..8db2bf9ca0c2 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -51,6 +51,7 @@
 #include <linux/slab.h>
 #include <linux/sort.h>
 #include <linux/bsearch.h>
+#include <linux/kvmi.h>
 
 #include <asm/processor.h>
 #include <asm/io.h>
@@ -315,6 +316,11 @@ int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id)
 	r = kvm_arch_vcpu_init(vcpu);
 	if (r < 0)
 		goto fail_free_run;
+
+	r = kvmi_vcpu_init(vcpu);
+	if (r < 0)
+		goto fail_free_run;
+
 	return 0;
 
 fail_free_run:
@@ -332,6 +338,7 @@ void kvm_vcpu_uninit(struct kvm_vcpu *vcpu)
 	 * descriptors are already gone.
 	 */
 	put_pid(rcu_dereference_protected(vcpu->pid, 1));
+	kvmi_vcpu_uninit(vcpu);
 	kvm_arch_vcpu_uninit(vcpu);
 	free_page((unsigned long)vcpu->run);
 }
@@ -681,6 +688,8 @@ static struct kvm *kvm_create_vm(unsigned long type)
 	if (r)
 		goto out_err;
 
+	kvmi_create_vm(kvm);
+
 	spin_lock(&kvm_lock);
 	list_add(&kvm->vm_list, &vm_list);
 	spin_unlock(&kvm_lock);
@@ -726,6 +735,7 @@ static void kvm_destroy_vm(struct kvm *kvm)
 	int i;
 	struct mm_struct *mm = kvm->mm;
 
+	kvmi_destroy_vm(kvm);
 	kvm_uevent_notify_change(KVM_EVENT_DESTROY_VM, kvm);
 	kvm_destroy_vm_debugfs(kvm);
 	kvm_arch_sync_events(kvm);
@@ -1476,7 +1486,7 @@ static int hva_to_pfn_remapped(struct vm_area_struct *vma,
 	 * Whoever called remap_pfn_range is also going to call e.g.
 	 * unmap_mapping_range before the underlying pages are freed,
 	 * causing a call to our MMU notifier.
-	 */
+	 */
 	kvm_get_pfn(pfn);
 
 	*p_pfn = pfn;
@@ -3133,6 +3143,24 @@ static long kvm_vm_ioctl(struct file *filp,
 	case KVM_CHECK_EXTENSION:
 		r = kvm_vm_ioctl_check_extension_generic(kvm, arg);
 		break;
+#ifdef CONFIG_KVM_INTROSPECTION
+	case KVM_INTROSPECTION_HOOK: {
+		struct kvm_introspection i;
+
+		r = -EFAULT;
+		if (copy_from_user(&i, argp, sizeof(i)))
+			goto out;
+
+		r = kvmi_hook(kvm, &i);
+		break;
+	}
+	case KVM_INTROSPECTION_UNHOOK:
+		r = -EFAULT;
+		if (kvmi_notify_unhook(kvm))
+			goto out;
+		r = 0;
+		break;
+#endif /* CONFIG_KVM_INTROSPECTION */
 	default:
 		r = kvm_arch_vm_ioctl(filp, ioctl, arg);
 	}
@@ -4075,6 +4103,9 @@ int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align,
 	r = kvm_vfio_ops_init();
 	WARN_ON(r);
 
+	r = kvmi_init();
+	WARN_ON(r);
+
 	return 0;
 
 out_unreg:
@@ -4100,6 +4131,7 @@ EXPORT_SYMBOL_GPL(kvm_init);
 
 void kvm_exit(void)
 {
+	kvmi_uninit();
 	debugfs_remove_recursive(kvm_debugfs_dir);
 	misc_deregister(&kvm_dev);
 	kmem_cache_destroy(kvm_vcpu_cache);
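
The following is a minimal userspace sketch of how a VMM might probe
for and use the new ioctls described above. It is an illustration
under assumptions, not code from this patch: KVM_CAP_INTROSPECTION and
the KVM_INTROSPECTION_* ioctl numbers come from the series' uapi
headers (not shown here), and the fd member of struct
kvm_introspection (the socket connected to the introspection tool) is
an assumed field name.

/*
 * Hypothetical usage sketch, not part of this patch.  Assumes the
 * series' uapi headers, which define KVM_CAP_INTROSPECTION, the
 * KVM_INTROSPECTION_* ioctls and struct kvm_introspection; the .fd
 * member (the socket connected to the introspection tool) is an
 * assumed field name.
 */
#include <stdio.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

static int hook_introspection(int vm_fd, int sock_fd)
{
	struct kvm_introspection hook = {
		.fd = sock_fd,	/* assumed: socket to the introspection tool */
	};
	int version;

	/* With this patch, KVM_CHECK_EXTENSION reports KVMI_VERSION. */
	version = ioctl(vm_fd, KVM_CHECK_EXTENSION, KVM_CAP_INTROSPECTION);
	if (version <= 0) {
		fprintf(stderr, "KVMI not supported by this kernel\n");
		return -1;
	}

	/* kvm_vm_ioctl() copies the struct and hands it to kvmi_hook(). */
	if (ioctl(vm_fd, KVM_INTROSPECTION_HOOK, &hook) < 0) {
		perror("KVM_INTROSPECTION_HOOK");
		return -1;
	}

	return 0;
}

static void unhook_introspection(int vm_fd)
{
	/* No argument: the kernel side only calls kvmi_notify_unhook(). */
	if (ioctl(vm_fd, KVM_INTROSPECTION_UNHOOK) < 0)
		perror("KVM_INTROSPECTION_UNHOOK");
}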
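
Note that kvm_vm_ioctl() in this patch only copies struct
kvm_introspection with copy_from_user() and defers to kvmi_hook(), so
validation of the structure's contents (and of the socket itself) is
presumably left to the KVMI core introduced by the other patches in
this series. Likewise, KVM_INTROSPECTION_UNHOOK merely asks
kvmi_notify_unhook() to signal the introspection tool, and returns
-EFAULT if that notification fails.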