/*
@@ -22,6 +25,13 @@ struct kvm_page_track_notifier_head {
struct kvm_page_track_notifier_node {
struct hlist_node node;
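+ /*
+  * It is called before the guest reads the read-tracked page or
+  * writes the write-tracked page. Returning false from a notifier
+  * stops the emulation of that access. A preread notifier that
+  * fills @new with the data itself signals so via @data_ready.
+  */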
+ bool (*track_preread)(struct kvm_vcpu *vcpu, gpa_t gpa, u8 *new,
+ int bytes,
+ struct kvm_page_track_notifier_node *node,
+ bool *data_ready);
+ bool (*track_prewrite)(struct kvm_vcpu *vcpu, gpa_t gpa, const u8 *new,
+ int bytes,
+ struct kvm_page_track_notifier_node *node);
/*
* It is called when guest is writing the write-tracked page
* and write emulation is finished at that time.
@@ -34,6 +44,11 @@ struct kvm_page_track_notifier_node {
*/
void (*track_write)(struct kvm_vcpu *vcpu, gpa_t gpa, const u8 *new,
int bytes, struct kvm_page_track_notifier_node *node);
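+ /*
+  * track_preexec is called before the guest fetches an instruction
+  * from the exec-tracked page; returning false stops the emulation.
+  * track_create_slot lets the notifier set up tracking for a newly
+  * created memory slot.
+  */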
+ bool (*track_preexec)(struct kvm_vcpu *vcpu, gpa_t gpa,
+ struct kvm_page_track_notifier_node *node);
+ void (*track_create_slot)(struct kvm *kvm, struct kvm_memory_slot *slot,
+ unsigned long npages,
+ struct kvm_page_track_notifier_node *node);
/*
* It is called when memory slot is being moved or removed
* users can drop write-protection for the pages in that memory slot
@@ -51,7 +66,7 @@ void kvm_page_track_cleanup(struct kvm *kvm);
void kvm_page_track_free_memslot(struct kvm_memory_slot *free,
struct kvm_memory_slot *dont);
-int kvm_page_track_create_memslot(struct kvm_memory_slot *slot,
+int kvm_page_track_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
unsigned long npages);
void kvm_slot_page_track_add_page(struct kvm *kvm,
@@ -69,7 +84,12 @@ kvm_page_track_register_notifier(struct kvm *kvm,
void
kvm_page_track_unregister_notifier(struct kvm *kvm,
struct kvm_page_track_notifier_node *n);
+bool kvm_page_track_preread(struct kvm_vcpu *vcpu, gpa_t gpa, u8 *new,
+ int bytes, bool *data_ready);
+bool kvm_page_track_prewrite(struct kvm_vcpu *vcpu, gpa_t gpa, const u8 *new,
+ int bytes);
void kvm_page_track_write(struct kvm_vcpu *vcpu, gpa_t gpa, const u8 *new,
int bytes);
+bool kvm_page_track_preexec(struct kvm_vcpu *vcpu, gpa_t gpa);
void kvm_page_track_flush_slot(struct kvm *kvm, struct kvm_memory_slot *slot);
#endif
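
The header above now exposes pre-access hooks next to the existing write notifier. As a minimal sketch of the consumer side (not part of the patch): only the kvm_page_track_* calls, the KVM_PAGE_TRACK_PREWRITE mode and the notifier node layout come from this series; the demo_* names are hypothetical and locking around slot handling is elided.

/*
 * Hypothetical module that registers a notifier and pre-write-tracks
 * a single gfn.
 */
#include <linux/kvm_host.h>
#include <asm/kvm_page_track.h>

static bool demo_track_prewrite(struct kvm_vcpu *vcpu, gpa_t gpa,
				const u8 *new, int bytes,
				struct kvm_page_track_notifier_node *node)
{
	/* Returning false makes the emulation of this write fail. */
	return true;
}

static struct kvm_page_track_notifier_node demo_node = {
	.track_prewrite = demo_track_prewrite,
};

static void demo_watch_gfn(struct kvm *kvm, struct kvm_memory_slot *slot,
			   gfn_t gfn)
{
	kvm_page_track_register_notifier(kvm, &demo_node);
	/* Write-protects the gfn and flushes the remote TLBs if needed. */
	kvm_slot_page_track_add_page(kvm, slot, gfn, KVM_PAGE_TRACK_PREWRITE);
}
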
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 55fcb0292724..19dc17b00db2 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -1014,9 +1014,13 @@ static void account_shadowed(struct kvm *kvm, struct kvm_mmu_page *sp)
slot = __gfn_to_memslot(slots, gfn);
/* the non-leaf shadow pages are keeping readonly. */
- if (sp->role.level > PT_PAGE_TABLE_LEVEL)
- return kvm_slot_page_track_add_page(kvm, slot, gfn,
- KVM_PAGE_TRACK_WRITE);
+ if (sp->role.level > PT_PAGE_TABLE_LEVEL) {
+ kvm_slot_page_track_add_page(kvm, slot, gfn,
+ KVM_PAGE_TRACK_PREWRITE);
+ kvm_slot_page_track_add_page(kvm, slot, gfn,
+ KVM_PAGE_TRACK_WRITE);
+ return;
+ }
kvm_mmu_gfn_disallow_lpage(slot, gfn);
}
@@ -1031,9 +1035,13 @@ static void unaccount_shadowed(struct kvm *kvm, struct kvm_mmu_page *sp)
gfn = sp->gfn;
slots = kvm_memslots_for_spte_role(kvm, sp->role);
slot = __gfn_to_memslot(slots, gfn);
- if (sp->role.level > PT_PAGE_TABLE_LEVEL)
- return kvm_slot_page_track_remove_page(kvm, slot, gfn,
- KVM_PAGE_TRACK_WRITE);
+ if (sp->role.level > PT_PAGE_TABLE_LEVEL) {
+ kvm_slot_page_track_remove_page(kvm, slot, gfn,
+ KVM_PAGE_TRACK_PREWRITE);
+ kvm_slot_page_track_remove_page(kvm, slot, gfn,
+ KVM_PAGE_TRACK_WRITE);
+ return;
+ }
kvm_mmu_gfn_allow_lpage(slot, gfn);
}
@@ -1416,6 +1424,29 @@ static bool spte_write_protect(u64 *sptep, bool pt_protect)
return mmu_spte_update(sptep, spte);
}
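+/*
+ * Clear both the writable and the present bits so that any guest
+ * access to the page faults. Read protection independent of execute
+ * would require EPT execute-only support (see the TODO below).
+ */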
+static bool spte_read_protect(u64 *sptep)
+{
+ u64 spte = *sptep;
+
+ rmap_printk("rmap_read_protect: spte %p %llx\n", sptep, *sptep);
+
+ /* TODO: verify if the CPU supports EPT-execute-only */
+ spte = spte & ~(PT_WRITABLE_MASK | PT_PRESENT_MASK);
+
+ return mmu_spte_update(sptep, spte);
+}
+
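+/*
+ * Clear bit 2 of the SPTE: with EPT this is the execute permission,
+ * so instruction fetches from the page will fault (with legacy
+ * shadow paging it removes user access instead).
+ */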
+static bool spte_exec_protect(u64 *sptep, bool pt_protect)
+{
+ u64 spte = *sptep;
+
+ rmap_printk("rmap_exec_protect: spte %p %llx\n", sptep, *sptep);
+
+ spte = spte & ~PT_USER_MASK;
+
+ return mmu_spte_update(sptep, spte);
+}
+
static bool __rmap_write_protect(struct kvm *kvm,
struct kvm_rmap_head *rmap_head,
bool pt_protect)
@@ -1430,6 +1461,34 @@ static bool __rmap_write_protect(struct kvm *kvm,
return flush;
}
+static bool __rmap_read_protect(struct kvm *kvm,
+ struct kvm_rmap_head *rmap_head,
+ bool pt_protect)
+{
+ u64 *sptep;
+ struct rmap_iterator iter;
+ bool flush = false;
+
+ for_each_rmap_spte(rmap_head, &iter, sptep)
+ flush |= spte_read_protect(sptep);
+
+ return flush;
+}
+
+static bool __rmap_exec_protect(struct kvm *kvm,
+ struct kvm_rmap_head *rmap_head,
+ bool pt_protect)
+{
+ u64 *sptep;
+ struct rmap_iterator iter;
+ bool flush = false;
+
+ for_each_rmap_spte(rmap_head, &iter, sptep)
+ flush |= spte_exec_protect(sptep, pt_protect);
+
+ return flush;
+}
+
static bool spte_clear_dirty(u64 *sptep)
{
u64 spte = *sptep;
@@ -1600,6 +1659,36 @@ bool kvm_mmu_slot_gfn_write_protect(struct kvm *kvm,
return write_protected;
}
+bool kvm_mmu_slot_gfn_read_protect(struct kvm *kvm,
+ struct kvm_memory_slot *slot, u64 gfn)
+{
+ struct kvm_rmap_head *rmap_head;
+ int i;
+ bool read_protected = false;
+
+ for (i = PT_PAGE_TABLE_LEVEL; i <= PT_MAX_HUGEPAGE_LEVEL; ++i) {
+ rmap_head = __gfn_to_rmap(gfn, i, slot);
+ read_protected |= __rmap_read_protect(kvm, rmap_head, true);
+ }
+
+ return read_protected;
+}
+
+bool kvm_mmu_slot_gfn_exec_protect(struct kvm *kvm,
+ struct kvm_memory_slot *slot, u64 gfn)
+{
+ struct kvm_rmap_head *rmap_head;
+ int i;
+ bool exec_protected = false;
+
+ for (i = PT_PAGE_TABLE_LEVEL; i <= PT_MAX_HUGEPAGE_LEVEL; ++i) {
+ rmap_head = __gfn_to_rmap(gfn, i, slot);
+ exec_protected |= __rmap_exec_protect(kvm, rmap_head, true);
+ }
+
+ return exec_protected;
+}
+
static bool rmap_write_protect(struct kvm_vcpu *vcpu, u64 gfn)
{
struct kvm_memory_slot *slot;
@@ -2688,7 +2777,8 @@ static bool mmu_need_write_protect(struct kvm_vcpu *vcpu, gfn_t gfn,
{
struct kvm_mmu_page *sp;
- if (kvm_page_track_is_active(vcpu, gfn, KVM_PAGE_TRACK_WRITE))
+ if (kvm_page_track_is_active(vcpu, gfn, KVM_PAGE_TRACK_PREWRITE) ||
+ kvm_page_track_is_active(vcpu, gfn, KVM_PAGE_TRACK_WRITE))
return true;
for_each_gfn_indirect_valid_sp(vcpu->kvm, sp, gfn) {
@@ -2953,6 +3043,21 @@ static void direct_pte_prefetch(struct kvm_vcpu *vcpu, u64 *sptep)
__direct_pte_prefetch(vcpu, sp, sptep);
}
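+/*
+ * Start from full permissions and strip read/write/execute access
+ * according to the tracking modes active for this gfn.
+ */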
+static unsigned int kvm_mmu_page_track_acc(struct kvm_vcpu *vcpu, gfn_t gfn)
+{
+ unsigned int acc = ACC_ALL;
+
+ if (kvm_page_track_is_active(vcpu, gfn, KVM_PAGE_TRACK_PREREAD))
+ acc &= ~ACC_USER_MASK;
+ if (kvm_page_track_is_active(vcpu, gfn, KVM_PAGE_TRACK_PREWRITE) ||
+ kvm_page_track_is_active(vcpu, gfn, KVM_PAGE_TRACK_WRITE))
+ acc &= ~ACC_WRITE_MASK;
+ if (kvm_page_track_is_active(vcpu, gfn, KVM_PAGE_TRACK_PREEXEC))
+ acc &= ~ACC_EXEC_MASK;
+
+ return acc;
+}
+
static int __direct_map(struct kvm_vcpu *vcpu, int write, int map_writable,
int level, gfn_t gfn, kvm_pfn_t pfn, bool prefault)
{
@@ -2966,7 +3071,9 @@ static int __direct_map(struct kvm_vcpu *vcpu, int write, int map_writable,
for_each_shadow_entry(vcpu, (u64)gfn << PAGE_SHIFT, iterator) {
if (iterator.level == level) {
- emulate = mmu_set_spte(vcpu, iterator.sptep, ACC_ALL,
+ unsigned int acc = kvm_mmu_page_track_acc(vcpu, gfn);
+
+ emulate = mmu_set_spte(vcpu, iterator.sptep, acc,
write, level, gfn, pfn, prefault,
map_writable);
direct_pte_prefetch(vcpu, iterator.sptep);
@@ -3713,15 +3820,21 @@ static bool page_fault_handle_page_track(struct kvm_vcpu *vcpu,
if (unlikely(error_code & PFERR_RSVD_MASK))
return false;
- if (!(error_code & PFERR_PRESENT_MASK) ||
- !(error_code & PFERR_WRITE_MASK))
+ if (!(error_code & PFERR_PRESENT_MASK))
return false;
/*
- * guest is writing the page which is write tracked which can
+ * guest is reading/writing/fetching a page which is
+ * read/write/execute tracked, and this can
* not be fixed by page fault handler.
*/
- if (kvm_page_track_is_active(vcpu, gfn, KVM_PAGE_TRACK_WRITE))
+ if (((error_code & PFERR_USER_MASK) &&
+      kvm_page_track_is_active(vcpu, gfn, KVM_PAGE_TRACK_PREREAD)) ||
+     ((error_code & PFERR_WRITE_MASK) &&
+      (kvm_page_track_is_active(vcpu, gfn, KVM_PAGE_TRACK_PREWRITE) ||
+       kvm_page_track_is_active(vcpu, gfn, KVM_PAGE_TRACK_WRITE))) ||
+     ((error_code & PFERR_FETCH_MASK) &&
+      kvm_page_track_is_active(vcpu, gfn, KVM_PAGE_TRACK_PREEXEC)))
return true;
return false;
@@ -4942,7 +5055,11 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u64 error_code,
* and resume the guest.
*/
if (vcpu->arch.mmu.direct_map &&
- (error_code & PFERR_NESTED_GUEST_PAGE) == PFERR_NESTED_GUEST_PAGE) {
+ (error_code & PFERR_NESTED_GUEST_PAGE) == PFERR_NESTED_GUEST_PAGE &&
+ !kvm_page_track_is_active(vcpu, gpa_to_gfn(cr2), KVM_PAGE_TRACK_PREREAD) &&
+ !kvm_page_track_is_active(vcpu, gpa_to_gfn(cr2), KVM_PAGE_TRACK_PREWRITE) &&
+ !kvm_page_track_is_active(vcpu, gpa_to_gfn(cr2), KVM_PAGE_TRACK_WRITE) &&
+ !kvm_page_track_is_active(vcpu, gpa_to_gfn(cr2), KVM_PAGE_TRACK_PREEXEC)) {
kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(cr2));
return 1;
}
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
index 5b408c0ad612..57c947752490 100644
--- a/arch/x86/kvm/mmu.h
+++ b/arch/x86/kvm/mmu.h
@@ -193,5 +193,9 @@ void kvm_mmu_gfn_disallow_lpage(struct kvm_memory_slot *slot, gfn_t gfn);
void kvm_mmu_gfn_allow_lpage(struct kvm_memory_slot *slot, gfn_t gfn);
bool kvm_mmu_slot_gfn_write_protect(struct kvm *kvm,
struct kvm_memory_slot *slot, u64 gfn);
+bool kvm_mmu_slot_gfn_read_protect(struct kvm *kvm,
+ struct kvm_memory_slot *slot, u64 gfn);
+bool kvm_mmu_slot_gfn_exec_protect(struct kvm *kvm,
+ struct kvm_memory_slot *slot, u64 gfn);
int kvm_arch_write_log_dirty(struct kvm_vcpu *vcpu);
#endif
diff --git a/arch/x86/kvm/page_track.c b/arch/x86/kvm/page_track.c
index 01c1371f39f8..8bf6581d25d5 100644
--- a/arch/x86/kvm/page_track.c
+++ b/arch/x86/kvm/page_track.c
@@ -34,10 +34,13 @@ void kvm_page_track_free_memslot(struct kvm_memory_slot *free,
}
}
-int kvm_page_track_create_memslot(struct kvm_memory_slot *slot,
+int kvm_page_track_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
unsigned long npages)
{
- int i;
+ struct kvm_page_track_notifier_head *head;
+ struct kvm_page_track_notifier_node *n;
+ int idx;
+ int i;
for (i = 0; i < KVM_PAGE_TRACK_MAX; i++) {
slot->arch.gfn_track[i] = kvzalloc(npages *
@@ -46,6 +49,17 @@ int kvm_page_track_create_memslot(struct kvm_memory_slot *slot,
goto track_free;
}
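+ /*
+  * Let the registered notifiers set up tracking for the pages of
+  * the newly created memory slot.
+  */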
+ head = &kvm->arch.track_notifier_head;
+
+ if (hlist_empty(&head->track_notifier_list))
+ return 0;
+
+ idx = srcu_read_lock(&head->track_srcu);
+ hlist_for_each_entry_rcu(n, &head->track_notifier_list, node)
+ if (n->track_create_slot)
+ n->track_create_slot(kvm, slot, npages, n);
+ srcu_read_unlock(&head->track_srcu, idx);
+
return 0;
track_free:
@@ -86,7 +100,7 @@ static void update_gfn_track(struct kvm_memory_slot *slot, gfn_t gfn,
* @kvm: the guest instance we are interested in.
* @slot: the @gfn belongs to.
* @gfn: the guest page.
- * @mode: tracking mode, currently only write track is supported.
+ * @mode: tracking mode.
*/
void kvm_slot_page_track_add_page(struct kvm *kvm,
struct kvm_memory_slot *slot, gfn_t gfn,
@@ -104,9 +118,16 @@ void kvm_slot_page_track_add_page(struct kvm *kvm,
*/
kvm_mmu_gfn_disallow_lpage(slot, gfn);
- if (mode == KVM_PAGE_TRACK_WRITE)
+ if (mode == KVM_PAGE_TRACK_PREWRITE || mode == KVM_PAGE_TRACK_WRITE) {
if (kvm_mmu_slot_gfn_write_protect(kvm, slot, gfn))
kvm_flush_remote_tlbs(kvm);
+ } else if (mode == KVM_PAGE_TRACK_PREREAD) {
+ if (kvm_mmu_slot_gfn_read_protect(kvm, slot, gfn))
+ kvm_flush_remote_tlbs(kvm);
+ } else if (mode == KVM_PAGE_TRACK_PREEXEC) {
+ if (kvm_mmu_slot_gfn_exec_protect(kvm, slot, gfn))
+ kvm_flush_remote_tlbs(kvm);
+ }
}
EXPORT_SYMBOL_GPL(kvm_slot_page_track_add_page);
@@ -121,7 +142,7 @@ EXPORT_SYMBOL_GPL(kvm_slot_page_track_add_page);
* @kvm: the guest instance we are interested in.
* @slot: the @gfn belongs to.
* @gfn: the guest page.
- * @mode: tracking mode, currently only write track is supported.
+ * @mode: tracking mode.
*/
void kvm_slot_page_track_remove_page(struct kvm *kvm,
struct kvm_memory_slot *slot, gfn_t gfn,
@@ -214,6 +235,75 @@ kvm_page_track_unregister_notifier(struct kvm *kvm,
}
EXPORT_SYMBOL_GPL(kvm_page_track_unregister_notifier);
+/*
+ * Notify the node that a read access is about to happen. Returning false
+ * doesn't stop the other nodes from being called, but it will stop
+ * the emulation.
+ *
+ * The node should figure out if the read page is the one that node is