Allow vcpus to pin spte translations by:

1) Creating a per-vcpu list of pinned ranges.
2) On mmu reload request:
   - Fault in the registered ranges.
   - Mark the sptes with a pinned bit.
   - Mark the shadow pages as pinned.
3) Then modify the following actions:
   - Page age => do not drop pinned sptes; report the page as young instead.
   - MMU notifiers => force an mmu reload request
     (which kicks the cpu out of guest mode).
   - GET_DIRTY_LOG => force an mmu reload request.
   - SLAB shrinker => skip deletion of pinned shadow pages.

TDP-only.

Signed-off-by: Marcelo Tosatti <mtosatti@xxxxxxxxxx>

---
 arch/x86/include/asm/kvm_host.h |   13 +
 arch/x86/kvm/mmu.c              |  294 +++++++++++++++++++++++++++++++++++++---
 arch/x86/kvm/mmu.h              |    7
 arch/x86/kvm/mmutrace.h         |   23 +++
 arch/x86/kvm/paging_tmpl.h      |    4
 arch/x86/kvm/x86.c              |    8 -
 include/linux/kvm_host.h        |    3
 include/uapi/linux/kvm.h        |    2
 virt/kvm/kvm_main.c             |   18 +-
 9 files changed, 340 insertions(+), 32 deletions(-)

Index: kvm.pinned-sptes/arch/x86/include/asm/kvm_host.h
===================================================================
--- kvm.pinned-sptes.orig/arch/x86/include/asm/kvm_host.h	2014-07-09 12:05:34.836161266 -0300
+++ kvm.pinned-sptes/arch/x86/include/asm/kvm_host.h	2014-07-09 12:08:45.341762782 -0300
@@ -97,6 +97,8 @@
 #define KVM_NR_FIXED_MTRR_REGION 88
 #define KVM_NR_VAR_MTRR 8
 
+#define KVM_MAX_PER_VCPU_PINNED_RANGE 10
+
 #define ASYNC_PF_PER_VCPU 64
 
 struct kvm_vcpu;
@@ -221,6 +223,8 @@
 	/* hold the gfn of each spte inside spt */
 	gfn_t *gfns;
 	bool unsync;
+	bool pinned;
+
 	int root_count;          /* Currently serving as active root */
 	unsigned int unsync_children;
 	unsigned long parent_ptes;	/* Reverse mapping for parent_pte */
@@ -337,6 +341,12 @@
 	KVM_DEBUGREG_WONT_EXIT = 2,
 };
 
+struct kvm_pinned_page_range {
+	gfn_t base_gfn;
+	unsigned long npages;
+	struct list_head link;
+};
+
 struct kvm_vcpu_arch {
 	/*
 	 * rip and regs accesses must go through
@@ -392,6 +402,9 @@
 	struct kvm_mmu_memory_cache mmu_page_cache;
 	struct kvm_mmu_memory_cache mmu_page_header_cache;
 
+	struct list_head pinned_mmu_pages;
+	atomic_t nr_pinned_ranges;
+
 	struct fpu guest_fpu;
 	u64 xcr0;
 	u64 guest_supported_xcr0;
Index: kvm.pinned-sptes/arch/x86/kvm/mmu.c
===================================================================
--- kvm.pinned-sptes.orig/arch/x86/kvm/mmu.c	2014-07-09 12:05:34.837161264 -0300
+++ kvm.pinned-sptes/arch/x86/kvm/mmu.c	2014-07-09 12:09:21.856684314 -0300
@@ -148,6 +148,9 @@
 #define SPTE_HOST_WRITEABLE	(1ULL << PT_FIRST_AVAIL_BITS_SHIFT)
 #define SPTE_MMU_WRITEABLE	(1ULL << (PT_FIRST_AVAIL_BITS_SHIFT + 1))
+#define SPTE_PINNED		(1ULL << (PT64_SECOND_AVAIL_BITS_SHIFT))
+
+#define SPTE_PINNED_BIT PT64_SECOND_AVAIL_BITS_SHIFT
 
 #define SHADOW_PT_INDEX(addr, level) PT64_INDEX(addr, level)
 
@@ -327,6 +330,11 @@
 	return pte & PT_PRESENT_MASK && !is_mmio_spte(pte);
 }
 
+static int is_pinned_spte(u64 spte)
+{
+	return spte & SPTE_PINNED && is_shadow_present_pte(spte);
+}
+
 static int is_large_pte(u64 pte)
 {
 	return pte & PT_PAGE_SIZE_MASK;
@@ -1176,6 +1184,16 @@
 		kvm_flush_remote_tlbs(vcpu->kvm);
 }
 
+static bool vcpu_has_pinned(struct kvm_vcpu *vcpu)
+{
+	return atomic_read(&vcpu->arch.nr_pinned_ranges);
+}
+
+static void mmu_reload_pinned_vcpus(struct kvm *kvm)
+{
+	make_all_cpus_request(kvm, KVM_REQ_MMU_RELOAD, &vcpu_has_pinned);
+}
+
 /*
  * Write-protect on the specified @sptep, @pt_protect indicates whether
  * spte write-protection is caused by protecting shadow page table.
@@ -1268,7 +1286,8 @@
 }
 
 static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp,
-			   struct kvm_memory_slot *slot, unsigned long data)
+			   struct kvm_memory_slot *slot, unsigned long data,
+			   bool age)
 {
 	u64 *sptep;
 	struct rmap_iterator iter;
@@ -1278,6 +1297,14 @@
 		BUG_ON(!(*sptep & PT_PRESENT_MASK));
 		rmap_printk("kvm_rmap_unmap_hva: spte %p %llx\n", sptep, *sptep);
 
+		if (is_pinned_spte(*sptep)) {
+			/* don't nuke pinned sptes if page aging: return
+			 * young=yes instead.
+			 */
+			if (age)
+				return 1;
+			mmu_reload_pinned_vcpus(kvm);
+		}
 		drop_spte(kvm, sptep);
 		need_tlb_flush = 1;
 	}
@@ -1286,7 +1313,8 @@
 }
 
 static int kvm_set_pte_rmapp(struct kvm *kvm, unsigned long *rmapp,
-			     struct kvm_memory_slot *slot, unsigned long data)
+			     struct kvm_memory_slot *slot, unsigned long data,
+			     bool age)
 {
 	u64 *sptep;
 	struct rmap_iterator iter;
@@ -1304,6 +1332,9 @@
 
 		need_flush = 1;
 
+		if (is_pinned_spte(*sptep))
+			mmu_reload_pinned_vcpus(kvm);
+
 		if (pte_write(*ptep)) {
 			drop_spte(kvm, sptep);
 			sptep = rmap_get_first(*rmapp, &iter);
@@ -1334,7 +1365,8 @@
 				int (*handler)(struct kvm *kvm,
 					       unsigned long *rmapp,
 					       struct kvm_memory_slot *slot,
-					       unsigned long data))
+					       unsigned long data,
+					       bool age))
 {
 	int j;
 	int ret = 0;
@@ -1374,7 +1406,7 @@
 			rmapp = __gfn_to_rmap(gfn_start, j, memslot);
 
 			for (; idx <= idx_end; ++idx)
-				ret |= handler(kvm, rmapp++, memslot, data);
+				ret |= handler(kvm, rmapp++, memslot, data, false);
 		}
 	}
 
@@ -1385,7 +1417,8 @@
 			  unsigned long data,
 			  int (*handler)(struct kvm *kvm, unsigned long *rmapp,
 					 struct kvm_memory_slot *slot,
-					 unsigned long data))
+					 unsigned long data,
+					 bool age))
 {
 	return kvm_handle_hva_range(kvm, hva, hva + 1, data, handler);
 }
@@ -1406,7 +1439,8 @@
 }
 
 static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
-			 struct kvm_memory_slot *slot, unsigned long data)
+			 struct kvm_memory_slot *slot, unsigned long data,
+			 bool age)
 {
 	u64 *sptep;
 	struct rmap_iterator uninitialized_var(iter);
@@ -1421,7 +1455,7 @@
 	 * out actively used pages or breaking up actively used hugepages.
 	 */
 	if (!shadow_accessed_mask) {
-		young = kvm_unmap_rmapp(kvm, rmapp, slot, data);
+		young = kvm_unmap_rmapp(kvm, rmapp, slot, data, true);
 		goto out;
 	}
 
@@ -1442,7 +1476,8 @@
 }
 
 static int kvm_test_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
-			      struct kvm_memory_slot *slot, unsigned long data)
+			      struct kvm_memory_slot *slot, unsigned long data,
+			      bool age)
 {
 	u64 *sptep;
 	struct rmap_iterator iter;
@@ -1480,7 +1515,7 @@
 
 	rmapp = gfn_to_rmap(vcpu->kvm, gfn, sp->role.level);
 
-	kvm_unmap_rmapp(vcpu->kvm, rmapp, NULL, 0);
+	kvm_unmap_rmapp(vcpu->kvm, rmapp, NULL, 0, false);
 	kvm_flush_remote_tlbs(vcpu->kvm);
 }
 
@@ -2753,7 +2788,8 @@
 }
 
 static bool handle_abnormal_pfn(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn,
-				pfn_t pfn, unsigned access, int *ret_val)
+				pfn_t pfn, unsigned access, int *ret_val,
+				bool pin)
 {
 	bool ret = true;
 
@@ -2763,8 +2799,14 @@
 		goto exit;
 	}
 
-	if (unlikely(is_noslot_pfn(pfn)))
+	if (unlikely(is_noslot_pfn(pfn))) {
+		/* pinned sptes must point to RAM */
+		if (unlikely(pin)) {
+			*ret_val = -EFAULT;
+			goto exit;
+		}
 		vcpu_cache_mmio_info(vcpu, gva, gfn, access);
+	}
 
 	ret = false;
 exit:
@@ -2818,7 +2860,7 @@
  * - false: let the real page fault path to fix it.
 */
static bool fast_page_fault(struct kvm_vcpu *vcpu, gva_t gva, int level,
-			    u32 error_code)
+			    u32 error_code, bool pin)
{
	struct kvm_shadow_walk_iterator iterator;
	struct kvm_mmu_page *sp;
@@ -2828,6 +2870,9 @@
	if (!VALID_PAGE(vcpu->arch.mmu.root_hpa))
		return false;

+	if (pin)
+		return false;
+
	if (!page_fault_can_be_fast(error_code))
		return false;

@@ -2895,9 +2940,71 @@
}

static bool try_async_pf(struct kvm_vcpu *vcpu, bool prefault, gfn_t gfn,
-			 gva_t gva, pfn_t *pfn, bool write, bool *writable);
+			 gva_t gva, pfn_t *pfn, bool write, bool *writable,
+			 bool pin);
 static void make_mmu_pages_available(struct kvm_vcpu *vcpu);
+
+static int get_sptep_hierarchy(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes[4])
+
+{
+	struct kvm_shadow_walk_iterator iterator;
+	int nr_sptes = 0;
+
+	if (!VALID_PAGE(vcpu->arch.mmu.root_hpa))
+		return nr_sptes;
+
+	for_each_shadow_entry(vcpu, addr, iterator) {
+		sptes[iterator.level-1] = iterator.sptep;
+		nr_sptes++;
+		if (!is_shadow_present_pte(*iterator.sptep))
+			break;
+	}
+
+	return nr_sptes;
+}
+
+static bool __direct_pin_sptes(struct kvm_vcpu *vcpu, gfn_t gfn, bool pin)
+{
+	u64 *sptes[4];
+	int r, i, level;
+
+	r = get_sptep_hierarchy(vcpu, gfn << PAGE_SHIFT, sptes);
+	if (!r)
+		return false;
+
+	level = 5 - r;
+	if (!is_last_spte(*sptes[level-1], level))
+		return false;
+	if (!is_shadow_present_pte(*sptes[level-1]))
+		return false;
+
+	for (i = 0; i < r; i++) {
+		u64 *sptep = sptes[3-i];
+		struct kvm_mmu_page *sp = page_header(__pa(sptep));
+
+		if (pin) {
+			sp->pinned = true;
+			set_bit(SPTE_PINNED_BIT, (unsigned long *)sptep);
+		} else {
+			sp->pinned = false;
+			clear_bit(SPTE_PINNED_BIT, (unsigned long *)sptep);
+		}
+	}
+
+	return true;
+}
+
+static bool direct_pin_sptes(struct kvm_vcpu *vcpu, gfn_t gfn)
+{
+	return __direct_pin_sptes(vcpu, gfn, true);
+}
+
+static bool direct_unpin_sptes(struct kvm_vcpu *vcpu, gfn_t gfn)
+{
+	return __direct_pin_sptes(vcpu, gfn, false);
+}
+
 static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, u32 error_code,
			 gfn_t gfn, bool prefault, bool pin, bool *pinned)
 {
@@ -2923,16 +3030,17 @@
	} else
		level = PT_PAGE_TABLE_LEVEL;

-	if (fast_page_fault(vcpu, v, level, error_code))
+	if (fast_page_fault(vcpu, v, level, error_code, pin))
		return 0;

	mmu_seq = vcpu->kvm->mmu_notifier_seq;
	smp_rmb();

-	if (try_async_pf(vcpu, prefault, gfn, v, &pfn, write, &map_writable))
+	if (try_async_pf(vcpu, prefault, gfn, v, &pfn, write, &map_writable,
+			 pin))
		return 0;

-	if (handle_abnormal_pfn(vcpu, v, gfn, pfn, ACC_ALL, &r))
+	if (handle_abnormal_pfn(vcpu, v, gfn, pfn, ACC_ALL, &r, pin))
		return r;

	spin_lock(&vcpu->kvm->mmu_lock);
@@ -2943,6 +3051,8 @@
	transparent_hugepage_adjust(vcpu, &gfn, &pfn, &level);
	r = __direct_map(vcpu, v, write, map_writable, level, gfn, pfn,
			 prefault);
+	if (pin)
+		*pinned = direct_pin_sptes(vcpu, gfn);

	spin_unlock(&vcpu->kvm->mmu_lock);

@@ -3131,7 +3241,7 @@
			lm_root = (void*)get_zeroed_page(GFP_KERNEL);
			if (lm_root == NULL)
-				return 1;
+				return -ENOMEM;

			lm_root[0] = __pa(vcpu->arch.mmu.pae_root) | pm_mask;

@@ -3349,7 +3459,8 @@
}

static bool try_async_pf(struct kvm_vcpu *vcpu, bool prefault, gfn_t gfn,
-			 gva_t gva, pfn_t *pfn, bool write, bool *writable)
+			 gva_t gva, pfn_t *pfn, bool write, bool *writable,
+			 bool pin)
{
	bool async;

@@ -3358,7 +3469,7 @@
	if (!async)
		return false; /* *pfn has correct page already */

-	if (!prefault && can_do_async_pf(vcpu)) {
+	if (!prefault && !pin && can_do_async_pf(vcpu)) {
		trace_kvm_try_async_get_page(gva, gfn);
		if (kvm_find_async_pf_gfn(vcpu, gfn)) {
			trace_kvm_async_pf_doublefault(gva, gfn);
@@ -3406,16 +3517,17 @@
	} else
		level = PT_PAGE_TABLE_LEVEL;

-	if (fast_page_fault(vcpu, gpa, level, error_code))
+	if (fast_page_fault(vcpu, gpa, level, error_code, pin))
		return 0;

	mmu_seq = vcpu->kvm->mmu_notifier_seq;
	smp_rmb();

-	if (try_async_pf(vcpu, prefault, gfn, gpa, &pfn, write, &map_writable))
+	if (try_async_pf(vcpu, prefault, gfn, gpa, &pfn, write, &map_writable,
+			 pin))
		return 0;

-	if (handle_abnormal_pfn(vcpu, 0, gfn, pfn, ACC_ALL, &r))
+	if (handle_abnormal_pfn(vcpu, 0, gfn, pfn, ACC_ALL, &r, pin))
		return r;

	spin_lock(&vcpu->kvm->mmu_lock);
@@ -3426,6 +3538,8 @@
	transparent_hugepage_adjust(vcpu, &gfn, &pfn, &level);
	r = __direct_map(vcpu, gpa, write, map_writable,
			 level, gfn, pfn, prefault);
+	if (pin)
+		*pinned = direct_pin_sptes(vcpu, gfn);
	spin_unlock(&vcpu->kvm->mmu_lock);

	return r;
@@ -3903,6 +4017,141 @@
}
EXPORT_SYMBOL_GPL(kvm_mmu_reset_context);

+int kvm_mmu_register_pinned_range(struct kvm_vcpu *vcpu,
+				  gfn_t base_gfn, unsigned long npages)
+{
+	struct kvm_pinned_page_range *p;
+
+	if (!tdp_enabled) {
+		WARN_ON(1);
+		return -EINVAL;
+	}
+
+	list_for_each_entry(p, &vcpu->arch.pinned_mmu_pages, link) {
+		if (p->base_gfn == base_gfn && p->npages == npages) {
+			return -EEXIST;
+		}
+	}
+
+	if (atomic_read(&vcpu->arch.nr_pinned_ranges) >=
+	    KVM_MAX_PER_VCPU_PINNED_RANGE)
+		return -ENOSPC;
+
+	p = kzalloc(sizeof(struct kvm_pinned_page_range), GFP_KERNEL);
+	if (!p)
+		return -ENOMEM;
+
+	atomic_inc(&vcpu->arch.nr_pinned_ranges);
+
+	trace_kvm_mmu_register_pinned_range(vcpu->vcpu_id, base_gfn, npages);
+
+	INIT_LIST_HEAD(&p->link);
+	p->base_gfn = base_gfn;
+	p->npages = npages;
+	list_add(&p->link, &vcpu->arch.pinned_mmu_pages);
+	kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
+
+	return 0;
+}
+
+
+void unregister_pinned_sptes(struct kvm_vcpu *vcpu, unsigned long base_gfn,
+			     unsigned long npages)
+{
+	gfn_t gfn;
+
+	for (gfn = base_gfn; gfn < base_gfn+npages; gfn++)
+		direct_unpin_sptes(vcpu, gfn);
+
+}
+
+int kvm_mmu_unregister_pinned_range(struct kvm_vcpu *vcpu,
+				    gfn_t base_gfn, unsigned long npages)
+{
+	struct kvm_pinned_page_range *p;
+
+	list_for_each_entry(p, &vcpu->arch.pinned_mmu_pages, link) {
+		if (p->base_gfn == base_gfn && p->npages == npages) {
+			list_del(&p->link);
+			atomic_dec(&vcpu->arch.nr_pinned_ranges);
+			spin_lock(&vcpu->kvm->mmu_lock);
+			mmu_reload_pinned_vcpus(vcpu->kvm);
+			unregister_pinned_sptes(vcpu, base_gfn, npages);
+			spin_unlock(&vcpu->kvm->mmu_lock);
+			kfree(p);
+			return 0;
+		}
+	}
+
+	return -ENOENT;
+}
+
+void kvm_mmu_free_pinned_ranges(struct kvm_vcpu *vcpu)
+{
+	struct kvm_pinned_page_range *p, *p2;
+
+	list_for_each_entry_safe(p, p2, &vcpu->arch.pinned_mmu_pages, link) {
+		list_del(&p->link);
+		kfree(p);
+	}
+}
+
+/*
+ * Pin KVM MMU page translations. This guarantees that, for valid
+ * addresses registered by kvm_mmu_register_pinned_range (a valid address
+ * being one which possesses sufficient information for the fault to
+ * be resolved), valid translations exist while in guest mode and
+ * therefore no VM-exits due to faults will occur.
+ *
+ * Failure to instantiate pages will abort guest entry.
+ *
+ * Pinning is not guaranteed while executing as L2 guest.
+ *
+ */
+
+static int kvm_mmu_pin_pages(struct kvm_vcpu *vcpu)
+{
+	struct kvm_pinned_page_range *p;
+	int r = 1;
+
+	if (is_guest_mode(vcpu))
+		return r;
+
+	if (!vcpu->arch.mmu.direct_map)
+		return r;
+
+	ASSERT(VALID_PAGE(vcpu->arch.mmu.root_hpa));
+
+	list_for_each_entry(p, &vcpu->arch.pinned_mmu_pages, link) {
+		gfn_t gfn_offset;
+
+		for (gfn_offset = 0; gfn_offset < p->npages; gfn_offset++) {
+			gfn_t gfn = p->base_gfn + gfn_offset;
+			int r;
+			bool pinned = false;
+
+			r = vcpu->arch.mmu.page_fault(vcpu, gfn << PAGE_SHIFT,
+						      PFERR_WRITE_MASK, false,
+						      true, &pinned);
+			/* MMU notifier sequence window: retry */
+			if (!r && !pinned)
+				kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
+			if (r) {
+				vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+				vcpu->run->internal.suberror =
+					KVM_INTERNAL_ERROR_PIN_FAILURE;
+				vcpu->run->internal.ndata = 1;
+				vcpu->run->internal.data[0] = gfn;
+				r = 0;
+				goto out;
+			}
+
+		}
+	}
+out:
+	return r;
+}
+
 int kvm_mmu_load(struct kvm_vcpu *vcpu)
 {
	int r;
@@ -3916,6 +4165,7 @@
		goto out;
	/* set_cr3() should ensure TLB has been flushed */
	vcpu->arch.mmu.set_cr3(vcpu, vcpu->arch.mmu.root_hpa);
+	r = kvm_mmu_pin_pages(vcpu);
 out:
	return r;
 }
Index: kvm.pinned-sptes/arch/x86/kvm/mmu.h
===================================================================
--- kvm.pinned-sptes.orig/arch/x86/kvm/mmu.h	2014-07-09 12:05:30.018171068 -0300
+++ kvm.pinned-sptes/arch/x86/kvm/mmu.h	2014-07-09 12:08:45.343762778 -0300
@@ -94,7 +94,7 @@
 static inline int kvm_mmu_reload(struct kvm_vcpu *vcpu)
 {
	if (likely(vcpu->arch.mmu.root_hpa != INVALID_PAGE))
-		return 0;
+		return 1;

	return kvm_mmu_load(vcpu);
 }
@@ -178,4 +178,9 @@
 }

 void kvm_mmu_invalidate_zap_all_pages(struct kvm *kvm);
+int kvm_mmu_register_pinned_range(struct kvm_vcpu *vcpu,
+				  gfn_t base_gfn, unsigned long npages);
+int kvm_mmu_unregister_pinned_range(struct kvm_vcpu *vcpu,
+				    gfn_t base_gfn, unsigned long npages);
+void kvm_mmu_free_pinned_ranges(struct kvm_vcpu *vcpu);
 #endif
Index: kvm.pinned-sptes/arch/x86/kvm/x86.c
===================================================================
--- kvm.pinned-sptes.orig/arch/x86/kvm/x86.c	2014-07-09 12:05:34.838161262 -0300
+++ kvm.pinned-sptes/arch/x86/kvm/x86.c	2014-07-09 12:08:45.346762771 -0300
@@ -6017,7 +6017,7 @@
	}

	r = kvm_mmu_reload(vcpu);
-	if (unlikely(r)) {
+	if (unlikely(r <= 0)) {
		goto cancel_injection;
	}

@@ -7049,6 +7049,8 @@
	kvm_async_pf_hash_reset(vcpu);
	kvm_pmu_init(vcpu);

+	INIT_LIST_HEAD(&vcpu->arch.pinned_mmu_pages);
+	atomic_set(&vcpu->arch.nr_pinned_ranges, 0);
	return 0;
 fail_free_wbinvd_dirty_mask:
@@ -7069,6 +7071,7 @@
 {
	int idx;

+	kvm_mmu_free_pinned_ranges(vcpu);
	kvm_pmu_destroy(vcpu);
	kfree(vcpu->arch.mce_banks);
	kvm_free_lapic(vcpu);
@@ -7113,6 +7116,7 @@
	int r;
	r = vcpu_load(vcpu);
	BUG_ON(r);
+	kvm_mmu_free_pinned_ranges(vcpu);
	kvm_mmu_unload(vcpu);
	vcpu_put(vcpu);
 }
@@ -7408,7 +7412,7 @@
		return;

	r = kvm_mmu_reload(vcpu);
-	if (unlikely(r))
+	if (unlikely(r <= 0))
		return;

	if (!vcpu->arch.mmu.direct_map &&
Index: kvm.pinned-sptes/arch/x86/kvm/paging_tmpl.h
===================================================================
--- kvm.pinned-sptes.orig/arch/x86/kvm/paging_tmpl.h	2014-07-09 12:05:34.837161264 -0300
+++ kvm.pinned-sptes/arch/x86/kvm/paging_tmpl.h	2014-07-09 12:08:45.346762771 -0300
@@ -747,11 +747,11 @@
	smp_rmb();

	if (try_async_pf(vcpu, prefault, walker.gfn, addr, &pfn, write_fault,
-			 &map_writable))
+			 &map_writable, false))
		return 0;

	if (handle_abnormal_pfn(vcpu, mmu_is_nested(vcpu) ? 0 : addr,
-				walker.gfn, pfn, walker.pte_access, &r))
+				walker.gfn, pfn, walker.pte_access, &r, false))
		return r;

	/*
Index: kvm.pinned-sptes/arch/x86/kvm/mmutrace.h
===================================================================
--- kvm.pinned-sptes.orig/arch/x86/kvm/mmutrace.h	2014-07-09 12:05:30.018171068 -0300
+++ kvm.pinned-sptes/arch/x86/kvm/mmutrace.h	2014-07-09 12:08:45.347762769 -0300
@@ -322,6 +322,29 @@
		__entry->kvm_gen == __entry->spte_gen
	)
);
+
+TRACE_EVENT(
+	kvm_mmu_register_pinned_range,
+	TP_PROTO(unsigned int vcpu_id, gfn_t gfn, unsigned long npages),
+	TP_ARGS(vcpu_id, gfn, npages),
+
+	TP_STRUCT__entry(
+		__field(	unsigned int,	vcpu_id	)
+		__field(	gfn_t,		gfn	)
+		__field(	unsigned long,	npages	)
+	),
+
+	TP_fast_assign(
+		__entry->vcpu_id	= vcpu_id;
+		__entry->gfn		= gfn;
+		__entry->npages		= npages;
+	),
+
+	TP_printk("vcpu_id %u gfn %llx npages %lx",
+		  __entry->vcpu_id,
+		  __entry->gfn,
+		  __entry->npages)
+);
 #endif /* _TRACE_KVMMMU_H */

 #undef TRACE_INCLUDE_PATH
Index: kvm.pinned-sptes/include/uapi/linux/kvm.h
===================================================================
--- kvm.pinned-sptes.orig/include/uapi/linux/kvm.h	2014-07-09 12:05:30.019171066 -0300
+++ kvm.pinned-sptes/include/uapi/linux/kvm.h	2014-07-09 12:08:45.347762769 -0300
@@ -180,6 +180,8 @@
 #define KVM_INTERNAL_ERROR_SIMUL_EX	2
 /* Encounter unexpected vm-exit due to delivery event. */
 #define KVM_INTERNAL_ERROR_DELIVERY_EV	3
+/* Failure to pin address translation. */
+#define KVM_INTERNAL_ERROR_PIN_FAILURE	4

 /* for KVM_RUN, returned by mmap(vcpu_fd, offset=0) */
 struct kvm_run {
Index: kvm.pinned-sptes/include/linux/kvm_host.h
===================================================================
--- kvm.pinned-sptes.orig/include/linux/kvm_host.h	2014-07-09 12:05:30.019171066 -0300
+++ kvm.pinned-sptes/include/linux/kvm_host.h	2014-07-09 12:08:45.348762767 -0300
@@ -591,6 +591,9 @@
 void kvm_load_guest_fpu(struct kvm_vcpu *vcpu);
 void kvm_put_guest_fpu(struct kvm_vcpu *vcpu);

+bool make_all_cpus_request(struct kvm *kvm, unsigned int req,
+			   bool (*vcpukick)(struct kvm_vcpu *));
+
 void kvm_flush_remote_tlbs(struct kvm *kvm);
 void kvm_reload_remote_mmus(struct kvm *kvm);
 void kvm_make_mclock_inprogress_request(struct kvm *kvm);
Index: kvm.pinned-sptes/virt/kvm/kvm_main.c
===================================================================
--- kvm.pinned-sptes.orig/virt/kvm/kvm_main.c	2014-07-09 12:05:30.019171066 -0300
+++ kvm.pinned-sptes/virt/kvm/kvm_main.c	2014-07-09 12:08:45.349762765 -0300
@@ -152,7 +152,8 @@
 {
 }

-static bool make_all_cpus_request(struct kvm *kvm, unsigned int req)
+bool make_all_cpus_request(struct kvm *kvm, unsigned int req,
+			   bool (*vcpukick)(struct kvm_vcpu *))
 {
	int i, cpu, me;
	cpumask_var_t cpus;
@@ -163,6 +164,8 @@
	me = get_cpu();
	kvm_for_each_vcpu(i, vcpu, kvm) {
+		if (vcpukick && !vcpukick(vcpu))
+			continue;
		kvm_make_request(req, vcpu);

		cpu = vcpu->cpu;
@@ -189,7 +192,7 @@
	long dirty_count = kvm->tlbs_dirty;

	smp_mb();
-	if (make_all_cpus_request(kvm, KVM_REQ_TLB_FLUSH))
+	if (make_all_cpus_request(kvm, KVM_REQ_TLB_FLUSH, NULL))
		++kvm->stat.remote_tlb_flush;
	cmpxchg(&kvm->tlbs_dirty, dirty_count, 0);
 }
@@ -197,17 +200,22 @@

 void kvm_reload_remote_mmus(struct kvm *kvm)
 {
-	make_all_cpus_request(kvm, KVM_REQ_MMU_RELOAD);
+	make_all_cpus_request(kvm, KVM_REQ_MMU_RELOAD, NULL);
+}
+
+void kvm_reload_pinned_remote_mmus(struct kvm *kvm)
+{
+	make_all_cpus_request(kvm, KVM_REQ_MMU_RELOAD, NULL);
 }

 void kvm_make_mclock_inprogress_request(struct kvm *kvm)
 {
-	make_all_cpus_request(kvm, KVM_REQ_MCLOCK_INPROGRESS);
+	make_all_cpus_request(kvm, KVM_REQ_MCLOCK_INPROGRESS, NULL);
 }

 void kvm_make_scan_ioapic_request(struct kvm *kvm)
 {
-	make_all_cpus_request(kvm, KVM_REQ_SCAN_IOAPIC);
+	make_all_cpus_request(kvm, KVM_REQ_SCAN_IOAPIC, NULL);
 }

 int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id)
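
For reference, below is a minimal sketch of how a caller might drive the
interface added above. It is illustrative only and not part of this patch:
the struct kvm_pin_range layout and the kvm_vcpu_ioctl_pin_range() handler
name are assumptions invented here, while kvm_mmu_register_pinned_range()
and kvm_mmu_unregister_pinned_range() are the functions this patch defines.
Note that registration only queues KVM_REQ_MMU_RELOAD; the faulting and
spte pinning happen in kvm_mmu_pin_pages() on the next guest entry.

/*
 * Illustrative sketch only -- hypothetical per-vcpu ioctl plumbing.
 * The real userspace interface would be added by a separate patch.
 */
struct kvm_pin_range {
	__u64 gpa;	/* guest physical base address, page aligned */
	__u64 size;	/* size in bytes, multiple of PAGE_SIZE */
};

static int kvm_vcpu_ioctl_pin_range(struct kvm_vcpu *vcpu,
				    struct kvm_pin_range *range, bool pin)
{
	gfn_t base_gfn = range->gpa >> PAGE_SHIFT;
	unsigned long npages = range->size >> PAGE_SHIFT;

	if (!npages || (range->gpa & ~PAGE_MASK) || (range->size & ~PAGE_MASK))
		return -EINVAL;

	if (pin)
		/* Queues KVM_REQ_MMU_RELOAD; the range is faulted in and
		 * its sptes marked pinned before the next guest entry. */
		return kvm_mmu_register_pinned_range(vcpu, base_gfn, npages);

	/* Kicks pinned vcpus out of guest mode and clears the pinned
	 * bits under mmu_lock. */
	return kvm_mmu_unregister_pinned_range(vcpu, base_gfn, npages);
}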