In a typical guest boot workload, only 2-3 memslots are used extensively, and even then it is mostly the same memslot that is looked up repeatedly. Adding an LRU cache improves the average lookup time from 46 to 28 cycles (~40%) for this workload. Signed-off-by: Igor Mammedov <imammedo@xxxxxxxxxx> --- include/linux/kvm_host.h | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 231dd94..1a37144 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -353,6 +353,7 @@ struct kvm_memslots { struct kvm_memory_slot memslots[KVM_MEM_SLOTS_NUM]; /* The mapping table from slot id to the index in memslots[]. */ short id_to_index[KVM_MEM_SLOTS_NUM]; + atomic_t lru_slot; }; struct kvm { @@ -790,12 +791,19 @@ static inline void kvm_guest_exit(void) static inline struct kvm_memory_slot * search_memslots(struct kvm_memslots *slots, gfn_t gfn) { - struct kvm_memory_slot *memslot; + int slot = atomic_read(&slots->lru_slot); + struct kvm_memory_slot *memslot = &slots->memslots[slot]; + + if (gfn >= memslot->base_gfn && + gfn < memslot->base_gfn + memslot->npages) + return memslot; kvm_for_each_memslot(memslot, slots) if (gfn >= memslot->base_gfn && - gfn < memslot->base_gfn + memslot->npages) + gfn < memslot->base_gfn + memslot->npages) { + atomic_set(&slots->lru_slot, memslot - slots->memslots); return memslot; + } return NULL; } -- 1.8.3.1 -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html