From: Lai Jiangshan <jiangshan.ljs@xxxxxxxxxxxx> Simplify pte_list_{add|remove} by ensuring all the non-head pte_list_desc to be full and addition/removal actions being performed on the head. To make pte_list_add() return a count as before, @tail_count is also added to the struct pte_list_desc. No visible performace is changed in tests. But pte_list_add() is no longer shown in the perf result for the COWed pages even the guest forks millions of tasks. Signed-off-by: Lai Jiangshan <jiangshan.ljs@xxxxxxxxxxxx> --- arch/x86/kvm/mmu/mmu.c | 77 +++++++++++++++++++++--------------------- 1 file changed, 38 insertions(+), 39 deletions(-) diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 55c9fcd6ed4f..fc64f5f0822f 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -131,10 +131,16 @@ module_param(dbg, bool, 0644); struct pte_list_desc { struct pte_list_desc *more; /* - * Stores number of entries stored in the pte_list_desc. No need to be - * u64 but just for easier alignment. When PTE_LIST_EXT, means full. + * @spte_count: Stores number of entries stored in the pte_list_desc. + * When PTE_LIST_EXT, means full. All the non-head pte_list_desc must + * be full. + * + * @tail_count: Stores number of entries stored in its tail descriptions. + * + * No need to be u32 but just for easier alignment. */ - u64 spte_count; + u32 spte_count; + u32 tail_count; u64 *sptes[PTE_LIST_EXT]; }; @@ -917,22 +923,25 @@ static int pte_list_add(struct kvm_mmu_memory_cache *cache, u64 *spte, desc->sptes[0] = (u64 *)rmap_head->val; desc->sptes[1] = spte; desc->spte_count = 2; + desc->tail_count = 0; rmap_head->val = (unsigned long)desc | 1; ++count; } else { rmap_printk("%p %llx many->many\n", spte, *spte); desc = (struct pte_list_desc *)(rmap_head->val & ~1ul); - while (desc->spte_count == PTE_LIST_EXT) { - count += PTE_LIST_EXT; - if (!desc->more) { - desc->more = kvm_mmu_memory_cache_alloc(cache); - desc = desc->more; - desc->spte_count = 0; - break; - } - desc = desc->more; + count = desc->tail_count + desc->spte_count; + /* + * When the head pte_list_desc is full, the whole list must + * be full since all the non-head pte_list_desc are full. + * So just allocate a new head. + */ + if (desc->spte_count == PTE_LIST_EXT) { + desc = kvm_mmu_memory_cache_alloc(cache); + desc->more = (struct pte_list_desc *)(rmap_head->val & ~1ul); + desc->spte_count = 0; + desc->tail_count = count; + rmap_head->val = (unsigned long)desc | 1; } - count += desc->spte_count; desc->sptes[desc->spte_count++] = spte; } return count; @@ -940,30 +949,30 @@ static int pte_list_add(struct kvm_mmu_memory_cache *cache, u64 *spte, static void pte_list_desc_remove_entry(struct kvm_rmap_head *rmap_head, - struct pte_list_desc *desc, int i, - struct pte_list_desc *prev_desc) + struct pte_list_desc *desc, int i) { - int j = desc->spte_count - 1; + struct pte_list_desc *head_desc = (struct pte_list_desc *)(rmap_head->val & ~1ul); + int j = head_desc->spte_count - 1; - desc->sptes[i] = desc->sptes[j]; - desc->sptes[j] = NULL; - desc->spte_count--; - if (desc->spte_count) + /* + * Grab an entry from the head pte_list_desc to ensure that + * the non-head pte_list_desc are full. + */ + desc->sptes[i] = head_desc->sptes[j]; + head_desc->sptes[j] = NULL; + head_desc->spte_count--; + if (head_desc->spte_count) return; - if (!prev_desc && !desc->more) + if (!head_desc->more) rmap_head->val = 0; else - if (prev_desc) - prev_desc->more = desc->more; - else - rmap_head->val = (unsigned long)desc->more | 1; - mmu_free_pte_list_desc(desc); + rmap_head->val = (unsigned long)head_desc->more | 1; + mmu_free_pte_list_desc(head_desc); } static void pte_list_remove(u64 *spte, struct kvm_rmap_head *rmap_head) { struct pte_list_desc *desc; - struct pte_list_desc *prev_desc; int i; if (!rmap_head->val) { @@ -979,16 +988,13 @@ static void pte_list_remove(u64 *spte, struct kvm_rmap_head *rmap_head) } else { rmap_printk("%p many->many\n", spte); desc = (struct pte_list_desc *)(rmap_head->val & ~1ul); - prev_desc = NULL; while (desc) { for (i = 0; i < desc->spte_count; ++i) { if (desc->sptes[i] == spte) { - pte_list_desc_remove_entry(rmap_head, - desc, i, prev_desc); + pte_list_desc_remove_entry(rmap_head, desc, i); return; } } - prev_desc = desc; desc = desc->more; } pr_err("%s: %p many->many\n", __func__, spte); @@ -1035,7 +1041,6 @@ static bool kvm_zap_all_rmap_sptes(struct kvm *kvm, unsigned int pte_list_count(struct kvm_rmap_head *rmap_head) { struct pte_list_desc *desc; - unsigned int count = 0; if (!rmap_head->val) return 0; @@ -1043,13 +1048,7 @@ unsigned int pte_list_count(struct kvm_rmap_head *rmap_head) return 1; desc = (struct pte_list_desc *)(rmap_head->val & ~1ul); - - while (desc) { - count += desc->spte_count; - desc = desc->more; - } - - return count; + return desc->tail_count + desc->spte_count; } static struct kvm_rmap_head *gfn_to_rmap(gfn_t gfn, int level, -- 2.19.1.6.gb485710b