This patch introduces PTE_LIST_SPTE_SKIP, which is a placeholder; it is set
on a pte-list after removing a spte so that the other sptes on this pte_list
are not moved and the pte-list-descs on the pte-list are not freed.

If a vcpu cannot add a spte to the pte-list (e.g. the rmap on an invalid
memslot) and sptes cannot be freed during a pte-list walk, we can
concurrently clear sptes on the pte-list; the worst case is that we double
zap a spte, which is safe.

This patch only ensures that concurrently zapping a pte-list is safe; we
will keep sptes available during concurrent clearing in the later patches.

Signed-off-by: Xiao Guangrong <xiaoguangrong@xxxxxxxxxxxxxxxxxx>
---
 arch/x86/kvm/mmu.c |   62 +++++++++++++++++++++++++++++++++++++++++++++++----
 1 files changed, 57 insertions(+), 5 deletions(-)

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 99ad2a4..850eab5 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -900,6 +900,18 @@ static int mapping_level(struct kvm_vcpu *vcpu, gfn_t large_gfn)
 }
 
 /*
+ * It is the placeholder and it will be set on pte-list after removing
+ * a spte so that other sptes on this pte_list are not moved and the
+ * pte-list-descs on the pte-list are not freed.
+ *
+ * If vcpu can not add spte to the pte-list (e.g. the rmap on invalid
+ * memslot) and spte can not be freed during pte-list walk, we can
+ * concurrently clear sptes on the pte-list, the worst case is, we double
+ * zap a spte that is safe.
+ */
+#define PTE_LIST_SPTE_SKIP (u64 *)((~0x0ul) & (~1))
+
+/*
  * Pte mapping structures:
  *
  * If pte_list bit zero is zero, then pte_list point to the spte.
@@ -1003,6 +1015,40 @@ static void pte_list_remove(u64 *spte, unsigned long *pte_list)
 	}
 }
 
+static void pte_list_clear_concurrently(u64 *spte, unsigned long *pte_list)
+{
+	struct pte_list_desc *desc;
+	unsigned long pte_value = *pte_list;
+	int i;
+
+	/* Empty pte list stores nothing. */
+	WARN_ON(!pte_value);
+
+	if (!(pte_value & 1)) {
+		if ((u64 *)pte_value == spte) {
+			*pte_list = (unsigned long)PTE_LIST_SPTE_SKIP;
+			return;
+		}
+
+		/* someone has already cleared it. */
+		WARN_ON(pte_value != (unsigned long)PTE_LIST_SPTE_SKIP);
+		return;
+	}
+
+	desc = (struct pte_list_desc *)(pte_value & ~1ul);
+	while (desc) {
+		for (i = 0; i < PTE_LIST_EXT && desc->sptes[i]; ++i)
+			if (desc->sptes[i] == spte) {
+				desc->sptes[i] = PTE_LIST_SPTE_SKIP;
+				return;
+			}
+
+		desc = desc->more;
+	}
+
+	return;
+}
+
 typedef void (*pte_list_walk_fn) (u64 *spte);
 static void pte_list_walk(unsigned long *pte_list, pte_list_walk_fn fn)
 {
@@ -1214,6 +1260,12 @@ spte_write_protect(struct kvm *kvm, u64 *sptep, bool *flush, bool pt_protect)
 	return false;
 }
 
+/* PTE_LIST_SPTE_SKIP is only used on invalid rmap. */
+static void check_valid_sptep(u64 *sptep)
+{
+	WARN_ON(sptep == PTE_LIST_SPTE_SKIP || !is_rmap_spte(*sptep));
+}
+
 static bool __rmap_write_protect(struct kvm *kvm, unsigned long *rmapp,
 				 bool pt_protect)
 {
@@ -1222,7 +1274,7 @@ static bool __rmap_write_protect(struct kvm *kvm, unsigned long *rmapp,
 	bool flush = false;
 
 	for (sptep = rmap_get_first(*rmapp, &iter); sptep;) {
-		BUG_ON(!(*sptep & PT_PRESENT_MASK));
+		check_valid_sptep(sptep);
 		if (spte_write_protect(kvm, sptep, &flush, pt_protect)) {
 			sptep = rmap_get_first(*rmapp, &iter);
 			continue;
@@ -1293,7 +1345,7 @@ static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp)
 	int need_tlb_flush = 0;
 
 	while ((sptep = rmap_get_first(*rmapp, &iter))) {
-		BUG_ON(!(*sptep & PT_PRESENT_MASK));
+		check_valid_sptep(sptep);
 		rmap_printk("kvm_rmap_unmap_hva: spte %p %llx\n", sptep, *sptep);
 
 		drop_spte(kvm, sptep);
@@ -1322,7 +1374,7 @@ static int kvm_set_pte_rmapp(struct kvm *kvm, unsigned long *rmapp,
 	new_pfn = pte_pfn(*ptep);
 
 	for (sptep = rmap_get_first(*rmapp, &iter); sptep;) {
-		BUG_ON(!is_shadow_present_pte(*sptep));
+		check_valid_sptep(sptep);
 		rmap_printk("kvm_set_pte_rmapp: spte %p %llx\n", sptep, *sptep);
 
 		need_flush = 1;
@@ -1455,7 +1507,7 @@ static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp)
 
 	for (sptep = rmap_get_first(*rmapp, &iter); sptep;
 	     sptep = rmap_get_next(&iter)) {
-		BUG_ON(!is_shadow_present_pte(*sptep));
+		check_valid_sptep(sptep);
 
 		if (*sptep & shadow_accessed_mask) {
 			young = 1;
@@ -1493,7 +1545,7 @@ static int kvm_test_age_rmapp(struct kvm *kvm, unsigned long *rmapp)
 
 	for (sptep = rmap_get_first(*rmapp, &iter); sptep;
 	     sptep = rmap_get_next(&iter)) {
-		BUG_ON(!is_shadow_present_pte(*sptep));
+		check_valid_sptep(sptep);
 
 		if (*sptep & shadow_accessed_mask) {
 			young = 1;
-- 
1.7.7.6
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html