On Fri, May 09, 2014 at 09:25:46AM +0100, Marc Zyngier wrote: > On Fri, May 09 2014 at 8:58:18 am BST, Steve Capper <Steve.Capper@xxxxxxx> wrote: > > Hi Steve, > > > Hi, > > I've taken a quick look at this, I see: > > #define kvm_pud_addr_end(addr,end) (end) > > > > I think it should besomething like: > > #define kvm_pud_addr_end(addr, end) \ > > ({ u64 __boundary = ((addr) + PUD_SIZE) & PUD_MASK; \ > > (__boundary - 1 < (end) - 1)? __boundary: (end); \ > > }) > > > > Otherwise we'll jump to end, even if end is greater than the end of the > > pud we're examining... > > Certainly. But we're looking at a case where we have only 3 levels of > page tables, hence no PUD. My expectations would be that we're never > going to pass kvm_pud_addr_end anything but the result of > kvm_pgd_addr_end, so we wouldn't need the "bells and whistles" version > on ARMv7. Am I missing something obvious here? > we do from unmap_range(), but I assume what you mean is that that's simply broken and shoul be refactored so we pass only something from pgd_addr_end, in which case we're good with the current definition. > > As it stands, unmap_range does not appear to handle zero or block > > entries for the first level descriptor. That coupled with the > > kvm_pud_addr_end definition will mean it can exit prematurely. > > > > pgd_none isn't checked for (which would be a problem for 4-levels), and > > the function could probably benefit from being split up into pgd, pud, > > pmd and pte levels. > > Yes, unmap_range is utterly borken. agreed. > I wasn't reading Mario's email > properly (too much GICv3 in the morning... ;-). > > Steve, would you be willing to submit a patch for that? > I actually had a half-begun-half-done attempt of refactoring that stashed away, the following (UNTESTED) patch demonstrates my thoughts, comments welcome: diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h index 5c7aa3c..5cc0b0f 100644 --- a/arch/arm/include/asm/kvm_mmu.h +++ b/arch/arm/include/asm/kvm_mmu.h @@ -127,6 +127,18 @@ static inline void kvm_set_s2pmd_writable(pmd_t *pmd) (__boundary - 1 < (end) - 1)? __boundary: (end); \ }) +static inline bool kvm_page_empty(void *ptr) +{ + struct page *ptr_page = virt_to_page(ptr); + return page_count(ptr_page) == 1; +} + + +#define kvm_pte_table_empty(ptep) kvm_page_empty(ptep) +#define kvm_pmd_table_empty(pmdp) kvm_page_empty(pmdp) +#define kvm_pud_table_empty(pudp) (0) + + struct kvm; #define kvm_flush_dcache_to_poc(a,l) __cpuc_flush_dcache_area((a), (l)) diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c index 16f8049..02c12cc 100644 --- a/arch/arm/kvm/mmu.c +++ b/arch/arm/kvm/mmu.c @@ -46,6 +46,10 @@ static phys_addr_t hyp_idmap_vector; #define kvm_pmd_huge(_x) (pmd_huge(_x) || pmd_trans_huge(_x)) +#ifndef __unused +# define __unused __attribute__((unused)) +#endif + static void kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa) { /* @@ -90,10 +94,13 @@ static void *mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc) return p; } -static bool page_empty(void *ptr) +static void clear_pgd_entry(struct kvm *kvm, pgd_t *pgd, phys_addr_t addr) { - struct page *ptr_page = virt_to_page(ptr); - return page_count(ptr_page) == 1; + pud_t *pud_table __unused = pud_offset(pgd, 0); + pgd_clear(pgd); + kvm_tlb_flush_vmid_ipa(kvm, addr); + pud_free(NULL, pud_table); + put_page(virt_to_page(pgd)); } static void clear_pud_entry(struct kvm *kvm, pud_t *pud, phys_addr_t addr) @@ -124,70 +131,81 @@ static void clear_pmd_entry(struct kvm *kvm, pmd_t *pmd, phys_addr_t addr) put_page(virt_to_page(pmd)); } -static void clear_pte_entry(struct kvm *kvm, pte_t *pte, phys_addr_t addr) +static void unmap_ptes(struct kvm *kvm, pmd_t *pmd, + unsigned long long addr, unsigned long long end) { - if (pte_present(*pte)) { - kvm_set_pte(pte, __pte(0)); - put_page(virt_to_page(pte)); - kvm_tlb_flush_vmid_ipa(kvm, addr); - } + pte_t *pte, *start_pte; + unsigned long long start_addr = addr; + + start_pte = pte = pte_offset_kernel(pmd, addr); + do { + if (!pte_none(*pte)) { + kvm_set_pte(pte, __pte(0)); + put_page(virt_to_page(pte)); + kvm_tlb_flush_vmid_ipa(kvm, addr); + } + } while (pte++, addr += PAGE_SIZE, addr != end); + + if (kvm_pte_table_empty(start_pte)) + clear_pmd_entry(kvm, pmd, start_addr); } -static void unmap_range(struct kvm *kvm, pgd_t *pgdp, - unsigned long long start, u64 size) +static void unmap_pmds(struct kvm *kvm, pud_t *pud, + unsigned long long addr, unsigned long long end) { - pgd_t *pgd; - pud_t *pud; - pmd_t *pmd; - pte_t *pte; - unsigned long long addr = start, end = start + size; - u64 next; - - while (addr < end) { - pgd = pgdp + pgd_index(addr); - pud = pud_offset(pgd, addr); - pte = NULL; - if (pud_none(*pud)) { - addr = kvm_pud_addr_end(addr, end); - continue; - } + unsigned long long next, start_addr = addr; + pmd_t *pmd, *start_pmd; - if (pud_huge(*pud)) { - /* - * If we are dealing with a huge pud, just clear it and - * move on. - */ - clear_pud_entry(kvm, pud, addr); - addr = kvm_pud_addr_end(addr, end); - continue; + start_pmd = pmd = pmd_offset(pud, addr); + do { + next = kvm_pmd_addr_end(addr, end); + if (!pmd_none(*pmd)) { + if (kvm_pmd_huge(*pmd)) + clear_pmd_entry(kvm, pmd, addr); + else + unmap_ptes(kvm, pmd, addr, end); } + } while (pmd++, addr = next, addr != end); - pmd = pmd_offset(pud, addr); - if (pmd_none(*pmd)) { - addr = kvm_pmd_addr_end(addr, end); - continue; - } + if (kvm_pmd_table_empty(start_pmd)) + clear_pud_entry(kvm, pud, start_addr); +} - if (!kvm_pmd_huge(*pmd)) { - pte = pte_offset_kernel(pmd, addr); - clear_pte_entry(kvm, pte, addr); - next = addr + PAGE_SIZE; - } +static void unmap_puds(struct kvm *kvm, pgd_t *pgd, + unsigned long long addr, unsigned long long end) +{ + unsigned long long next, start_addr = addr; + pud_t *pud, *start_pud; - /* - * If the pmd entry is to be cleared, walk back up the ladder - */ - if (kvm_pmd_huge(*pmd) || (pte && page_empty(pte))) { - clear_pmd_entry(kvm, pmd, addr); - next = kvm_pmd_addr_end(addr, end); - if (page_empty(pmd) && !page_empty(pud)) { + start_pud = pud = pud_offset(pgd, addr); + do { + next = kvm_pud_addr_end(addr, end); + if (!pud_none(*pud)) { + if (pud_huge(*pid)) { clear_pud_entry(kvm, pud, addr); - next = kvm_pud_addr_end(addr, end); + } else { + unmap_pmds(kvm, pud, addr, end); } } + } while (pud++, addr = next, addr != end); - addr = next; - } + if (kvm_pud_table_empty(start_pud)) + clear_pgd_entry(kvm, pgd, start_addr); +} + + +static void unmap_range(struct kvm *kvm, pgd_t *pgdp, + unsigned long long start, u64 size) +{ + pgd_t *pgd; + unsigned long long addr = start, end = start + size; + unsigned long long next; + + pgd = pgdp + pgd_index(addr); + do { + next = kvm_pgd_addr_end(addr, end); + unmap_puds(kvm, pgd, addr, next); + } while (pgd++, addr = next, addr != end); } static void stage2_flush_ptes(struct kvm *kvm, pmd_t *pmd, diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index 7d29847..8e138c7 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -125,6 +125,21 @@ static inline void kvm_set_s2pmd_writable(pmd_t *pmd) #define kvm_pud_addr_end(addr, end) pud_addr_end(addr, end) #define kvm_pmd_addr_end(addr, end) pmd_addr_end(addr, end) +static inline bool kvm_page_empty(void *ptr) +{ + struct page *ptr_page = virt_to_page(ptr); + return page_count(ptr_page) == 1; +} + +#define kvm_pte_table_empty(ptep) kvm_page_empty(ptep) +#ifndef CONFIG_ARM64_64K_PAGES +#define kvm_pmd_table_empty(pmdp) kvm_page_empty(pmdp) +#else +#define kvm_pmd_table_empty(pmdp) (0) +#endif +#define kvm_pud_table_empty(pudp) (0) + + struct kvm; #define kvm_flush_dcache_to_poc(a,l) __flush_dcache_area((a), (l)) _______________________________________________ kvmarm mailing list kvmarm@xxxxxxxxxxxxxxxxxxxxx https://lists.cs.columbia.edu/mailman/listinfo/kvmarm