asi_map_user()/asi_unmap_user() can be used to map userspace address ranges for ASI classes that do not specify ASI_MAP_ALL_USERSPACE. In addition, another structure, asi_pgtbl_pool, allows for pre-allocating a set of pages to avoid having to allocate memory for page tables within asi_map_user(), which makes it easier to use that function while holding locks. Signed-off-by: Junaid Shahid <junaids@xxxxxxxxxx> --- arch/x86/include/asm/asi.h | 19 +++ arch/x86/mm/asi.c | 252 ++++++++++++++++++++++++++++++++++--- include/asm-generic/asi.h | 21 ++++ include/linux/mm_types.h | 2 +- 4 files changed, 275 insertions(+), 19 deletions(-) diff --git a/arch/x86/include/asm/asi.h b/arch/x86/include/asm/asi.h index 35421356584b..bdb2f70d4f85 100644 --- a/arch/x86/include/asm/asi.h +++ b/arch/x86/include/asm/asi.h @@ -44,6 +44,12 @@ struct asi { atomic64_t *tlb_gen; atomic64_t __tlb_gen; int64_t asi_ref_count; + rwlock_t user_map_lock; +}; + +struct asi_pgtbl_pool { + struct page *pgtbl_list; + uint count; }; DECLARE_PER_CPU_ALIGNED(struct asi_state, asi_cpu_state); @@ -74,6 +80,19 @@ void asi_do_lazy_map(struct asi *asi, size_t addr); void asi_clear_user_pgd(struct mm_struct *mm, size_t addr); void asi_clear_user_p4d(struct mm_struct *mm, size_t addr); +int asi_map_user(struct asi *asi, void *addr, size_t len, + struct asi_pgtbl_pool *pool, + size_t allowed_start, size_t allowed_end); +void asi_unmap_user(struct asi *asi, void *va, size_t len); +int asi_fill_pgtbl_pool(struct asi_pgtbl_pool *pool, uint count, gfp_t flags); +void asi_clear_pgtbl_pool(struct asi_pgtbl_pool *pool); + +static inline void asi_init_pgtbl_pool(struct asi_pgtbl_pool *pool) +{ + pool->pgtbl_list = NULL; + pool->count = 0; +} + static inline void asi_init_thread_state(struct thread_struct *thread) { thread->intr_nest_depth = 0; diff --git a/arch/x86/mm/asi.c b/arch/x86/mm/asi.c index 29c74b6d4262..9b1bd005f343 100644 --- a/arch/x86/mm/asi.c +++ b/arch/x86/mm/asi.c @@ -86,6 +86,55 @@ void asi_unregister_class(int index) } EXPORT_SYMBOL_GPL(asi_unregister_class); +static ulong get_pgtbl_from_pool(struct asi_pgtbl_pool *pool) +{ + struct page *pgtbl; + + if (pool->count == 0) + return 0; + + pgtbl = pool->pgtbl_list; + pool->pgtbl_list = pgtbl->asi_pgtbl_pool_next; + pgtbl->asi_pgtbl_pool_next = NULL; + pool->count--; + + return (ulong)page_address(pgtbl); +} + +static void return_pgtbl_to_pool(struct asi_pgtbl_pool *pool, ulong virt) +{ + struct page *pgtbl = virt_to_page(virt); + + pgtbl->asi_pgtbl_pool_next = pool->pgtbl_list; + pool->pgtbl_list = pgtbl; + pool->count++; +} + +int asi_fill_pgtbl_pool(struct asi_pgtbl_pool *pool, uint count, gfp_t flags) +{ + if (!static_cpu_has(X86_FEATURE_ASI)) + return 0; + + while (pool->count < count) { + ulong pgtbl = get_zeroed_page(flags); + + if (!pgtbl) + return -ENOMEM; + + return_pgtbl_to_pool(pool, pgtbl); + } + + return 0; +} +EXPORT_SYMBOL_GPL(asi_fill_pgtbl_pool); + +void asi_clear_pgtbl_pool(struct asi_pgtbl_pool *pool) +{ + while (pool->count > 0) + free_page(get_pgtbl_from_pool(pool)); +} +EXPORT_SYMBOL_GPL(asi_clear_pgtbl_pool); + static void asi_clone_pgd(pgd_t *dst_table, pgd_t *src_table, size_t addr) { pgd_t *src = pgd_offset_pgd(src_table, addr); @@ -110,10 +159,12 @@ static void asi_clone_pgd(pgd_t *dst_table, pgd_t *src_table, size_t addr) #define DEFINE_ASI_PGTBL_ALLOC(base, level) \ static level##_t * asi_##level##_alloc(struct asi *asi, \ base##_t *base, ulong addr, \ - gfp_t flags) \ + gfp_t flags, \ + struct asi_pgtbl_pool *pool) \ { \ if (unlikely(base##_none(*base))) { \ - ulong pgtbl = get_zeroed_page(flags); \ + ulong pgtbl = pool ? get_pgtbl_from_pool(pool) \ + : get_zeroed_page(flags); \ phys_addr_t pgtbl_pa; \ \ if (pgtbl == 0) \ @@ -127,7 +178,10 @@ static level##_t * asi_##level##_alloc(struct asi *asi, \ mm_inc_nr_##level##s(asi->mm); \ } else { \ paravirt_release_##level(PHYS_PFN(pgtbl_pa)); \ - free_page(pgtbl); \ + if (pool) \ + return_pgtbl_to_pool(pool, pgtbl); \ + else \ + free_page(pgtbl); \ } \ \ /* NOP on native. PV call on Xen. */ \ @@ -336,6 +390,7 @@ int asi_init(struct mm_struct *mm, int asi_index, struct asi **out_asi) asi->class = &asi_class[asi_index]; asi->mm = mm; asi->pcid_index = asi_index; + rwlock_init(&asi->user_map_lock); if (asi->class->flags & ASI_MAP_STANDARD_NONSENSITIVE) { uint i; @@ -650,11 +705,6 @@ static bool follow_physaddr(struct mm_struct *mm, size_t virt, /* * Map the given range into the ASI page tables. The source of the mapping * is the regular unrestricted page tables. - * Can be used to map any kernel memory. - * - * The caller MUST ensure that the source mapping will not change during this - * function. For dynamic kernel memory, this is generally ensured by mapping - * the memory within the allocator. * * If the source mapping is a large page and the range being mapped spans the * entire large page, then it will be mapped as a large page in the ASI page @@ -664,19 +714,17 @@ static bool follow_physaddr(struct mm_struct *mm, size_t virt, * destination page, but that should be ok for now, as usually in such cases, * the range would consist of a small-ish number of pages. */ -int asi_map_gfp(struct asi *asi, void *addr, size_t len, gfp_t gfp_flags) +int __asi_map(struct asi *asi, size_t start, size_t end, gfp_t gfp_flags, + struct asi_pgtbl_pool *pool, + size_t allowed_start, size_t allowed_end) { size_t virt; - size_t start = (size_t)addr; - size_t end = start + len; size_t page_size; - if (!static_cpu_has(X86_FEATURE_ASI) || !asi) - return 0; - VM_BUG_ON(start & ~PAGE_MASK); - VM_BUG_ON(len & ~PAGE_MASK); - VM_BUG_ON(start < TASK_SIZE_MAX); + VM_BUG_ON(end & ~PAGE_MASK); + VM_BUG_ON(end > allowed_end); + VM_BUG_ON(start < allowed_start); gfp_flags &= GFP_RECLAIM_MASK; @@ -702,14 +750,15 @@ int asi_map_gfp(struct asi *asi, void *addr, size_t len, gfp_t gfp_flags) continue; \ } \ \ - level = asi_##level##_alloc(asi, base, virt, gfp_flags);\ + level = asi_##level##_alloc(asi, base, virt, \ + gfp_flags, pool); \ if (!level) \ return -ENOMEM; \ \ if (page_size >= LEVEL##_SIZE && \ (level##_none(*level) || level##_leaf(*level)) && \ is_page_within_range(virt, LEVEL##_SIZE, \ - start, end)) { \ + allowed_start, allowed_end)) {\ page_size = LEVEL##_SIZE; \ phys &= LEVEL##_MASK; \ \ @@ -737,6 +786,26 @@ int asi_map_gfp(struct asi *asi, void *addr, size_t len, gfp_t gfp_flags) return 0; } +/* + * Maps the given kernel address range into the ASI page tables. + * + * The caller MUST ensure that the source mapping will not change during this + * function. For dynamic kernel memory, this is generally ensured by mapping + * the memory within the allocator. + */ +int asi_map_gfp(struct asi *asi, void *addr, size_t len, gfp_t gfp_flags) +{ + size_t start = (size_t)addr; + size_t end = start + len; + + if (!static_cpu_has(X86_FEATURE_ASI) || !asi) + return 0; + + VM_BUG_ON(start < TASK_SIZE_MAX); + + return __asi_map(asi, start, end, gfp_flags, NULL, start, end); +} + int asi_map(struct asi *asi, void *addr, size_t len) { return asi_map_gfp(asi, addr, len, GFP_KERNEL); @@ -935,3 +1004,150 @@ void asi_clear_user_p4d(struct mm_struct *mm, size_t addr) if (!pgtable_l5_enabled()) __asi_clear_user_pgd(mm, addr); } + +/* + * Maps the given userspace address range into the ASI page tables. + * + * The caller MUST ensure that the source mapping will not change during this + * function e.g. by synchronizing via MMU notifiers or acquiring the + * appropriate locks. + */ +int asi_map_user(struct asi *asi, void *addr, size_t len, + struct asi_pgtbl_pool *pool, + size_t allowed_start, size_t allowed_end) +{ + int err; + size_t start = (size_t)addr; + size_t end = start + len; + + if (!static_cpu_has(X86_FEATURE_ASI) || !asi) + return 0; + + VM_BUG_ON(end > TASK_SIZE_MAX); + + read_lock(&asi->user_map_lock); + err = __asi_map(asi, start, end, GFP_NOWAIT, pool, + allowed_start, allowed_end); + read_unlock(&asi->user_map_lock); + + return err; +} +EXPORT_SYMBOL_GPL(asi_map_user); + +static bool +asi_unmap_free_pte_range(struct asi_pgtbl_pool *pgtbls_to_free, + pte_t *pte, size_t addr, size_t end) +{ + do { + pte_clear(NULL, addr, pte); + } while (pte++, addr += PAGE_SIZE, addr != end); + + return true; +} + +#define DEFINE_ASI_UNMAP_FREE_RANGE(level, LEVEL, next_level, NEXT_LVL_SIZE) \ +static bool \ +asi_unmap_free_##level##_range(struct asi_pgtbl_pool *pgtbls_to_free, \ + level##_t *level, size_t addr, size_t end) \ +{ \ + bool unmapped = false; \ + size_t next; \ + \ + do { \ + next = level##_addr_end(addr, end); \ + if (level##_none(*level)) \ + continue; \ + \ + if (IS_ALIGNED(addr, LEVEL##_SIZE) && \ + IS_ALIGNED(next, LEVEL##_SIZE)) { \ + if (!level##_large(*level)) { \ + ulong pgtbl = level##_page_vaddr(*level); \ + struct page *page = virt_to_page(pgtbl); \ + \ + page->private = PG_LEVEL_##NEXT_LVL_SIZE; \ + return_pgtbl_to_pool(pgtbls_to_free, pgtbl); \ + } \ + level##_clear(level); \ + unmapped = true; \ + } else { \ + /* \ + * At this time, we don't have a case where we need to \ + * unmap a subset of a huge page. But that could arise \ + * in the future. In that case, we'll need to split \ + * the huge mapping here. \ + */ \ + if (WARN_ON(level##_large(*level))) \ + continue; \ + \ + unmapped |= asi_unmap_free_##next_level##_range( \ + pgtbls_to_free, \ + next_level##_offset(level, addr), \ + addr, next); \ + } \ + } while (level++, addr = next, addr != end); \ + \ + return unmapped; \ +} + +DEFINE_ASI_UNMAP_FREE_RANGE(pmd, PMD, pte, 4K) +DEFINE_ASI_UNMAP_FREE_RANGE(pud, PUD, pmd, 2M) +DEFINE_ASI_UNMAP_FREE_RANGE(p4d, P4D, pud, 1G) +DEFINE_ASI_UNMAP_FREE_RANGE(pgd, PGDIR, p4d, 512G) + +static bool asi_unmap_and_free_range(struct asi_pgtbl_pool *pgtbls_to_free, + struct asi *asi, size_t addr, size_t end) +{ + size_t next; + bool unmapped = false; + pgd_t *pgd = pgd_offset_pgd(asi->pgd, addr); + + BUILD_BUG_ON((void *)&((struct page *)NULL)->private == + (void *)&((struct page *)NULL)->asi_pgtbl_pool_next); + + if (pgtable_l5_enabled()) + return asi_unmap_free_pgd_range(pgtbls_to_free, pgd, addr, end); + + do { + next = pgd_addr_end(addr, end); + unmapped |= asi_unmap_free_p4d_range(pgtbls_to_free, + p4d_offset(pgd, addr), + addr, next); + } while (pgd++, addr = next, addr != end); + + return unmapped; +} + +void asi_unmap_user(struct asi *asi, void *addr, size_t len) +{ + static void (*const free_pgtbl_at_level[])(struct asi *, size_t) = { + NULL, + asi_free_pte, + asi_free_pmd, + asi_free_pud, + asi_free_p4d + }; + + struct asi_pgtbl_pool pgtbls_to_free = { 0 }; + size_t start = (size_t)addr; + size_t end = start + len; + bool unmapped; + + if (!static_cpu_has(X86_FEATURE_ASI) || !asi) + return; + + write_lock(&asi->user_map_lock); + unmapped = asi_unmap_and_free_range(&pgtbls_to_free, asi, start, end); + write_unlock(&asi->user_map_lock); + + if (unmapped) + asi_flush_tlb_range(asi, addr, len); + + while (pgtbls_to_free.count > 0) { + size_t pgtbl = get_pgtbl_from_pool(&pgtbls_to_free); + struct page *page = virt_to_page(pgtbl); + + VM_BUG_ON(page->private >= PG_LEVEL_NUM); + free_pgtbl_at_level[page->private](asi, pgtbl); + } +} +EXPORT_SYMBOL_GPL(asi_unmap_user); diff --git a/include/asm-generic/asi.h b/include/asm-generic/asi.h index 8513d0d7865a..fffb323d2a00 100644 --- a/include/asm-generic/asi.h +++ b/include/asm-generic/asi.h @@ -26,6 +26,7 @@ struct asi_hooks {}; struct asi {}; +struct asi_pgtbl_pool {}; static inline int asi_register_class(const char *name, uint flags, @@ -92,6 +93,26 @@ void asi_clear_user_pgd(struct mm_struct *mm, size_t addr) { } static inline void asi_clear_user_p4d(struct mm_struct *mm, size_t addr) { } +static inline +int asi_map_user(struct asi *asi, void *addr, size_t len, + struct asi_pgtbl_pool *pool, + size_t allowed_start, size_t allowed_end) +{ + return 0; +} + +static inline void asi_unmap_user(struct asi *asi, void *va, size_t len) { } + +static inline +int asi_fill_pgtbl_pool(struct asi_pgtbl_pool *pool, uint count, gfp_t flags) +{ + return 0; +} + +static inline void asi_clear_pgtbl_pool(struct asi_pgtbl_pool *pool) { } + +static inline void asi_init_pgtbl_pool(struct asi_pgtbl_pool *pool) { } + static inline void asi_flush_tlb_range(struct asi *asi, void *addr, size_t len) { } diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 7d38229ca85c..c3f209720a84 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -198,7 +198,7 @@ struct page { /* Links the pages_to_free_async list */ struct llist_node async_free_node; - unsigned long _asi_pad_1; + struct page *asi_pgtbl_pool_next; u64 asi_tlb_gen; union { -- 2.35.1.473.g83b2b277ed-goog