The patch titled Subject: mm: secretmem: use PMD-size pages to amortize direct map fragmentation has been added to the -mm tree. Its filename is mm-secretmem-use-pmd-size-pages-to-amortize-direct-map-fragmentation.patch This patch should soon appear at https://ozlabs.org/~akpm/mmots/broken-out/mm-secretmem-use-pmd-size-pages-to-amortize-direct-map-fragmentation.patch and later at https://ozlabs.org/~akpm/mmotm/broken-out/mm-secretmem-use-pmd-size-pages-to-amortize-direct-map-fragmentation.patch Before you just go and hit "reply", please: a) Consider who else should be cc'ed b) Prefer to cc a suitable mailing list as well c) Ideally: find the original patch on the mailing list and do a reply-to-all to that, adding suitable additional cc's *** Remember to use Documentation/process/submit-checklist.rst when testing your code *** The -mm tree is included into linux-next and is updated there every 3-4 working days ------------------------------------------------------ From: Mike Rapoport <rppt@xxxxxxxxxxxxx> Subject: mm: secretmem: use PMD-size pages to amortize direct map fragmentation Removing a PAGE_SIZE page from the direct map every time such a page is allocated for a secret memory mapping will cause severe fragmentation of the direct map. This fragmentation can be reduced by using PMD-size pages as a pool for small pages for secret memory mappings. Add a gen_pool per secretmem inode and lazily populate this pool with PMD-size pages. Link: https://lkml.kernel.org/r/20200924132904.1391-6-rppt@xxxxxxxxxx Signed-off-by: Mike Rapoport <rppt@xxxxxxxxxxxxx> Cc: Alexander Viro <viro@xxxxxxxxxxxxxxxxxx> Cc: Andy Lutomirski <luto@xxxxxxxxxx> Cc: Arnd Bergmann <arnd@xxxxxxxx> Cc: Borislav Petkov <bp@xxxxxxxxx> Cc: Catalin Marinas <catalin.marinas@xxxxxxx> Cc: Christopher Lameter <cl@xxxxxxxxx> Cc: Dan Williams <dan.j.williams@xxxxxxxxx> Cc: Dave Hansen <dave.hansen@xxxxxxxxxxxxxxx> Cc: David Hildenbrand <david@xxxxxxxxxx> Cc: Elena Reshetova <elena.reshetova@xxxxxxxxx> Cc: "H. 
Peter Anvin" <hpa@xxxxxxxxx> Cc: Idan Yaniv <idan.yaniv@xxxxxxx> Cc: Ingo Molnar <mingo@xxxxxxxxxx> Cc: James Bottomley <jejb@xxxxxxxxxxxxx> Cc: "Kirill A. Shutemov" <kirill@xxxxxxxxxxxxx> Cc: Mark Rutland <mark.rutland@xxxxxxx> Cc: Matthew Wilcox <willy@xxxxxxxxxxxxx> Cc: Michael Kerrisk <mtk.manpages@xxxxxxxxx> Cc: Palmer Dabbelt <palmer@xxxxxxxxxxx> Cc: Palmer Dabbelt <palmerdabbelt@xxxxxxxxxx> Cc: Paul Walmsley <paul.walmsley@xxxxxxxxxx> Cc: Peter Zijlstra <peterz@xxxxxxxxxxxxx> Cc: Shuah Khan <shuah@xxxxxxxxxx> Cc: Thomas Gleixner <tglx@xxxxxxxxxxxxx> Cc: Tycho Andersen <tycho@xxxxxxxx> Cc: Will Deacon <will@xxxxxxxxxx> Signed-off-by: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx> --- mm/secretmem.c | 107 ++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 88 insertions(+), 19 deletions(-) --- a/mm/secretmem.c~mm-secretmem-use-pmd-size-pages-to-amortize-direct-map-fragmentation +++ a/mm/secretmem.c @@ -12,6 +12,7 @@ #include <linux/bitops.h> #include <linux/printk.h> #include <linux/pagemap.h> +#include <linux/genalloc.h> #include <linux/syscalls.h> #include <linux/pseudo_fs.h> #include <linux/set_memory.h> @@ -40,24 +41,66 @@ #define SECRETMEM_FLAGS_MASK SECRETMEM_MODE_MASK struct secretmem_ctx { + struct gen_pool *pool; unsigned int mode; }; -static struct page *secretmem_alloc_page(gfp_t gfp) +static int secretmem_pool_increase(struct secretmem_ctx *ctx, gfp_t gfp) { - /* - * FIXME: use a cache of large pages to reduce the direct map - * fragmentation - */ - return alloc_page(gfp); + unsigned long nr_pages = (1 << PMD_PAGE_ORDER); + struct gen_pool *pool = ctx->pool; + unsigned long addr; + struct page *page; + int err; + + page = alloc_pages(gfp, PMD_PAGE_ORDER); + if (!page) + return -ENOMEM; + + addr = (unsigned long)page_address(page); + split_page(page, PMD_PAGE_ORDER); + + err = gen_pool_add(pool, addr, PMD_SIZE, NUMA_NO_NODE); + if (err) { + __free_pages(page, PMD_PAGE_ORDER); + return err; + } + + __kernel_map_pages(page, nr_pages, 0); + + 
return 0; +} + +static struct page *secretmem_alloc_page(struct secretmem_ctx *ctx, + gfp_t gfp) +{ + struct gen_pool *pool = ctx->pool; + unsigned long addr; + struct page *page; + int err; + + if (gen_pool_avail(pool) < PAGE_SIZE) { + err = secretmem_pool_increase(ctx, gfp); + if (err) + return NULL; + } + + addr = gen_pool_alloc(pool, PAGE_SIZE); + if (!addr) + return NULL; + + page = virt_to_page(addr); + get_page(page); + + return page; } static vm_fault_t secretmem_fault(struct vm_fault *vmf) { + struct secretmem_ctx *ctx = vmf->vma->vm_file->private_data; struct address_space *mapping = vmf->vma->vm_file->f_mapping; struct inode *inode = file_inode(vmf->vma->vm_file); pgoff_t offset = vmf->pgoff; - unsigned long addr; struct page *page; int ret = 0; @@ -66,7 +109,7 @@ static vm_fault_t secretmem_fault(struct page = find_get_entry(mapping, offset); if (!page) { - page = secretmem_alloc_page(vmf->gfp_mask); + page = secretmem_alloc_page(ctx, vmf->gfp_mask); if (!page) return vmf_error(-ENOMEM); @@ -74,14 +117,8 @@ static vm_fault_t secretmem_fault(struct if (unlikely(ret)) goto err_put_page; - ret = set_direct_map_invalid_noflush(page); - if (ret) - goto err_del_page_cache; - - addr = (unsigned long)page_address(page); - flush_tlb_kernel_range(addr, addr + PAGE_SIZE); - __SetPageUptodate(page); + set_page_private(page, (unsigned long)ctx); ret = VM_FAULT_LOCKED; } @@ -89,8 +126,6 @@ static vm_fault_t secretmem_fault(struct vmf->page = page; return ret; -err_del_page_cache: - delete_from_page_cache(page); err_put_page: put_page(page); return vmf_error(ret); @@ -138,7 +173,11 @@ static int secretmem_migratepage(struct static void secretmem_freepage(struct page *page) { - set_direct_map_default_noflush(page); + unsigned long addr = (unsigned long)page_address(page); + struct secretmem_ctx *ctx = (struct secretmem_ctx *)page_private(page); + struct gen_pool *pool = ctx->pool; + + gen_pool_free(pool, addr, PAGE_SIZE); } static const struct address_space_operations 
secretmem_aops = { @@ -163,13 +202,18 @@ static struct file *secretmem_file_creat if (!ctx) goto err_free_inode; + ctx->pool = gen_pool_create(PAGE_SHIFT, NUMA_NO_NODE); + if (!ctx->pool) + goto err_free_ctx; + file = alloc_file_pseudo(inode, secretmem_mnt, "secretmem", O_RDWR, &secretmem_fops); if (IS_ERR(file)) - goto err_free_ctx; + goto err_free_pool; mapping_set_unevictable(inode->i_mapping); + inode->i_private = ctx; inode->i_mapping->private_data = ctx; inode->i_mapping->a_ops = &secretmem_aops; @@ -183,6 +227,8 @@ static struct file *secretmem_file_creat return file; +err_free_pool: + gen_pool_destroy(ctx->pool); err_free_ctx: kfree(ctx); err_free_inode: @@ -221,11 +267,34 @@ err_put_fd: return err; } +static void secretmem_cleanup_chunk(struct gen_pool *pool, + struct gen_pool_chunk *chunk, void *data) +{ + unsigned long start = chunk->start_addr; + unsigned long end = chunk->end_addr; + unsigned long nr_pages, addr; + + nr_pages = (end - start + 1) / PAGE_SIZE; + __kernel_map_pages(virt_to_page(start), nr_pages, 1); + + for (addr = start; addr < end; addr += PAGE_SIZE) + put_page(virt_to_page(addr)); +} + +static void secretmem_cleanup_pool(struct secretmem_ctx *ctx) +{ + struct gen_pool *pool = ctx->pool; + + gen_pool_for_each_chunk(pool, secretmem_cleanup_chunk, ctx); + gen_pool_destroy(pool); +} + static void secretmem_evict_inode(struct inode *inode) { struct secretmem_ctx *ctx = inode->i_private; truncate_inode_pages_final(&inode->i_data); + secretmem_cleanup_pool(ctx); clear_inode(inode); kfree(ctx); } _ Patches currently in -mm which might be from rppt@xxxxxxxxxxxxx are mm-account-pmd-tables-like-pte-tables-fix.patch kvm-ppc-book3s-hv-simplify-kvm_cma_reserve.patch dma-contiguous-simplify-cma_early_percent_memory.patch arm-xtensa-simplify-initialization-of-high-memory-pages.patch arm64-numa-simplify-dummy_numa_init.patch h8300-nds32-openrisc-simplify-detection-of-memory-extents.patch riscv-drop-unneeded-node-initialization.patch 
mircoblaze-drop-unneeded-numa-and-sparsemem-initializations.patch memblock-make-for_each_memblock_type-iterator-private.patch memblock-make-memblock_debug-and-related-functionality-private.patch memblock-reduce-number-of-parameters-in-for_each_mem_range.patch arch-mm-replace-for_each_memblock-with-for_each_mem_pfn_range.patch arch-drivers-replace-for_each_membock-with-for_each_mem_range.patch arch-drivers-replace-for_each_membock-with-for_each_mem_range-fix-2.patch x86-setup-simplify-initrd-relocation-and-reservation.patch x86-setup-simplify-reserve_crashkernel.patch memblock-remove-unused-memblock_mem_size.patch memblock-implement-for_each_reserved_mem_region-using-__next_mem_region.patch memblock-use-separate-iterators-for-memory-and-reserved-regions.patch mm-add-definition-of-pmd_page_order.patch mmap-make-mlock_future_check-global.patch mm-introduce-memfd_secret-system-call-to-create-secret-memory-areas.patch arch-mm-wire-up-memfd_secret-system-call-were-relevant.patch mm-secretmem-use-pmd-size-pages-to-amortize-direct-map-fragmentation.patch secretmem-test-add-basic-selftest-for-memfd_secret2.patch