The patch titled Subject: radix-tree: implement radix_tree_maybe_preload_order() has been added to the -mm tree. Its filename is radix-tree-implement-radix_tree_maybe_preload_order.patch This patch should soon appear at http://ozlabs.org/~akpm/mmots/broken-out/radix-tree-implement-radix_tree_maybe_preload_order.patch and later at http://ozlabs.org/~akpm/mmotm/broken-out/radix-tree-implement-radix_tree_maybe_preload_order.patch Before you just go and hit "reply", please: a) Consider who else should be cc'ed b) Prefer to cc a suitable mailing list as well c) Ideally: find the original patch on the mailing list and do a reply-to-all to that, adding suitable additional cc's *** Remember to use Documentation/SubmitChecklist when testing your code *** The -mm tree is included into linux-next and is updated there every 3-4 working days ------------------------------------------------------ From: "Kirill A. Shutemov" <kirill.shutemov@xxxxxxxxxxxxxxx> Subject: radix-tree: implement radix_tree_maybe_preload_order() The new helper is similar to radix_tree_maybe_preload(), but tries to preload number of nodes required to insert (1 << order) continuous naturally-aligned elements. This is required to push huge pages into pagecache. Link: http://lkml.kernel.org/r/1466021202-61880-24-git-send-email-kirill.shutemov@xxxxxxxxxxxxxxx Signed-off-by: Kirill A. Shutemov <kirill.shutemov@xxxxxxxxxxxxxxx> Signed-off-by: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx> --- include/linux/radix-tree.h | 1 lib/radix-tree.c | 84 ++++++++++++++++++++++++++++++++--- 2 files changed, 80 insertions(+), 5 deletions(-) diff -puN include/linux/radix-tree.h~radix-tree-implement-radix_tree_maybe_preload_order include/linux/radix-tree.h --- a/include/linux/radix-tree.h~radix-tree-implement-radix_tree_maybe_preload_order +++ a/include/linux/radix-tree.h @@ -291,6 +291,7 @@ unsigned int radix_tree_gang_lookup_slot unsigned long first_index, unsigned int max_items); int radix_tree_preload(gfp_t gfp_mask); int radix_tree_maybe_preload(gfp_t gfp_mask); +int radix_tree_maybe_preload_order(gfp_t gfp_mask, int order); void radix_tree_init(void); void *radix_tree_tag_set(struct radix_tree_root *root, unsigned long index, unsigned int tag); diff -puN lib/radix-tree.c~radix-tree-implement-radix_tree_maybe_preload_order lib/radix-tree.c --- a/lib/radix-tree.c~radix-tree-implement-radix_tree_maybe_preload_order +++ a/lib/radix-tree.c @@ -38,6 +38,9 @@ #include <linux/preempt.h> /* in_interrupt() */ +/* Number of nodes in fully populated tree of given height */ +static unsigned long height_to_maxnodes[RADIX_TREE_MAX_PATH + 1] __read_mostly; + /* * Radix tree node cache. */ @@ -342,7 +345,7 @@ radix_tree_node_free(struct radix_tree_n * To make use of this facility, the radix tree must be initialised without * __GFP_DIRECT_RECLAIM being passed to INIT_RADIX_TREE(). */ -static int __radix_tree_preload(gfp_t gfp_mask) +static int __radix_tree_preload(gfp_t gfp_mask, int nr) { struct radix_tree_preload *rtp; struct radix_tree_node *node; @@ -350,14 +353,14 @@ static int __radix_tree_preload(gfp_t gf preempt_disable(); rtp = this_cpu_ptr(&radix_tree_preloads); - while (rtp->nr < RADIX_TREE_PRELOAD_SIZE) { + while (rtp->nr < nr) { preempt_enable(); node = kmem_cache_alloc(radix_tree_node_cachep, gfp_mask); if (node == NULL) goto out; preempt_disable(); rtp = this_cpu_ptr(&radix_tree_preloads); - if (rtp->nr < RADIX_TREE_PRELOAD_SIZE) { + if (rtp->nr < nr) { node->private_data = rtp->nodes; rtp->nodes = node; rtp->nr++; @@ -383,7 +386,7 @@ int radix_tree_preload(gfp_t gfp_mask) { /* Warn on non-sensical use... */ WARN_ON_ONCE(!gfpflags_allow_blocking(gfp_mask)); - return __radix_tree_preload(gfp_mask); + return __radix_tree_preload(gfp_mask, RADIX_TREE_PRELOAD_SIZE); } EXPORT_SYMBOL(radix_tree_preload); @@ -395,7 +398,7 @@ EXPORT_SYMBOL(radix_tree_preload); int radix_tree_maybe_preload(gfp_t gfp_mask) { if (gfpflags_allow_blocking(gfp_mask)) - return __radix_tree_preload(gfp_mask); + return __radix_tree_preload(gfp_mask, RADIX_TREE_PRELOAD_SIZE); /* Preloading doesn't help anything with this gfp mask, skip it */ preempt_disable(); return 0; @@ -403,6 +406,51 @@ int radix_tree_maybe_preload(gfp_t gfp_m EXPORT_SYMBOL(radix_tree_maybe_preload); /* + * The same as function above, but preload number of nodes required to insert + * (1 << order) continuous naturally-aligned elements. + */ +int radix_tree_maybe_preload_order(gfp_t gfp_mask, int order) +{ + unsigned long nr_subtrees; + int nr_nodes, subtree_height; + + /* Preloading doesn't help anything with this gfp mask, skip it */ + if (!gfpflags_allow_blocking(gfp_mask)) { + preempt_disable(); + return 0; + } + + /* + * Calculate number and height of fully populated subtrees it takes to + * store (1 << order) elements. + */ + nr_subtrees = 1 << order; + for (subtree_height = 0; nr_subtrees > RADIX_TREE_MAP_SIZE; + subtree_height++) + nr_subtrees >>= RADIX_TREE_MAP_SHIFT; + + /* + * The worst case is zero height tree with a single item at index 0 and + * then inserting items starting at ULONG_MAX - (1 << order). + * + * This requires RADIX_TREE_MAX_PATH nodes to build branch from root to + * 0-index item. + */ + nr_nodes = RADIX_TREE_MAX_PATH; + + /* Plus branch to fully populated subtrees. */ + nr_nodes += RADIX_TREE_MAX_PATH - subtree_height; + + /* Root node is shared. */ + nr_nodes--; + + /* Plus nodes required to build subtrees. */ + nr_nodes += nr_subtrees * height_to_maxnodes[subtree_height]; + + return __radix_tree_preload(gfp_mask, nr_nodes); +} + +/* * The maximum index which can be stored in a radix tree */ static inline unsigned long shift_maxindex(unsigned int shift) @@ -1571,6 +1619,31 @@ radix_tree_node_ctor(void *arg) INIT_LIST_HEAD(&node->private_list); } +static __init unsigned long __maxindex(unsigned int height) +{ + unsigned int width = height * RADIX_TREE_MAP_SHIFT; + int shift = RADIX_TREE_INDEX_BITS - width; + + if (shift < 0) + return ~0UL; + if (shift >= BITS_PER_LONG) + return 0UL; + return ~0UL >> shift; +} + +static __init void radix_tree_init_maxnodes(void) +{ + unsigned long height_to_maxindex[RADIX_TREE_MAX_PATH + 1]; + unsigned int i, j; + + for (i = 0; i < ARRAY_SIZE(height_to_maxindex); i++) + height_to_maxindex[i] = __maxindex(i); + for (i = 0; i < ARRAY_SIZE(height_to_maxnodes); i++) { + for (j = i; j > 0; j--) + height_to_maxnodes[i] += height_to_maxindex[j - 1] + 1; + } +} + static int radix_tree_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) { @@ -1597,5 +1670,6 @@ void __init radix_tree_init(void) sizeof(struct radix_tree_node), 0, SLAB_PANIC | SLAB_RECLAIM_ACCOUNT, radix_tree_node_ctor); + radix_tree_init_maxnodes(); hotcpu_notifier(radix_tree_callback, 0); } _ Patches currently in -mm which might be from kirill.shutemov@xxxxxxxxxxxxxxx are revert-mm-make-faultaround-produce-old-ptes.patch revert-mm-disable-fault-around-on-emulated-access-bit-architecture.patch mm-make-swapin-readahead-to-improve-thp-collapse-rate-fix.patch mm-make-swapin-readahead-to-improve-thp-collapse-rate-fix-2.patch mm-make-swapin-readahead-to-improve-thp-collapse-rate-fix-3.patch mm-thp-make-swapin-readahead-under-down_read-of-mmap_sem-fix.patch khugepaged-recheck-pmd-after-mmap_sem-re-acquired.patch thp-mlock-update-unevictable-lrutxt.patch mm-do-not-pass-mm_struct-into-handle_mm_fault.patch mm-introduce-fault_env.patch mm-postpone-page-table-allocation-until-we-have-page-to-map.patch rmap-support-file-thp.patch mm-introduce-do_set_pmd.patch thp-vmstats-add-counters-for-huge-file-pages.patch thp-support-file-pages-in-zap_huge_pmd.patch thp-handle-file-pages-in-split_huge_pmd.patch thp-handle-file-cow-faults.patch thp-skip-file-huge-pmd-on-copy_huge_pmd.patch thp-prepare-change_huge_pmd-for-file-thp.patch thp-run-vma_adjust_trans_huge-outside-i_mmap_rwsem.patch thp-file-pages-support-for-split_huge_page.patch thp-mlock-do-not-mlock-pte-mapped-file-huge-pages.patch vmscan-split-file-huge-pages-before-paging-them-out.patch page-flags-relax-policy-for-pg_mappedtodisk-and-pg_reclaim.patch radix-tree-implement-radix_tree_maybe_preload_order.patch filemap-prepare-find-and-delete-operations-for-huge-pages.patch truncate-handle-file-thp.patch mm-rmap-account-shmem-thp-pages.patch shmem-prepare-huge=-mount-option-and-sysfs-knob.patch shmem-add-huge-pages-support.patch shmem-thp-respect-madv_nohugepage-for-file-mappings.patch thp-extract-khugepaged-from-mm-huge_memoryc.patch khugepaged-move-up_readmmap_sem-out-of-khugepaged_alloc_page.patch shmem-make-shmem_inode_info-lock-irq-safe.patch khugepaged-add-support-of-collapse-for-tmpfs-shmem-pages.patch thp-introduce-config_transparent_huge_pagecache.patch shmem-split-huge-pages-beyond-i_size-under-memory-pressure.patch thp-update-documentation-vm-transhugefilesystems-proctxt.patch -- To unsubscribe from this list: send the line "unsubscribe mm-commits" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html