In the file descriptor table duplication code (called at fork()), we need to duplicate an IDR. But we have to do it under a lock (so another thread doesn't open/close a fd in the middle), and there's no suitable preload operation for this today. Adding just idr_copy_preload() isn't enough as another thread could grow the fd table between starting the preload and acquiring the lock. We also need idr_check_preload() to be called after acquiring the lock. Signed-off-by: Matthew Wilcox <mawilcox@xxxxxxxxxxxxx> --- include/linux/idr.h | 32 +++++++++++ include/linux/radix-tree.h | 3 ++ lib/radix-tree.c | 83 +++++++++++++++++++++++++++++ tools/testing/radix-tree/idr-test.c | 24 +++++++++ tools/testing/radix-tree/linux/radix-tree.h | 2 + tools/testing/radix-tree/main.c | 38 +++++++++++++ 6 files changed, 182 insertions(+) diff --git a/include/linux/idr.h b/include/linux/idr.h index d43cf01..eed1c1a 100644 --- a/include/linux/idr.h +++ b/include/linux/idr.h @@ -166,6 +166,38 @@ static inline bool idr_is_empty(const struct idr *idr) } /** + * idr_copy_preload - preload for idr_copy() + * @src: IDR to be copied from + * @gfp: Allocation mask to use for preloading + * + * Preallocates enough memory for a call to idr_copy(). This function + * returns with preemption disabled. Call idr_preload_end() once the + * copy has completed. + * + * Return: -ENOMEM if the memory could not be allocated. + */ +static inline int idr_copy_preload(const struct idr *src, gfp_t gfp) +{ + return radix_tree_copy_preload(&src->idr_rt, gfp); +} + +/** + * idr_check_preload - Check the preload is still sufficient + * @src: IDR to be copied from + * + * Between the successful allocation of memory and acquiring the lock that + * protects @src, the IDR may have expanded. If this function returns + * false, more memory needs to be preallocated. + * + * Return: true if enough memory remains allocated, false to retry the + * preallocation. + */ +static inline bool idr_check_preload(const struct idr *src) +{ + return radix_tree_check_preload(&src->idr_rt); +} + +/** * idr_preload_end - end preload section started with idr_preload() * * Each idr_preload() should be matched with an invocation of this diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h index f701e0b..f53d004 100644 --- a/include/linux/radix-tree.h +++ b/include/linux/radix-tree.h @@ -354,6 +354,9 @@ static inline void radix_tree_preload_end(void) preempt_enable(); } +int radix_tree_copy_preload(const struct radix_tree_root *, gfp_t); +bool radix_tree_check_preload(const struct radix_tree_root *); + int radix_tree_split_preload(unsigned old_order, unsigned new_order, gfp_t); int radix_tree_split(struct radix_tree_root *, unsigned long index, unsigned new_order); diff --git a/lib/radix-tree.c b/lib/radix-tree.c index 855ac8e..c1d75224 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -277,6 +277,55 @@ static unsigned long next_index(unsigned long index, return (index & ~node_maxindex(node)) + (offset << node->shift); } +/** + * radix_tree_count_nodes - Returns the number of nodes in this tree + * @root: radix tree root + * + * This routine does not examine every node in the tree; it assumes that + * all entries in the tree at level 1 are nodes. It will overestimate + * the number of nodes in the tree in the presence of entries of order + * RADIX_TREE_MAP_SHIFT or higher. + */ +static unsigned long radix_tree_count_nodes(const struct radix_tree_root *root) +{ + struct radix_tree_node *node, *child; + unsigned long n = 1; + void *entry = rcu_dereference_raw(root->rnode); + unsigned int offset = 0; + + if (!radix_tree_is_internal_node(entry)) + return 0; + + node = entry_to_node(entry); + if (!node->shift) + return n; + + n += node->count; + if (node->shift == RADIX_TREE_MAP_SHIFT) + return n; + + while (node) { + if (offset == RADIX_TREE_MAP_SIZE) { + offset = node->offset + 1; + node = node->parent; + continue; + } + + entry = rcu_dereference_raw(node->slots[offset]); + offset++; + if (!radix_tree_is_internal_node(entry)) + continue; + child = entry_to_node(entry); + n += child->count; + if (node->shift <= 2 * RADIX_TREE_MAP_SHIFT) + continue; + offset = 0; + node = child; + } + + return n; +} + #ifndef __KERNEL__ static void dump_node(struct radix_tree_node *node, unsigned long index) { @@ -530,6 +579,40 @@ int radix_tree_maybe_preload(gfp_t gfp_mask) } EXPORT_SYMBOL(radix_tree_maybe_preload); +/** + * radix_tree_copy_preload - preload for radix_tree_copy() + * @src: radix tree root to be copied from + * @gfp: Allocation mask to use for preloading + * + * Preallocates enough memory for a call to radix_tree_copy(). This function + * returns with preemption disabled. Call radix_tree_preload_end() once the + * copy has completed. + * + * Return: -ENOMEM if the memory could not be allocated. + */ +int radix_tree_copy_preload(const struct radix_tree_root *src, gfp_t gfp_mask) +{ + return __radix_tree_preload(gfp_mask, radix_tree_count_nodes(src)); +} + +/** + * radix_tree_check_preload - Check the preload is still sufficient + * @src: radix tree to be copied from + * @cookie: Cookie returned from radix_tree_copy_preload() + * + * Between the successful allocation of memory and acquiring the lock that + * protects @src, the radix tree may have expanded. Call this function + * to see if the preallocation needs to be expanded too. + * + * Return: true if enough memory remains allocated, false if it does not + * suffice. + */ +bool radix_tree_check_preload(const struct radix_tree_root *src) +{ + struct radix_tree_preload *rtp = this_cpu_ptr(&radix_tree_preloads); + return rtp->nr >= radix_tree_count_nodes(src); +} + #ifdef CONFIG_RADIX_TREE_MULTIORDER /* * Preload with enough objects to ensure that we can split a single entry diff --git a/tools/testing/radix-tree/idr-test.c b/tools/testing/radix-tree/idr-test.c index 3f9f429..e8d5386 100644 --- a/tools/testing/radix-tree/idr-test.c +++ b/tools/testing/radix-tree/idr-test.c @@ -225,6 +225,29 @@ void idr_get_next_test(void) idr_destroy(&idr); } +void idr_copy_test(void) +{ + DEFINE_IDR(src); + DEFINE_IDR(dst); + int i; + void *p; + + for (i = 0; i < 10000; i++) { + struct item *item = item_create(i, 0); + assert(idr_alloc(&src, item, 0, 20000, GFP_KERNEL) == i); + } + + idr_copy_preload(&src, GFP_KERNEL); + idr_for_each_entry(&src, p, i) { + assert(idr_alloc(&dst, p, i, i + 1, GFP_NOWAIT) == i); + } + idr_preload_end(); + + idr_for_each(&src, item_idr_free, NULL); + idr_destroy(&src); + idr_destroy(&dst); +} + void idr_checks(void) { unsigned long i; @@ -276,6 +299,7 @@ void idr_checks(void) idr_tag_test(); idr_nowait_test(); idr_get_next_test(); + idr_copy_test(); } /* diff --git a/tools/testing/radix-tree/linux/radix-tree.h b/tools/testing/radix-tree/linux/radix-tree.h index bf1bb23..0c5885e 100644 --- a/tools/testing/radix-tree/linux/radix-tree.h +++ b/tools/testing/radix-tree/linux/radix-tree.h @@ -4,6 +4,8 @@ #include "generated/map-shift.h" #include "../../../../include/linux/radix-tree.h" +unsigned long radix_tree_count_nodes(const struct radix_tree_root *root); + extern int kmalloc_verbose; extern int test_verbose; diff --git a/tools/testing/radix-tree/main.c b/tools/testing/radix-tree/main.c index bc9a784..a7504e2 100644 --- a/tools/testing/radix-tree/main.c +++ b/tools/testing/radix-tree/main.c @@ -11,6 +11,42 @@ #include "test.h" #include "regression.h" +static void count_node_check(void) +{ + unsigned long i, j, k; + RADIX_TREE(tree, GFP_KERNEL); + + assert(radix_tree_empty(&tree)); + assert(radix_tree_count_nodes(&tree) == 0); + assert(item_insert(&tree, 0) == 0); + assert(radix_tree_count_nodes(&tree) == 0); + + for (i = 1; i < RADIX_TREE_MAP_SIZE; i++) { + assert(item_insert(&tree, i) == 0); + assert(radix_tree_count_nodes(&tree) == 1); + } + + j = 3; + k = 3; + for (i = RADIX_TREE_MAP_SIZE; i < i * RADIX_TREE_MAP_SIZE; + i *= RADIX_TREE_MAP_SIZE) { + assert(item_insert(&tree, i) == 0); + assert(radix_tree_count_nodes(&tree) == j); + j += k; + k++; + } + + assert(item_insert(&tree, i) == 0); + assert(radix_tree_count_nodes(&tree) == j); + + item_kill_tree(&tree); +} + +void basic_checks(void) +{ + count_node_check(); +} + void __gang_check(unsigned long middle, long down, long up, int chunk, int hop) { long idx; @@ -362,6 +398,8 @@ int main(int argc, char **argv) rcu_register_thread(); radix_tree_init(); + basic_checks(); + regression1_test(); regression2_test(); regression3_test(); -- 1.8.3.1