Extend ksm to create dedicated unstable and stable trees for each partition. Signed-off-by: Sourav Panda <souravpanda@xxxxxxxxxx> --- mm/ksm.c | 165 +++++++++++++++++++++++++++++++++++++------------------ 1 file changed, 111 insertions(+), 54 deletions(-) diff --git a/mm/ksm.c b/mm/ksm.c index 927e257c48b5..b575250aaf45 100644 --- a/mm/ksm.c +++ b/mm/ksm.c @@ -144,6 +144,28 @@ struct ksm_scan { unsigned long seqnr; }; +static struct kobject *ksm_base_kobj; + +struct partition_kobj { + struct kobject *kobj; + struct list_head list; + struct rb_root *root_stable_tree; + struct rb_root *root_unstable_tree; +}; + +static LIST_HEAD(partition_list); + +static struct partition_kobj *find_partition_by_kobj(struct kobject *kobj) +{ + struct partition_kobj *partition; + + list_for_each_entry(partition, &partition_list, list) { + if (partition->kobj == kobj) + return partition; + } + return NULL; +} + /** * struct ksm_stable_node - node of the stable rbtree * @node: rb node of this ksm page in the stable tree @@ -182,6 +204,7 @@ struct ksm_stable_node { #ifdef CONFIG_NUMA int nid; #endif + struct partition_kobj *partition; }; /** @@ -218,6 +241,7 @@ struct ksm_rmap_item { struct hlist_node hlist; }; }; + struct partition_kobj *partition; }; #define SEQNR_MASK 0x0ff /* low bits of unstable tree seqnr */ @@ -227,8 +251,6 @@ struct ksm_rmap_item { /* The stable and unstable tree heads */ static struct rb_root one_stable_tree[1] = { RB_ROOT }; static struct rb_root one_unstable_tree[1] = { RB_ROOT }; -static struct rb_root *root_stable_tree = one_stable_tree; -static struct rb_root *root_unstable_tree = one_unstable_tree; /* Recently migrated nodes of stable tree, pending proper placement */ static LIST_HEAD(migrate_nodes); @@ -555,7 +577,7 @@ static inline void stable_node_dup_del(struct ksm_stable_node *dup) if (is_stable_node_dup(dup)) __stable_node_dup_del(dup); else - rb_erase(&dup->node, root_stable_tree + NUMA(dup->nid)); + rb_erase(&dup->node, 
dup->partition->root_stable_tree + NUMA(dup->nid)); #ifdef CONFIG_DEBUG_VM dup->head = NULL; #endif @@ -580,14 +602,20 @@ static inline void free_rmap_item(struct ksm_rmap_item *rmap_item) kmem_cache_free(rmap_item_cache, rmap_item); } -static inline struct ksm_stable_node *alloc_stable_node(void) +static inline struct ksm_stable_node *alloc_stable_node(struct partition_kobj *partition) { /* * The allocation can take too long with GFP_KERNEL when memory is under * pressure, which may lead to hung task warnings. Adding __GFP_HIGH * grants access to memory reserves, helping to avoid this problem. */ - return kmem_cache_alloc(stable_node_cache, GFP_KERNEL | __GFP_HIGH); + struct ksm_stable_node *node = kmem_cache_alloc(stable_node_cache, + GFP_KERNEL | __GFP_HIGH); + + if (node) + node->partition = partition; + + return node; } static inline void free_stable_node(struct ksm_stable_node *stable_node) @@ -777,9 +805,10 @@ static inline int get_kpfn_nid(unsigned long kpfn) } static struct ksm_stable_node *alloc_stable_node_chain(struct ksm_stable_node *dup, - struct rb_root *root) + struct rb_root *root, + struct partition_kobj *partition) { - struct ksm_stable_node *chain = alloc_stable_node(); + struct ksm_stable_node *chain = alloc_stable_node(partition); VM_BUG_ON(is_stable_node_chain(dup)); if (likely(chain)) { INIT_HLIST_HEAD(&chain->hlist); @@ -1016,7 +1045,8 @@ static void remove_rmap_item_from_tree(struct ksm_rmap_item *rmap_item) unsigned char age = get_rmap_item_age(rmap_item); if (!age) rb_erase(&rmap_item->node, - root_unstable_tree + NUMA(rmap_item->nid)); + rmap_item->partition->root_unstable_tree + + NUMA(rmap_item->nid)); ksm_pages_unshared--; rmap_item->address &= PAGE_MASK; } @@ -1154,17 +1184,23 @@ static int remove_all_stable_nodes(void) struct ksm_stable_node *stable_node, *next; int nid; int err = 0; - - for (nid = 0; nid < ksm_nr_node_ids; nid++) { - while (root_stable_tree[nid].rb_node) { - stable_node = rb_entry(root_stable_tree[nid].rb_node, - 
struct ksm_stable_node, node); - if (remove_stable_node_chain(stable_node, - root_stable_tree + nid)) { - err = -EBUSY; - break; /* proceed to next nid */ + struct partition_kobj *partition; + struct rb_root *root_stable_tree; + + list_for_each_entry(partition, &partition_list, list) { + root_stable_tree = partition->root_stable_tree; + + for (nid = 0; nid < ksm_nr_node_ids; nid++) { + while (root_stable_tree[nid].rb_node) { + stable_node = rb_entry(root_stable_tree[nid].rb_node, + struct ksm_stable_node, node); + if (remove_stable_node_chain(stable_node, + root_stable_tree + nid)) { + err = -EBUSY; + break; /* proceed to next nid */ + } + cond_resched(); } - cond_resched(); } } list_for_each_entry_safe(stable_node, next, &migrate_nodes, list) { @@ -1802,7 +1838,8 @@ static __always_inline struct folio *chain(struct ksm_stable_node **s_n_d, * This function returns the stable tree node of identical content if found, * -EBUSY if the stable node's page is being migrated, NULL otherwise. */ -static struct folio *stable_tree_search(struct page *page) +static struct folio *stable_tree_search(struct page *page, + struct partition_kobj *partition) { int nid; struct rb_root *root; @@ -1821,7 +1858,7 @@ static struct folio *stable_tree_search(struct page *page) } nid = get_kpfn_nid(folio_pfn(folio)); - root = root_stable_tree + nid; + root = partition->root_stable_tree + nid; again: new = &root->rb_node; parent = NULL; @@ -1991,7 +2028,7 @@ static struct folio *stable_tree_search(struct page *page) VM_BUG_ON(is_stable_node_dup(stable_node_dup)); /* chain is missing so create it */ stable_node = alloc_stable_node_chain(stable_node_dup, - root); + root, partition); if (!stable_node) return NULL; } @@ -2016,7 +2053,8 @@ static struct folio *stable_tree_search(struct page *page) * This function returns the stable tree node just allocated on success, * NULL otherwise. 
*/ -static struct ksm_stable_node *stable_tree_insert(struct folio *kfolio) +static struct ksm_stable_node *stable_tree_insert(struct folio *kfolio, + struct partition_kobj *partition) { int nid; unsigned long kpfn; @@ -2028,7 +2066,7 @@ static struct ksm_stable_node *stable_tree_insert(struct folio *kfolio) kpfn = folio_pfn(kfolio); nid = get_kpfn_nid(kpfn); - root = root_stable_tree + nid; + root = partition->root_stable_tree + nid; again: parent = NULL; new = &root->rb_node; @@ -2067,7 +2105,7 @@ static struct ksm_stable_node *stable_tree_insert(struct folio *kfolio) } } - stable_node_dup = alloc_stable_node(); + stable_node_dup = alloc_stable_node(partition); if (!stable_node_dup) return NULL; @@ -2082,7 +2120,8 @@ static struct ksm_stable_node *stable_tree_insert(struct folio *kfolio) if (!is_stable_node_chain(stable_node)) { struct ksm_stable_node *orig = stable_node; /* chain is missing so create it */ - stable_node = alloc_stable_node_chain(orig, root); + stable_node = alloc_stable_node_chain(orig, root, + partition); if (!stable_node) { free_stable_node(stable_node_dup); return NULL; @@ -2121,7 +2160,7 @@ struct ksm_rmap_item *unstable_tree_search_insert(struct ksm_rmap_item *rmap_ite int nid; nid = get_kpfn_nid(page_to_pfn(page)); - root = root_unstable_tree + nid; + root = rmap_item->partition->root_unstable_tree + nid; new = &root->rb_node; while (*new) { @@ -2291,7 +2330,7 @@ static void cmp_and_merge_page(struct page *page, struct ksm_rmap_item *rmap_ite } /* Start by searching for the folio in the stable tree */ - kfolio = stable_tree_search(page); + kfolio = stable_tree_search(page, rmap_item->partition); if (&kfolio->page == page && rmap_item->head == stable_node) { folio_put(kfolio); return; @@ -2344,7 +2383,8 @@ static void cmp_and_merge_page(struct page *page, struct ksm_rmap_item *rmap_ite * node in the stable tree and add both rmap_items. 
*/
 			folio_lock(kfolio);
-			stable_node = stable_tree_insert(kfolio);
+			stable_node = stable_tree_insert(kfolio,
+							 rmap_item->partition);
 			if (stable_node) {
 				stable_tree_append(tree_rmap_item, stable_node,
 						   false);
@@ -2502,7 +2542,8 @@ static struct ksm_rmap_item *retrieve_rmap_item(struct page **page,
 }
 
 static void ksm_sync_merge(struct mm_struct *mm,
-			   unsigned long start, unsigned long end)
+			   unsigned long start, unsigned long end,
+			   struct partition_kobj *partition)
 {
 	struct ksm_rmap_item *rmap_item;
 	struct page *page;
@@ -2510,6 +2551,7 @@ static void ksm_sync_merge(struct mm_struct *mm,
 	rmap_item = retrieve_rmap_item(&page, mm, start, end);
 	if (!rmap_item)
 		return;
+	rmap_item->partition = partition;
 	cmp_and_merge_page(page, rmap_item);
 	put_page(page);
 }
@@ -3328,19 +3370,24 @@ static void ksm_check_stable_tree(unsigned long start_pfn,
 	struct ksm_stable_node *stable_node, *next;
 	struct rb_node *node;
 	int nid;
-
-	for (nid = 0; nid < ksm_nr_node_ids; nid++) {
-		node = rb_first(root_stable_tree + nid);
-		while (node) {
-			stable_node = rb_entry(node, struct ksm_stable_node, node);
-			if (stable_node_chain_remove_range(stable_node,
-							   start_pfn, end_pfn,
-							   root_stable_tree +
-							   nid))
-				node = rb_first(root_stable_tree + nid);
-			else
-				node = rb_next(node);
-			cond_resched();
+	struct partition_kobj *partition;
+	struct rb_root *root_stable_tree;
+
+	list_for_each_entry(partition, &partition_list, list) {
+		root_stable_tree = partition->root_stable_tree;
+
+		for (nid = 0; nid < ksm_nr_node_ids; nid++) {
+			node = rb_first(root_stable_tree + nid);
+			while (node) {
+				stable_node = rb_entry(node, struct ksm_stable_node, node);
+				if (stable_node_chain_remove_range(stable_node,
+								   start_pfn, end_pfn,
+								   root_stable_tree + nid))
+					node = rb_first(root_stable_tree + nid);
+				else
+					node = rb_next(node);
+				cond_resched();
+			}
 		}
 	}
 	list_for_each_entry_safe(stable_node, next, &migrate_nodes, list) {
@@ -3551,6 +3597,7 @@ static ssize_t trigger_merge_store(struct kobject *kobj,
 	int ret;
 	struct task_struct *task;
 	struct
mm_struct *mm;
+	struct partition_kobj *partition;
 
 	input = kstrdup(buf, GFP_KERNEL);
 	if (!input)
@@ -3583,9 +3630,15 @@ static ssize_t trigger_merge_store(struct kobject *kobj,
 	if (!mm)
 		return -EINVAL;
 
 	mutex_lock(&ksm_thread_mutex);
 	wait_while_offlining();
+	partition = find_partition_by_kobj(kobj);
+	if (!partition) {
+		mutex_unlock(&ksm_thread_mutex);
+		mmput(mm);
+		return -EINVAL;
+	}
-	ksm_sync_merge(mm, start, end);
+	ksm_sync_merge(mm, start, end, partition);
 	mutex_unlock(&ksm_thread_mutex);
 
 	mmput(mm);
@@ -3606,6 +3657,8 @@ static ssize_t merge_across_nodes_store(struct kobject *kobj,
 {
 	int err;
 	unsigned long knob;
+	struct rb_root *root_stable_tree;
+	struct partition_kobj *partition;
 
 	err = kstrtoul(buf, 10, &knob);
 	if (err)
@@ -3615,6 +3668,14 @@ static ssize_t merge_across_nodes_store(struct kobject *kobj,
 
 	mutex_lock(&ksm_thread_mutex);
 	wait_while_offlining();
+
+	partition = find_partition_by_kobj(kobj);
+	if (!partition) {
+		mutex_unlock(&ksm_thread_mutex);
+		return -EINVAL;
+	}
+	root_stable_tree = partition->root_stable_tree;
+
 	if (ksm_merge_across_nodes != knob) {
 		if (ksm_pages_shared || remove_all_stable_nodes())
 			err = -EBUSY;
@@ -3633,10 +3690,10 @@ static ssize_t merge_across_nodes_store(struct kobject *kobj,
 			if (!buf)
 				err = -ENOMEM;
 			else {
-				root_stable_tree = buf;
-				root_unstable_tree = buf + nr_node_ids;
+				partition->root_stable_tree = buf;
+				partition->root_unstable_tree = buf + nr_node_ids;
 				/* Stable tree is empty but not the unstable */
-				root_unstable_tree[0] = one_unstable_tree[0];
+				partition->root_unstable_tree[0] = one_unstable_tree[0];
 			}
 		}
 		if (!err) {
@@ -3834,14 +3891,5 @@ KSM_ATTR_RO(full_scans);
 
 #ifdef CONFIG_SELECTIVE_KSM
-static struct kobject *ksm_base_kobj;
-
-struct partition_kobj {
-	struct kobject *kobj;
-	struct list_head list;
-};
-
-static LIST_HEAD(partition_list);
-
 #else /* CONFIG_SELECTIVE_KSM */
 static ssize_t smart_scan_show(struct kobject *kobj,
 			       struct kobj_attribute *attr, char *buf)
@@ -4055,6 +4104,7 @@ static ssize_t add_partition_store(struct kobject *kobj,
 	struct partition_kobj *new_partition_kobj;
 	char partition_name[50];
 	int err;
+
struct rb_root *tree_root; mutex_lock(&ksm_thread_mutex); @@ -4081,6 +4131,13 @@ static ssize_t add_partition_store(struct kobject *kobj, goto unlock; } + tree_root = kcalloc(nr_node_ids + nr_node_ids, sizeof(*tree_root), GFP_KERNEL); + if (!tree_root) { + err = -ENOMEM; + goto unlock; + } + new_partition_kobj->root_stable_tree = tree_root; + new_partition_kobj->root_unstable_tree = tree_root + nr_node_ids; err = sysfs_create_group(new_partition_kobj->kobj, &ksm_attr_group); if (err) { pr_err("ksm: register sysfs failed\n"); -- 2.49.0.395.g12beb8f557-goog