[RFC PATCH 4/6] mm: create dedicated trees for SELECTIVE KSM partitions

Extend KSM so that each SELECTIVE KSM partition owns dedicated stable
and unstable trees. A partition allocates a contiguous array of
2 * nr_node_ids rb_root slots when it is created: the first nr_node_ids
entries hold its per-NUMA-node stable tree roots and the remainder its
unstable tree roots. Stable nodes and rmap_items gain a back-pointer to
their owning partition so that tree search, insert and erase resolve
against the correct partition's roots.
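
The roots are carved out of a single allocation when a partition is
created (error path omitted here; see add_partition_store() below):

	tree_root = kcalloc(nr_node_ids + nr_node_ids,
			    sizeof(*tree_root), GFP_KERNEL);
	new_partition_kobj->root_stable_tree = tree_root;
	new_partition_kobj->root_unstable_tree = tree_root + nr_node_ids;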

Signed-off-by: Sourav Panda <souravpanda@xxxxxxxxxx>
---
 mm/ksm.c | 170 +++++++++++++++++++++++++++++++++++++++------------------
 1 file changed, 116 insertions(+), 54 deletions(-)
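
A condensed sketch of the resulting lookup path, for review purposes.
The helper below is illustrative only and is not part of this patch; it
assumes get_kpfn_nid() behaves as in the existing code, and simply
names what stable_tree_search() and stable_tree_insert() now do inline
via the partition back-pointer:

	/*
	 * Illustrative only: with per-partition trees, resolving a
	 * stable tree root means indexing the owning partition's
	 * per-NUMA-node root array.
	 */
	static struct rb_root *stable_root_of(struct partition_kobj *partition,
					      unsigned long kpfn)
	{
		return partition->root_stable_tree + get_kpfn_nid(kpfn);
	}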

diff --git a/mm/ksm.c b/mm/ksm.c
index 927e257c48b5..b575250aaf45 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -144,6 +144,26 @@ struct ksm_scan {
 	unsigned long seqnr;
 };
 
+struct partition_kobj {
+	struct kobject *kobj;
+	struct list_head list;
+	struct rb_root *root_stable_tree;
+	struct rb_root *root_unstable_tree;
+};
+
+static LIST_HEAD(partition_list);
+
+static struct partition_kobj *find_partition_by_kobj(struct kobject *kobj)
+{
+	struct partition_kobj *partition;
+
+	list_for_each_entry(partition, &partition_list, list) {
+		if (partition->kobj == kobj)
+			return partition;
+	}
+	return NULL;
+}
+
 /**
  * struct ksm_stable_node - node of the stable rbtree
  * @node: rb node of this ksm page in the stable tree
@@ -182,6 +204,7 @@ struct ksm_stable_node {
 #ifdef CONFIG_NUMA
 	int nid;
 #endif
+	struct partition_kobj *partition;
 };
 
 /**
@@ -218,6 +241,7 @@ struct ksm_rmap_item {
 			struct hlist_node hlist;
 		};
 	};
+	struct partition_kobj *partition;
 };
 
 #define SEQNR_MASK	0x0ff	/* low bits of unstable tree seqnr */
@@ -227,8 +251,6 @@ struct ksm_rmap_item {
 /* The stable and unstable tree heads */
 static struct rb_root one_stable_tree[1] = { RB_ROOT };
 static struct rb_root one_unstable_tree[1] = { RB_ROOT };
-static struct rb_root *root_stable_tree = one_stable_tree;
-static struct rb_root *root_unstable_tree = one_unstable_tree;
 
 /* Recently migrated nodes of stable tree, pending proper placement */
 static LIST_HEAD(migrate_nodes);
@@ -555,7 +577,7 @@ static inline void stable_node_dup_del(struct ksm_stable_node *dup)
 	if (is_stable_node_dup(dup))
 		__stable_node_dup_del(dup);
 	else
-		rb_erase(&dup->node, root_stable_tree + NUMA(dup->nid));
+		rb_erase(&dup->node, dup->partition->root_stable_tree + NUMA(dup->nid));
 #ifdef CONFIG_DEBUG_VM
 	dup->head = NULL;
 #endif
@@ -580,14 +602,20 @@ static inline void free_rmap_item(struct ksm_rmap_item *rmap_item)
 	kmem_cache_free(rmap_item_cache, rmap_item);
 }
 
-static inline struct ksm_stable_node *alloc_stable_node(void)
+static inline struct ksm_stable_node *alloc_stable_node(struct partition_kobj *partition)
 {
 	/*
 	 * The allocation can take too long with GFP_KERNEL when memory is under
 	 * pressure, which may lead to hung task warnings.  Adding __GFP_HIGH
 	 * grants access to memory reserves, helping to avoid this problem.
 	 */
-	return kmem_cache_alloc(stable_node_cache, GFP_KERNEL | __GFP_HIGH);
+	struct ksm_stable_node *node = kmem_cache_alloc(stable_node_cache,
+							GFP_KERNEL | __GFP_HIGH);
+
+	if (node)
+		node->partition = partition;
+
+	return node;
 }
 
 static inline void free_stable_node(struct ksm_stable_node *stable_node)
@@ -777,9 +805,10 @@ static inline int get_kpfn_nid(unsigned long kpfn)
 }
 
 static struct ksm_stable_node *alloc_stable_node_chain(struct ksm_stable_node *dup,
-						   struct rb_root *root)
+						   struct rb_root *root,
+						   struct partition_kobj *partition)
 {
-	struct ksm_stable_node *chain = alloc_stable_node();
+	struct ksm_stable_node *chain = alloc_stable_node(partition);
 	VM_BUG_ON(is_stable_node_chain(dup));
 	if (likely(chain)) {
 		INIT_HLIST_HEAD(&chain->hlist);
@@ -1016,7 +1045,8 @@ static void remove_rmap_item_from_tree(struct ksm_rmap_item *rmap_item)
 		unsigned char age = get_rmap_item_age(rmap_item);
 		if (!age)
 			rb_erase(&rmap_item->node,
-				 root_unstable_tree + NUMA(rmap_item->nid));
+				 rmap_item->partition->root_unstable_tree +
+				 NUMA(rmap_item->nid));
 		ksm_pages_unshared--;
 		rmap_item->address &= PAGE_MASK;
 	}
@@ -1154,17 +1184,23 @@ static int remove_all_stable_nodes(void)
 	struct ksm_stable_node *stable_node, *next;
 	int nid;
 	int err = 0;
-
-	for (nid = 0; nid < ksm_nr_node_ids; nid++) {
-		while (root_stable_tree[nid].rb_node) {
-			stable_node = rb_entry(root_stable_tree[nid].rb_node,
-						struct ksm_stable_node, node);
-			if (remove_stable_node_chain(stable_node,
-						     root_stable_tree + nid)) {
-				err = -EBUSY;
-				break;	/* proceed to next nid */
+	struct partition_kobj *partition;
+	struct rb_root *root_stable_tree;
+
+	list_for_each_entry(partition, &partition_list, list) {
+		root_stable_tree = partition->root_stable_tree;
+
+		for (nid = 0; nid < ksm_nr_node_ids; nid++) {
+			while (root_stable_tree[nid].rb_node) {
+				stable_node = rb_entry(root_stable_tree[nid].rb_node,
+						       struct ksm_stable_node, node);
+				if (remove_stable_node_chain(stable_node,
+							     root_stable_tree + nid)) {
+					err = -EBUSY;
+					break;	/* proceed to next nid */
+				}
+				cond_resched();
 			}
-			cond_resched();
 		}
 	}
 	list_for_each_entry_safe(stable_node, next, &migrate_nodes, list) {
@@ -1802,7 +1838,8 @@ static __always_inline struct folio *chain(struct ksm_stable_node **s_n_d,
  * This function returns the stable tree node of identical content if found,
  * -EBUSY if the stable node's page is being migrated, NULL otherwise.
  */
-static struct folio *stable_tree_search(struct page *page)
+static struct folio *stable_tree_search(struct page *page,
+					struct partition_kobj *partition)
 {
 	int nid;
 	struct rb_root *root;
@@ -1821,7 +1858,7 @@ static struct folio *stable_tree_search(struct page *page)
 	}
 
 	nid = get_kpfn_nid(folio_pfn(folio));
-	root = root_stable_tree + nid;
+	root = partition->root_stable_tree + nid;
 again:
 	new = &root->rb_node;
 	parent = NULL;
@@ -1991,7 +2028,7 @@ static struct folio *stable_tree_search(struct page *page)
 		VM_BUG_ON(is_stable_node_dup(stable_node_dup));
 		/* chain is missing so create it */
 		stable_node = alloc_stable_node_chain(stable_node_dup,
-						      root);
+						      root, partition);
 		if (!stable_node)
 			return NULL;
 	}
@@ -2016,7 +2053,8 @@ static struct folio *stable_tree_search(struct page *page)
  * This function returns the stable tree node just allocated on success,
  * NULL otherwise.
  */
-static struct ksm_stable_node *stable_tree_insert(struct folio *kfolio)
+static struct ksm_stable_node *stable_tree_insert(struct folio *kfolio,
+						  struct partition_kobj *partition)
 {
 	int nid;
 	unsigned long kpfn;
@@ -2028,7 +2066,7 @@ static struct ksm_stable_node *stable_tree_insert(struct folio *kfolio)
 
 	kpfn = folio_pfn(kfolio);
 	nid = get_kpfn_nid(kpfn);
-	root = root_stable_tree + nid;
+	root = partition->root_stable_tree + nid;
 again:
 	parent = NULL;
 	new = &root->rb_node;
@@ -2067,7 +2105,7 @@ static struct ksm_stable_node *stable_tree_insert(struct folio *kfolio)
 		}
 	}
 
-	stable_node_dup = alloc_stable_node();
+	stable_node_dup = alloc_stable_node(partition);
 	if (!stable_node_dup)
 		return NULL;
 
@@ -2082,7 +2120,8 @@ static struct ksm_stable_node *stable_tree_insert(struct folio *kfolio)
 		if (!is_stable_node_chain(stable_node)) {
 			struct ksm_stable_node *orig = stable_node;
 			/* chain is missing so create it */
-			stable_node = alloc_stable_node_chain(orig, root);
+			stable_node = alloc_stable_node_chain(orig, root,
+							      partition);
 			if (!stable_node) {
 				free_stable_node(stable_node_dup);
 				return NULL;
@@ -2121,7 +2160,7 @@ struct ksm_rmap_item *unstable_tree_search_insert(struct ksm_rmap_item *rmap_ite
 	int nid;
 
 	nid = get_kpfn_nid(page_to_pfn(page));
-	root = root_unstable_tree + nid;
+	root = rmap_item->partition->root_unstable_tree + nid;
 	new = &root->rb_node;
 
 	while (*new) {
@@ -2291,7 +2330,7 @@ static void cmp_and_merge_page(struct page *page, struct ksm_rmap_item *rmap_ite
 	}
 
 	/* Start by searching for the folio in the stable tree */
-	kfolio = stable_tree_search(page);
+	kfolio = stable_tree_search(page, rmap_item->partition);
 	if (&kfolio->page == page && rmap_item->head == stable_node) {
 		folio_put(kfolio);
 		return;
@@ -2344,7 +2383,8 @@ static void cmp_and_merge_page(struct page *page, struct ksm_rmap_item *rmap_ite
 			 * node in the stable tree and add both rmap_items.
 			 */
 			folio_lock(kfolio);
-			stable_node = stable_tree_insert(kfolio);
+			stable_node = stable_tree_insert(kfolio,
+							 rmap_item->partition);
 			if (stable_node) {
 				stable_tree_append(tree_rmap_item, stable_node,
 						   false);
@@ -2502,7 +2542,8 @@ static struct ksm_rmap_item *retrieve_rmap_item(struct page **page,
 }
 
 static void ksm_sync_merge(struct mm_struct *mm,
-			   unsigned long start, unsigned long end)
+			   unsigned long start, unsigned long end,
+			   struct partition_kobj *partition)
 {
 	struct ksm_rmap_item *rmap_item;
 	struct page *page;
@@ -2510,6 +2551,7 @@ static void ksm_sync_merge(struct mm_struct *mm,
 	rmap_item = retrieve_rmap_item(&page, mm, start, end);
 	if (!rmap_item)
 		return;
+	rmap_item->partition = partition;
 	cmp_and_merge_page(page, rmap_item);
 	put_page(page);
 }
@@ -3328,19 +3370,24 @@ static void ksm_check_stable_tree(unsigned long start_pfn,
 	struct ksm_stable_node *stable_node, *next;
 	struct rb_node *node;
 	int nid;
-
-	for (nid = 0; nid < ksm_nr_node_ids; nid++) {
-		node = rb_first(root_stable_tree + nid);
-		while (node) {
-			stable_node = rb_entry(node, struct ksm_stable_node, node);
-			if (stable_node_chain_remove_range(stable_node,
-							   start_pfn, end_pfn,
-							   root_stable_tree +
-							   nid))
-				node = rb_first(root_stable_tree + nid);
-			else
-				node = rb_next(node);
-			cond_resched();
+	struct partition_kobj *partition;
+	struct rb_root *root_stable_tree;
+
+	list_for_each_entry(partition, &partition_list, list) {
+		root_stable_tree = partition->root_stable_tree;
+
+		for (nid = 0; nid < ksm_nr_node_ids; nid++) {
+			node = rb_first(root_stable_tree + nid);
+			while (node) {
+				stable_node = rb_entry(node, struct ksm_stable_node, node);
+				if (stable_node_chain_remove_range(stable_node,
+								   start_pfn, end_pfn,
+								   root_stable_tree + nid))
+					node = rb_first(root_stable_tree + nid);
+				else
+					node = rb_next(node);
+				cond_resched();
+			}
 		}
 	}
 	list_for_each_entry_safe(stable_node, next, &migrate_nodes, list) {
@@ -3551,6 +3597,7 @@ static ssize_t trigger_merge_store(struct kobject *kobj,
 	int ret;
 	struct task_struct *task;
 	struct mm_struct *mm;
+	struct partition_kobj *partition;
 
 	input = kstrdup(buf, GFP_KERNEL);
 	if (!input)
@@ -3583,9 +3630,15 @@ static ssize_t trigger_merge_store(struct kobject *kobj,
 	if (!mm)
 		return -EINVAL;
 
+	partition = find_partition_by_kobj(kobj);
+	if (!partition) {
+		mmput(mm);
+		return -EINVAL;
+	}
+
 	mutex_lock(&ksm_thread_mutex);
 	wait_while_offlining();
-	ksm_sync_merge(mm, start, end);
+	ksm_sync_merge(mm, start, end, partition);
 	mutex_unlock(&ksm_thread_mutex);
 
 	mmput(mm);
@@ -3606,6 +3657,8 @@ static ssize_t merge_across_nodes_store(struct kobject *kobj,
 {
 	int err;
 	unsigned long knob;
+	struct rb_root *root_stable_tree;
+	struct partition_kobj *partition;
 
 	err = kstrtoul(buf, 10, &knob);
 	if (err)
@@ -3615,6 +3668,14 @@ static ssize_t merge_across_nodes_store(struct kobject *kobj,
 
 	mutex_lock(&ksm_thread_mutex);
 	wait_while_offlining();
+
+	partition = find_partition_by_kobj(kobj);
+	if (!partition) {
+		mutex_unlock(&ksm_thread_mutex);
+		return -EINVAL;
+	}
+	root_stable_tree = partition->root_stable_tree;
+
 	if (ksm_merge_across_nodes != knob) {
 		if (ksm_pages_shared || remove_all_stable_nodes())
 			err = -EBUSY;
@@ -3633,10 +3690,10 @@ static ssize_t merge_across_nodes_store(struct kobject *kobj,
 			if (!buf)
 				err = -ENOMEM;
 			else {
-				root_stable_tree = buf;
-				root_unstable_tree = buf + nr_node_ids;
+				partition->root_stable_tree = buf;
+				partition->root_unstable_tree = buf + nr_node_ids;
 				/* Stable tree is empty but not the unstable */
-				root_unstable_tree[0] = one_unstable_tree[0];
+				partition->root_unstable_tree[0] = one_unstable_tree[0];
 			}
 		}
 		if (!err) {
@@ -3834,14 +3891,6 @@ KSM_ATTR_RO(full_scans);
 
 #ifdef CONFIG_SELECTIVE_KSM
 static struct kobject *ksm_base_kobj;
-
-struct partition_kobj {
-	struct kobject *kobj;
-	struct list_head list;
-};
-
-static LIST_HEAD(partition_list);
-
 #else /* CONFIG_SELECTIVE_KSM */
 static ssize_t smart_scan_show(struct kobject *kobj,
 			       struct kobj_attribute *attr, char *buf)
@@ -4055,6 +4104,7 @@ static ssize_t add_partition_store(struct kobject *kobj,
 	struct partition_kobj *new_partition_kobj;
 	char partition_name[50];
 	int err;
+	struct rb_root *tree_root;
 
 	mutex_lock(&ksm_thread_mutex);
 
@@ -4081,6 +4131,13 @@ static ssize_t add_partition_store(struct kobject *kobj,
 		goto unlock;
 	}
 
+	tree_root = kcalloc(nr_node_ids + nr_node_ids, sizeof(*tree_root), GFP_KERNEL);
+	if (!tree_root) {
+		err = -ENOMEM;
+		goto unlock;
+	}
+	new_partition_kobj->root_stable_tree = tree_root;
+	new_partition_kobj->root_unstable_tree = tree_root + nr_node_ids;
 	err = sysfs_create_group(new_partition_kobj->kobj, &ksm_attr_group);
 	if (err) {
 		pr_err("ksm: register sysfs failed\n");
-- 
2.49.0.395.g12beb8f557-goog