[RFC PATCH 6/6] mm: syscall alternative for SELECTIVE_KSM

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Partition can be created or opened using:

  int ksm_fd = ksm_open(ksm_name, flag);
    name specifies the ksm partition to be created or opened.
    flags:
      O_CREAT
        Create the ksm partition object if it does not exist.
      O_EXCL
        If O_CREAT was also specified, and a ksm partition object
        with the given name already exists, return an error.

Trigger the merge using:
  ksm_merge(ksm_fd, pid, start_addr, size);

Limitation: Only supporting x86 syscall_64.

Signed-off-by: Sourav Panda <souravpanda@xxxxxxxxxx>
---
 arch/x86/entry/syscalls/syscall_64.tbl |   3 +-
 include/linux/ksm.h                    |   4 +
 mm/ksm.c                               | 156 ++++++++++++++++++++++++-
 3 files changed, 161 insertions(+), 2 deletions(-)

diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl
index 5eb708bff1c7..352d747dbe33 100644
--- a/arch/x86/entry/syscalls/syscall_64.tbl
+++ b/arch/x86/entry/syscalls/syscall_64.tbl
@@ -390,7 +390,8 @@
 464	common	getxattrat		sys_getxattrat
 465	common	listxattrat		sys_listxattrat
 466	common	removexattrat		sys_removexattrat
-
+467	common	ksm_open		sys_ksm_open
+468	common	ksm_merge		sys_ksm_merge
 #
 # Due to a historical design error, certain syscalls are numbered differently
 # in x32 as compared to native x86_64.  These syscalls have numbers 512-547.
diff --git a/include/linux/ksm.h b/include/linux/ksm.h
index d73095b5cd96..a94c89403c29 100644
--- a/include/linux/ksm.h
+++ b/include/linux/ksm.h
@@ -14,6 +14,10 @@
 #include <linux/rmap.h>
 #include <linux/sched.h>
 
+#include <linux/anon_inodes.h>
+#include <linux/syscalls.h>
+#define MAX_KSM_NAME_LEN 128
+
 #ifdef CONFIG_KSM
 int ksm_madvise(struct vm_area_struct *vma, unsigned long start,
 		unsigned long end, int advice, unsigned long *vm_flags);
diff --git a/mm/ksm.c b/mm/ksm.c
index fd7626d5d8c9..71558120b034 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -147,7 +147,8 @@ struct ksm_scan {
 static struct kobject *ksm_base_kobj;
 
 struct partition_kobj {
-	struct kobject *kobj;
+	struct kobject *kobj;	/* Not required for the syscall interface */
+	char name[MAX_KSM_NAME_LEN];
 	struct list_head list;
 	struct rb_root *root_stable_tree;
 	struct rb_root *root_unstable_tree;
@@ -166,6 +167,106 @@ static struct partition_kobj *find_partition_by_kobj(struct kobject *kobj)
 	return NULL;
 }
 
+static struct partition_kobj *find_ksm_partition(char *partition_name)
+{
+	struct partition_kobj *partition;
+
+	list_for_each_entry(partition, &partition_list, list) {
+		if (strcmp(partition->name, partition_name) == 0)
+			return partition;
+	}
+	return NULL;
+}
+
+static DEFINE_MUTEX(ksm_partition_lock);
+
+static int ksm_release(struct inode *inode, struct file *file)
+{
+	struct partition_kobj *ksm = file->private_data;
+
+	mutex_lock(&ksm_partition_lock);
+	list_del(&ksm->list);
+	mutex_unlock(&ksm_partition_lock);
+
+	kfree(ksm);
+	return 0;
+}
+
+static const struct file_operations ksm_fops = {
+	.release = ksm_release,
+};
+
+static struct partition_kobj *ksm_create_partition(char *ksm_name)
+{
+	struct partition_kobj *partition;
+	struct rb_root *tree_root;
+
+	partition = kzalloc(sizeof(*partition), GFP_KERNEL);
+	if (!partition)
+		return NULL;
+
+	tree_root = kcalloc(nr_node_ids + nr_node_ids, sizeof(*tree_root),
+			    GFP_KERNEL);
+	if (!tree_root)
+		return NULL;
+
+	partition->root_stable_tree = tree_root;
+	partition->root_unstable_tree = tree_root + nr_node_ids;
+	strncpy(partition->name, ksm_name, sizeof(partition->name));
+
+	list_add(&partition->list, &partition_list);
+
+	return partition;
+}
+
+static int ksm_partition_fd(struct partition_kobj *partition)
+{
+	int fd;
+	struct file *file;
+	int ret;
+
+	file = anon_inode_getfile("ksm_partition", &ksm_fops, partition, O_RDWR);
+	if (IS_ERR(file)) {
+		ret = PTR_ERR(file);
+		return ret;
+	}
+
+	fd = get_unused_fd_flags(O_RDWR);
+	if (fd < 0) {
+		fput(file);
+		return fd;
+	}
+	fd_install(fd, file);
+	return fd;
+}
+
+SYSCALL_DEFINE2(ksm_open, const char __user *, ksm_name, int, flags) {
+	char name[MAX_KSM_NAME_LEN];
+	struct partition_kobj *partition;
+	int ret;
+
+	ret = strncpy_from_user(name, ksm_name, sizeof(name));
+	if (ret < 0)
+		return -EFAULT;
+
+	partition = find_ksm_partition(name);
+
+	if (flags & O_EXCL && partition) /* Partition already exists, return error */
+		return -EEXIST;
+
+	if (flags & O_CREAT && !partition) {
+		/* Partition does not exist, but we are allowed to create one */
+		mutex_lock(&ksm_partition_lock);
+		partition = ksm_create_partition(name);
+		mutex_unlock(&ksm_partition_lock);
+	}
+
+	if (!partition)
+		return flags & O_CREAT ? -ENOMEM : -ENOENT;
+
+	return ksm_partition_fd(partition);
+}
+
 /**
  * struct ksm_stable_node - node of the stable rbtree
  * @node: rb node of this ksm page in the stable tree
@@ -4324,6 +4425,59 @@ static int __init ksm_thread_sysfs_init(void)
 }
 #endif /* CONFIG_SELECTIVE_KSM */
 
+SYSCALL_DEFINE4(ksm_merge, int, ksm_fd, pid_t, pid, unsigned long, start, size_t, size) {
+	unsigned long end = start + size;
+	struct task_struct *task;
+	struct mm_struct *mm;
+	struct partition_kobj *partition;
+	struct file *file;
+
+	file = fget(ksm_fd);
+	if (!file)
+		return -EBADF;
+
+	partition = file->private_data;
+	if (!partition) {
+		fput(file);
+		return -EINVAL;
+	}
+
+	if (start >= end) {
+		fput(file);
+		return -EINVAL;
+	}
+
+	/* Find the mm_struct */
+	rcu_read_lock();
+	task = find_task_by_vpid(pid);
+	if (!task) {
+		fput(file);
+		rcu_read_unlock();
+		return -ESRCH;
+	}
+
+	get_task_struct(task);
+
+	rcu_read_unlock();
+	mm = get_task_mm(task);
+	put_task_struct(task);
+
+	if (!mm) {
+		fput(file);
+		return -EINVAL;
+	}
+
+	mutex_lock(&ksm_thread_mutex);
+	wait_while_offlining();
+	ksm_sync_merge(mm, start, end, partition);
+	mutex_unlock(&ksm_thread_mutex);
+
+	mmput(mm);
+
+	fput(file);
+	return 0;
+}
+
 static int __init ksm_init(void)
 {
 	int err;
-- 
2.49.0.395.g12beb8f557-goog





[Index of Archives]     [Linux ARM Kernel]     [Linux ARM]     [Linux Omap]     [Fedora ARM]     [IETF Annouce]     [Bugtraq]     [Linux OMAP]     [Linux MIPS]     [eCos]     [Asterisk Internet PBX]     [Linux API]

  Powered by Linux