Partition can be created or opened using:

  int ksm_fd = ksm_open(ksm_name, flags);

ksm_name specifies the ksm partition to be created or opened.

flags:
  O_CREAT  Create the ksm partition object if it does not exist.
  O_EXCL   If O_CREAT was also specified, and a ksm partition object
           with the given name already exists, return an error.

Trigger the merge using:

  ksm_merge(ksm_fd, pid, start_addr, size);

Limitation: the syscalls are only wired up for x86 (syscall_64.tbl).

Signed-off-by: Sourav Panda <souravpanda@xxxxxxxxxx>
---
 arch/x86/entry/syscalls/syscall_64.tbl |   3 +-
 include/linux/ksm.h                    |   4 +
 mm/ksm.c                               | 156 ++++++++++++++++++++++++-
 3 files changed, 161 insertions(+), 2 deletions(-)

diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl
index 5eb708bff1c7..352d747dbe33 100644
--- a/arch/x86/entry/syscalls/syscall_64.tbl
+++ b/arch/x86/entry/syscalls/syscall_64.tbl
@@ -390,7 +390,8 @@
 464	common	getxattrat		sys_getxattrat
 465	common	listxattrat		sys_listxattrat
 466	common	removexattrat		sys_removexattrat
-
+467	common	ksm_open		sys_ksm_open
+468	common	ksm_merge		sys_ksm_merge
 #
 # Due to a historical design error, certain syscalls are numbered differently
 # in x32 as compared to native x86_64. These syscalls have numbers 512-547.
diff --git a/include/linux/ksm.h b/include/linux/ksm.h
index d73095b5cd96..a94c89403c29 100644
--- a/include/linux/ksm.h
+++ b/include/linux/ksm.h
@@ -14,6 +14,11 @@
 #include <linux/rmap.h>
 #include <linux/sched.h>
 
+#include <linux/anon_inodes.h>
+#include <linux/ptrace.h>
+#include <linux/syscalls.h>
+#define MAX_KSM_NAME_LEN 128
+
 #ifdef CONFIG_KSM
 int ksm_madvise(struct vm_area_struct *vma, unsigned long start,
 		unsigned long end, int advice, unsigned long *vm_flags);
diff --git a/mm/ksm.c b/mm/ksm.c
index fd7626d5d8c9..71558120b034 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -147,7 +147,9 @@ struct ksm_scan {
 static struct kobject *ksm_base_kobj;
 
 struct partition_kobj {
-	struct kobject *kobj;
+	struct kobject *kobj; /* Not required for the syscall interface */
+	char name[MAX_KSM_NAME_LEN];
+	unsigned int nr_users; /* ksm_open() refs, under ksm_partition_lock */
 	struct list_head list;
 	struct rb_root *root_stable_tree;
 	struct rb_root *root_unstable_tree;
@@ -166,6 +168,179 @@ static struct partition_kobj *find_partition_by_kobj(struct kobject *kobj)
 	return NULL;
 }
 
+/*
+ * Serialises the syscall interface's accesses to partition_list and protects
+ * partition_kobj::nr_users.
+ */
+static DEFINE_MUTEX(ksm_partition_lock);
+
+/*
+ * Look up a partition by name.  Caller must hold ksm_partition_lock.
+ * Returns the matching partition_list entry, or NULL if there is none.
+ */
+static struct partition_kobj *find_ksm_partition(const char *partition_name)
+{
+	struct partition_kobj *partition;
+
+	list_for_each_entry(partition, &partition_list, list) {
+		if (strcmp(partition->name, partition_name) == 0)
+			return partition;
+	}
+	return NULL;
+}
+
+/*
+ * Allocate a partition named @ksm_name together with its per-node stable and
+ * unstable tree roots and link it into partition_list.  Caller must hold
+ * ksm_partition_lock.  Returns NULL if an allocation fails.
+ */
+static struct partition_kobj *ksm_create_partition(const char *ksm_name)
+{
+	struct partition_kobj *partition;
+	struct rb_root *tree_root;
+
+	partition = kzalloc(sizeof(*partition), GFP_KERNEL);
+	if (!partition)
+		return NULL;
+
+	/* One array: stable roots first, unstable roots behind them. */
+	tree_root = kcalloc(nr_node_ids + nr_node_ids, sizeof(*tree_root),
+			    GFP_KERNEL);
+	if (!tree_root) {
+		kfree(partition);
+		return NULL;
+	}
+
+	partition->root_stable_tree = tree_root;
+	partition->root_unstable_tree = tree_root + nr_node_ids;
+	/* Unlike strncpy(), strscpy() always NUL-terminates the destination. */
+	strscpy(partition->name, ksm_name, sizeof(partition->name));
+
+	list_add(&partition->list, &partition_list);
+
+	return partition;
+}
+
+/*
+ * Drop one reference on @partition.  Dropping the last one unlinks the
+ * partition and frees it along with its tree-root array.
+ *
+ * NOTE(review): nodes still linked into the partition's trees are not torn
+ * down here - confirm against the merge path whether that is needed.
+ */
+static void ksm_put_partition(struct partition_kobj *partition)
+{
+	bool last;
+
+	mutex_lock(&ksm_partition_lock);
+	last = --partition->nr_users == 0;
+	if (last)
+		list_del(&partition->list);
+	mutex_unlock(&ksm_partition_lock);
+
+	if (!last)
+		return;
+
+	/* root_unstable_tree points into the same allocation. */
+	kfree(partition->root_stable_tree);
+	kfree(partition);
+}
+
+/* ->release: the file held one partition reference taken in ksm_open(). */
+static int ksm_release(struct inode *inode, struct file *file)
+{
+	ksm_put_partition(file->private_data);
+	return 0;
+}
+
+static const struct file_operations ksm_fops = {
+	.release = ksm_release,
+};
+
+/*
+ * Install a new anonymous-inode file for @partition and return its fd, or a
+ * negative errno.  The fd is reserved before the file is created so that a
+ * failure never has a file to fput(): ->release does not run on error and the
+ * caller keeps ownership of its partition reference.
+ */
+static int ksm_partition_fd(struct partition_kobj *partition)
+{
+	struct file *file;
+	int fd;
+
+	fd = get_unused_fd_flags(O_RDWR);
+	if (fd < 0)
+		return fd;
+
+	file = anon_inode_getfile("ksm_partition", &ksm_fops, partition, O_RDWR);
+	if (IS_ERR(file)) {
+		put_unused_fd(fd);
+		return PTR_ERR(file);
+	}
+
+	fd_install(fd, file);
+	return fd;
+}
+
+/*
+ * ksm_open() - open, and optionally create, a named KSM partition.
+ * @ksm_name: user pointer to the NUL-terminated partition name; must be
+ *	      non-empty and shorter than MAX_KSM_NAME_LEN.
+ * @flags: O_CREAT creates the partition if it does not exist; O_CREAT|O_EXCL
+ *	   additionally fails if it already exists.
+ *
+ * Returns a new file descriptor, or -EFAULT, -EINVAL, -ENAMETOOLONG, -EEXIST,
+ * -ENOENT, -ENOMEM, or an error from file/fd allocation.
+ */
+SYSCALL_DEFINE2(ksm_open, const char __user *, ksm_name, int, flags)
+{
+	char name[MAX_KSM_NAME_LEN];
+	struct partition_kobj *partition;
+	long len;
+	int fd;
+
+	len = strncpy_from_user(name, ksm_name, sizeof(name));
+	if (len < 0)
+		return -EFAULT;
+	/* No NUL found within the buffer: name[] is unterminated. */
+	if (len == sizeof(name))
+		return -ENAMETOOLONG;
+	/* An empty name could match a list entry whose name was never set. */
+	if (!len)
+		return -EINVAL;
+
+	/* Lookup, creation and the reference grab must be one atomic step. */
+	mutex_lock(&ksm_partition_lock);
+	partition = find_ksm_partition(name);
+	if (partition) {
+		if ((flags & O_CREAT) && (flags & O_EXCL)) {
+			fd = -EEXIST;
+			goto out_unlock;
+		}
+	} else {
+		if (!(flags & O_CREAT)) {
+			fd = -ENOENT;
+			goto out_unlock;
+		}
+		partition = ksm_create_partition(name);
+		if (!partition) {
+			fd = -ENOMEM;
+			goto out_unlock;
+		}
+	}
+	partition->nr_users++;
+	mutex_unlock(&ksm_partition_lock);
+
+	fd = ksm_partition_fd(partition);
+	if (fd < 0)
+		ksm_put_partition(partition);
+	return fd;
+
+out_unlock:
+	mutex_unlock(&ksm_partition_lock);
+	return fd;
+}
+
 /**
  * struct ksm_stable_node - node of the stable rbtree
  * @node: rb node of this ksm page in the stable tree
@@ -4324,6 +4499,76 @@ static int __init ksm_thread_sysfs_init(void)
 }
 #endif /* CONFIG_SELECTIVE_KSM */
 
+/*
+ * ksm_merge() - merge a range of a process' address space into a partition.
+ * @ksm_fd: descriptor obtained from ksm_open().
+ * @pid: target process, looked up in the caller's pid namespace.
+ * @start: first address of the range.
+ * @size: length of the range in bytes; must be non-zero.
+ *
+ * Returns 0 once ksm_sync_merge() has run, -EBADF or -EINVAL for a bad
+ * descriptor or range, -ESRCH if @pid does not exist, -EINVAL if it has no
+ * mm, or the error from mm_access() if the caller may not attach to it.
+ */
+SYSCALL_DEFINE4(ksm_merge, int, ksm_fd, pid_t, pid, unsigned long, start,
+		size_t, size)
+{
+	unsigned long end = start + size;
+	struct partition_kobj *partition;
+	struct task_struct *task;
+	struct mm_struct *mm;
+	struct file *file;
+	int ret = 0;
+
+	file = fget(ksm_fd);
+	if (!file)
+		return -EBADF;
+
+	/* private_data is only a partition for files made by ksm_open(). */
+	if (file->f_op != &ksm_fops) {
+		ret = -EINVAL;
+		goto out_fput;
+	}
+	partition = file->private_data;
+
+	/* Rejects an empty range as well as start + size wrapping around. */
+	if (start >= end) {
+		ret = -EINVAL;
+		goto out_fput;
+	}
+
+	rcu_read_lock();
+	task = find_task_by_vpid(pid);
+	if (task)
+		get_task_struct(task);
+	rcu_read_unlock();
+	if (!task) {
+		ret = -ESRCH;
+		goto out_fput;
+	}
+
+	/*
+	 * Changing how another process' pages are shared requires the same
+	 * access as attaching to it; the caller's own mm is always allowed.
+	 */
+	mm = mm_access(task, PTRACE_MODE_ATTACH_FSCREDS);
+	put_task_struct(task);
+	if (IS_ERR_OR_NULL(mm)) {
+		ret = mm ? PTR_ERR(mm) : -EINVAL;
+		goto out_fput;
+	}
+
+	mutex_lock(&ksm_thread_mutex);
+	wait_while_offlining();
+	ksm_sync_merge(mm, start, end, partition);
+	mutex_unlock(&ksm_thread_mutex);
+
+	mmput(mm);
+out_fput:
+	fput(file);
+	return ret;
+}
+
 static int __init ksm_init(void)
 {
 	int err;
-- 
2.49.0.395.g12beb8f557-goog