Add a ram-based filesystem that contains page table sharing information and files that enable processes to share page tables. This patch adds the basic filesystem that can be mounted. Signed-off-by: Khalid Aziz <khalid.aziz@xxxxxxxxxx> --- Documentation/filesystems/msharefs.rst | 19 +++++ include/uapi/linux/magic.h | 1 + mm/Makefile | 2 +- mm/mshare.c | 103 +++++++++++++++++++++++++ 4 files changed, 124 insertions(+), 1 deletion(-) create mode 100644 Documentation/filesystems/msharefs.rst create mode 100644 mm/mshare.c diff --git a/Documentation/filesystems/msharefs.rst b/Documentation/filesystems/msharefs.rst new file mode 100644 index 000000000000..fd161f67045d --- /dev/null +++ b/Documentation/filesystems/msharefs.rst @@ -0,0 +1,19 @@ +.. SPDX-License-Identifier: GPL-2.0 + +===================================================== +msharefs - a filesystem to support shared page tables +===================================================== + +msharefs is a ram-based filesystem that allows multiple processes to +share page table entries for shared pages. + +msharefs is typically mounted like this:: + + mount -t msharefs none /sys/fs/mshare + +When a process calls the mshare syscall with a name for the shared address +range, a file with that name is created under msharefs. +This file can be opened by another process, if permissions +allow, to query the addresses shared under this range. These files are +removed by the mshare_unlink syscall and cannot be deleted directly. +Hence these files are created as immutable files. 
diff --git a/include/uapi/linux/magic.h b/include/uapi/linux/magic.h index f724129c0425..2a57a6ec6f3e 100644 --- a/include/uapi/linux/magic.h +++ b/include/uapi/linux/magic.h @@ -105,5 +105,6 @@ #define Z3FOLD_MAGIC 0x33 #define PPC_CMM_MAGIC 0xc7571590 #define SECRETMEM_MAGIC 0x5345434d /* "SECM" */ +#define MSHARE_MAGIC 0x4d534852 /* "MSHR" */ #endif /* __LINUX_MAGIC_H__ */ diff --git a/mm/Makefile b/mm/Makefile index 6f9ffa968a1a..51a2ab9080d9 100644 --- a/mm/Makefile +++ b/mm/Makefile @@ -37,7 +37,7 @@ CFLAGS_init-mm.o += $(call cc-disable-warning, override-init) CFLAGS_init-mm.o += $(call cc-disable-warning, initializer-overrides) mmu-y := nommu.o -mmu-$(CONFIG_MMU) := highmem.o memory.o mincore.o \ +mmu-$(CONFIG_MMU) := highmem.o memory.o mincore.o mshare.o \ mlock.o mmap.o mmu_gather.o mprotect.o mremap.o \ msync.o page_vma_mapped.o pagewalk.o \ pgtable-generic.o rmap.o vmalloc.o diff --git a/mm/mshare.c b/mm/mshare.c new file mode 100644 index 000000000000..c8fab3869bab --- /dev/null +++ b/mm/mshare.c @@ -0,0 +1,103 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Enable cooperating processes to share page tables between + * them to reduce the extra memory consumed by multiple copies + * of page tables. + * + * This code adds an in-memory filesystem - msharefs. + * msharefs is used to manage page table sharing. + * + * + * Copyright (C) 2022 Oracle Corp. All rights reserved. 
+ * Author: Khalid Aziz <khalid.aziz@xxxxxxxxxx>
+ *
+ */
+
+#include <linux/fs.h>
+#include <linux/mount.h>
+#include <linux/syscalls.h>
+#include <linux/uaccess.h>
+#include <linux/pseudo_fs.h>
+#include <linux/fileattr.h>
+#include <uapi/linux/magic.h>
+#include <uapi/linux/limits.h>
+
+/*
+ * Superblock of the msharefs instance, recorded by msharefs_fill_super()
+ * and reset to NULL by msharefs_kill_sb().
+ */
+static struct super_block *msharefs_sb;
+
+/*
+ * File operations for msharefs files: opening is handled by simple_open()
+ * and seeking is refused through no_llseek().
+ * NOTE(review): nothing in this file references this table yet; presumably
+ * a later change in the series installs it on the files it creates.
+ */
+static const struct file_operations msharefs_file_operations = {
+	.open		= simple_open,
+	.llseek		= no_llseek,
+};
+
+/*
+ * msharefs_d_hash - compute the dcache hash for a name in msharefs
+ * @dentry: dentry used to seed the hash via init_name_hash()
+ * @qstr:   name to hash; its ->hash field is overwritten with the result
+ *
+ * Folds every byte of the name into the hash with partial_name_hash() and
+ * finalises it with end_name_hash().
+ *
+ * Return: always 0.
+ */
+static int
+msharefs_d_hash(const struct dentry *dentry, struct qstr *qstr)
+{
+	unsigned long hash = init_name_hash(dentry);
+	const unsigned char *s = qstr->name;
+	unsigned int len = qstr->len;
+
+	while (len--)
+		hash = partial_name_hash(*s++, hash);
+	qstr->hash = end_name_hash(hash);
+	return 0;
+}
+
+/* Dentry operations installed on the msharefs superblock. */
+static const struct dentry_operations msharefs_d_ops = {
+	.d_hash = msharefs_d_hash,
+};
+
+/*
+ * msharefs_fill_super - initialise a freshly allocated msharefs superblock
+ * @sb: superblock to set up
+ * @fc: filesystem context of the mount request (not used here)
+ *
+ * Installs the msharefs dentry operations, lets simple_fill_super() build
+ * the root from an empty file table, and records the superblock in
+ * msharefs_sb once that has succeeded.
+ *
+ * Return: 0 on success, otherwise the error from simple_fill_super().
+ */
+static int
+msharefs_fill_super(struct super_block *sb, struct fs_context *fc)
+{
+	/* A single entry with an empty name terminates the table at once. */
+	static const struct tree_descr empty_descr = {""};
+	int err;
+
+	sb->s_d_op = &msharefs_d_ops;
+	err = simple_fill_super(sb, MSHARE_MAGIC, &empty_descr);
+	if (err)
+		return err;
+
+	msharefs_sb = sb;
+	return 0;
+}
+
+/*
+ * msharefs_get_tree - obtain the msharefs tree for a mount request
+ * @fc: filesystem context of the mount request
+ *
+ * Return: the result of get_tree_single() using msharefs_fill_super().
+ */
+static int
+msharefs_get_tree(struct fs_context *fc)
+{
+	return get_tree_single(fc, msharefs_fill_super);
+}
+
+/* Filesystem context operations: only tree acquisition is provided. */
+static const struct fs_context_operations msharefs_context_ops = {
+	.get_tree	= msharefs_get_tree,
+};
+
+/*
+ * mshare_init_fs_context - attach the msharefs context operations
+ * @fc: filesystem context being initialised
+ *
+ * Return: always 0.
+ */
+static int
+mshare_init_fs_context(struct fs_context *fc)
+{
+	fc->ops = &msharefs_context_ops;
+	return 0;
+}
+
+/*
+ * msharefs_kill_sb - tear down an msharefs superblock
+ * @sb: superblock being destroyed
+ *
+ * Forget the cached msharefs_sb pointer before handing the superblock to
+ * kill_litter_super(), so the pointer is never left referring to a
+ * superblock that has been released.  The comparison leaves the pointer
+ * alone if it was never set to this superblock.
+ */
+static void
+msharefs_kill_sb(struct super_block *sb)
+{
+	if (msharefs_sb == sb)
+		msharefs_sb = NULL;
+	kill_litter_super(sb);
+}
+
+/* Filesystem type registered under the name "msharefs". */
+static struct file_system_type mshare_fs = {
+	.name			= "msharefs",
+	.init_fs_context	= mshare_init_fs_context,
+	.kill_sb		= msharefs_kill_sb,
+};
+
+/*
+ * mshare_init - create the sysfs mount point and register msharefs
+ *
+ * Creates the "mshare" mount point under fs_kobj and then registers the
+ * filesystem type.  If registration fails, the mount point is removed
+ * again so nothing is left behind.
+ *
+ * Only run through fs_initcall(), hence marked __init.
+ *
+ * Return: 0 on success, otherwise the error from
+ * sysfs_create_mount_point() or register_filesystem().
+ */
+static int __init
+mshare_init(void)
+{
+	int ret;
+
+	ret = sysfs_create_mount_point(fs_kobj, "mshare");
+	if (ret)
+		return ret;
+
+	ret = register_filesystem(&mshare_fs);
+	if (ret)
+		sysfs_remove_mount_point(fs_kobj, "mshare");
+
+	return ret;
+}
+
+fs_initcall(mshare_init);
-- 
2.32.0