[RFC PATCH v2 08/12] famfs: module operations & fs_context

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Start building up from the famfs module operations. This commit
includes the following:

* Register as a file system
* Parse mount parameters
* Allocate or find (and initialize) a superblock via famfs_get_tree()
* Lookup the host dax device, and bail if it's in use (or not dax)
* Register as the holder of the dax device if it's available
* Add Kconfig and Makefile misc to build famfs
* Add FAMFS_SUPER_MAGIC to include/uapi/linux/magic.h
* Add export of fs/namei.c:may_open_dev(), which famfs needs to call
* Update MAINTAINERS file for the fs/famfs/ path

The following exports had to happen to enable famfs:

* This uses the new fs/super.c:kill_char_super() - the other kill*super
  helpers were not quite right.
* This uses the dev_dax_iomap export of dax_dev_get()

This commit builds but is otherwise too incomplete to run

Signed-off-by: John Groves <john@xxxxxxxxxx>
---
 MAINTAINERS                |   1 +
 fs/Kconfig                 |   2 +
 fs/Makefile                |   1 +
 fs/famfs/Kconfig           |  10 ++
 fs/famfs/Makefile          |   5 +
 fs/famfs/famfs_inode.c     | 345 +++++++++++++++++++++++++++++++++++++
 fs/famfs/famfs_internal.h  |  36 ++++
 fs/namei.c                 |   1 +
 include/uapi/linux/magic.h |   1 +
 9 files changed, 402 insertions(+)
 create mode 100644 fs/famfs/Kconfig
 create mode 100644 fs/famfs/Makefile
 create mode 100644 fs/famfs/famfs_inode.c
 create mode 100644 fs/famfs/famfs_internal.h

diff --git a/MAINTAINERS b/MAINTAINERS
index 3f2d847dcf01..365d678e2f40 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -8188,6 +8188,7 @@ L:	linux-cxl@xxxxxxxxxxxxxxx
 L:	linux-fsdevel@xxxxxxxxxxxxxxx
 S:	Supported
 F:	Documentation/filesystems/famfs.rst
+F:	fs/famfs
 
 FANOTIFY
 M:	Jan Kara <jack@xxxxxxx>
diff --git a/fs/Kconfig b/fs/Kconfig
index a46b0cbc4d8f..53b4629e92a0 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -140,6 +140,8 @@ source "fs/autofs/Kconfig"
 source "fs/fuse/Kconfig"
 source "fs/overlayfs/Kconfig"
 
+source "fs/famfs/Kconfig"
+
 menu "Caches"
 
 source "fs/netfs/Kconfig"
diff --git a/fs/Makefile b/fs/Makefile
index 6ecc9b0a53f2..3393f399a9e9 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -129,3 +129,4 @@ obj-$(CONFIG_EFIVAR_FS)		+= efivarfs/
 obj-$(CONFIG_EROFS_FS)		+= erofs/
 obj-$(CONFIG_VBOXSF_FS)		+= vboxsf/
 obj-$(CONFIG_ZONEFS_FS)		+= zonefs/
+obj-$(CONFIG_FAMFS)		+= famfs/
diff --git a/fs/famfs/Kconfig b/fs/famfs/Kconfig
new file mode 100644
index 000000000000..edb8980820f7
--- /dev/null
+++ b/fs/famfs/Kconfig
@@ -0,0 +1,10 @@
+# SPDX-License-Identifier: GPL-2.0
+
+config FAMFS
+	tristate "famfs: shared memory file system"
+	depends on DEV_DAX && FS_DAX && DEV_DAX_IOMAP
+	help
+	  Support for the famfs file system. Famfs is a dax file system that
+	  can support scale-out shared access to fabric-attached memory
+	  (e.g. CXL shared memory). Famfs is not a general purpose file system;
+	  it is an enabler for data sets in shared memory.
diff --git a/fs/famfs/Makefile b/fs/famfs/Makefile
new file mode 100644
index 000000000000..62230bcd6793
--- /dev/null
+++ b/fs/famfs/Makefile
@@ -0,0 +1,5 @@
+# SPDX-License-Identifier: GPL-2.0
+
+obj-$(CONFIG_FAMFS) += famfs.o
+
+famfs-y := famfs_inode.o
diff --git a/fs/famfs/famfs_inode.c b/fs/famfs/famfs_inode.c
new file mode 100644
index 000000000000..61306240fc0b
--- /dev/null
+++ b/fs/famfs/famfs_inode.c
@@ -0,0 +1,345 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * famfs - dax file system for shared fabric-attached memory
+ *
+ * Copyright 2023-2024 Micron Technology, inc
+ *
+ * This file system, originally based on ramfs the dax support from xfs,
+ * is intended to allow multiple host systems to mount a common file system
+ * view of dax files that map to shared memory.
+ */
+
+#include <linux/fs.h>
+#include <linux/time.h>
+#include <linux/init.h>
+#include <linux/string.h>
+#include <linux/parser.h>
+#include <linux/magic.h>
+#include <linux/slab.h>
+#include <linux/fs_context.h>
+#include <linux/fs_parser.h>
+#include <linux/dax.h>
+#include <linux/hugetlb.h>
+#include <linux/iomap.h>
+#include <linux/path.h>
+#include <linux/namei.h>
+
+#include "famfs_internal.h"
+
+#define FAMFS_DEFAULT_MODE	0755
+
+/**
+ * famfs_get_inode() - allocate and initialize a new famfs inode
+ * @sb:   superblock the inode is allocated from
+ * @dir:  parent directory handed to inode_init_owner(); NULL for the root
+ * @mode: file type and permission bits
+ * @dev:  device number, only used for special (non reg/dir/symlink) inodes
+ *
+ * Return: the new inode, or NULL if new_inode() fails.
+ */
+static struct inode *famfs_get_inode(struct super_block *sb,
+				     const struct inode *dir,
+				     umode_t mode, dev_t dev)
+{
+	struct inode *inode = new_inode(sb);
+	struct timespec64 tv;
+
+	if (!inode)
+		return NULL;
+
+	inode->i_ino = get_next_ino();
+	inode_init_owner(&nop_mnt_idmap, inode, dir, mode);
+	inode->i_mapping->a_ops = &ram_aops;
+	mapping_set_gfp_mask(inode->i_mapping, GFP_HIGHUSER);
+	mapping_set_unevictable(inode->i_mapping);
+
+	/* All three timestamps start out identical */
+	tv = inode_set_ctime_current(inode);
+	inode_set_mtime_to_ts(inode, tv);
+	inode_set_atime_to_ts(inode, tv);
+
+	switch (mode & S_IFMT) {
+	default:
+		init_special_inode(inode, mode, dev);
+		break;
+	case S_IFREG:
+		/*
+		 * TODO: install famfs_file_inode_operations and
+		 * famfs_file_operations once they exist.
+		 *
+		 * Until then keep the defaults new_inode() installed. The VFS
+		 * dereferences i_op and i_fop without NULL checks, so they
+		 * must never be set to NULL.
+		 */
+		break;
+	case S_IFDIR:
+		/*
+		 * TODO: replace with famfs_dir_inode_operations.
+		 *
+		 * A NULL i_op would oops in d_make_root(), which looks at
+		 * i_op->lookup for directories; use the libfs default for now.
+		 */
+		inode->i_op = &simple_dir_inode_operations;
+		inode->i_fop = &simple_dir_operations;
+
+		/* Directory inodes start off with i_nlink == 2 (for ".") */
+		inc_nlink(inode);
+		break;
+	case S_IFLNK:
+		inode->i_op = &page_symlink_inode_operations;
+		inode_nohighmem(inode);
+		break;
+	}
+	return inode;
+}
+
+/*
+ * famfs dax_operations  (for char dax)
+ */
+/**
+ * famfs_dax_notify_failure() - dax holder callback for a memory failure
+ * @dax_dev:  the dax device reporting the failure
+ * @offset:   start of the affected range
+ * @len:      length of the affected range
+ * @mf_flags: memory-failure flags passed through by the caller
+ *
+ * Logs the affected range and records in the fs info that the device has
+ * reported an error.
+ *
+ * Return: always 0.
+ */
+static int
+famfs_dax_notify_failure(struct dax_device *dax_dev, u64 offset,
+			u64 len, int mf_flags)
+{
+	/* The holder registered via fs_dax_get() is the super_block */
+	struct super_block *sb = dax_holder(dax_dev);
+	struct famfs_fs_info *fsi = sb->s_fs_info;
+
+	/* offset and len are both u64: print them unsigned */
+	pr_err("%s: rootdev=%s offset=%llu len=%llu flags=%x\n", __func__,
+	       fsi->rootdev, offset, len, mf_flags);
+
+	/* deverror is documented as "notify_failure has been called" */
+	fsi->deverror = true;
+
+	return 0;
+}
+
+/* Holder callbacks that famfs_get_tree() hands to fs_dax_get() */
+static const struct dax_holder_operations famfs_dax_holder_ops = {
+	.notify_failure		= famfs_dax_notify_failure,
+};
+
+/*****************************************************************************
+ * fs_context_operations
+ */
+
+/**
+ * famfs_fill_super() - set the static fields of a new famfs superblock
+ * @sb: the superblock to initialize
+ * @fc: the filesystem context of the mount (currently unused)
+ *
+ * Return: always 0.
+ */
+static int
+famfs_fill_super(struct super_block *sb, struct fs_context *fc)
+{
+	sb->s_maxbytes		= MAX_LFS_FILESIZE;
+	sb->s_blocksize		= PAGE_SIZE;
+	sb->s_blocksize_bits	= PAGE_SHIFT;
+	sb->s_magic		= FAMFS_SUPER_MAGIC;
+	sb->s_time_gran		= 1;
+
+	/*
+	 * TODO: install famfs_super_ops once it exists.
+	 *
+	 * Do not set s_op to NULL in the meantime: the VFS dereferences it
+	 * unconditionally (e.g. on iput()), so leave whatever operations the
+	 * superblock was allocated with.
+	 */
+
+	return 0;
+}
+
+/**
+ * lookup_daxdev() - resolve a path to the dev_t of a character device
+ * @pathname: path of the device node (symlinks are followed)
+ * @devno:    set to the device number on success, untouched on failure
+ *
+ * Return: 0 on success; -EINVAL if @pathname is NULL/empty or does not name
+ * a character device; -EACCES if may_open_dev() refuses the path; otherwise
+ * the error returned by kern_path().
+ */
+static int
+lookup_daxdev(const char *pathname, dev_t *devno)
+{
+	struct inode *devinode;
+	struct path daxpath;
+	int rc;
+
+	if (!pathname || *pathname == '\0')
+		return -EINVAL;
+
+	rc = kern_path(pathname, LOOKUP_FOLLOW, &daxpath);
+	if (rc)
+		return rc;
+
+	/* rc is 0 from here on unless one of the checks below fails */
+	devinode = d_backing_inode(daxpath.dentry);
+	if (!S_ISCHR(devinode->i_mode))
+		rc = -EINVAL;
+	else if (!may_open_dev(&daxpath)) /* had to export this */
+		rc = -EACCES;
+	else
+		*devno = devinode->i_rdev; /* dev_t of the char device node */
+
+	path_put(&daxpath);
+	return rc;
+}
+
+/**
+ * famfs_get_tree() - find or create the superblock for a famfs mount
+ * @fc: the filesystem context; fc->source names the dax character device
+ *
+ * Looks up the device named by fc->source, gets a superblock keyed on its
+ * dev_t, claims the dax device with the superblock as holder and creates
+ * the root directory.
+ *
+ * Return: 0 on success with fc->root set. -EBUSY if a superblock for the
+ * device is already mounted or the dax device cannot be claimed, -ENODEV if
+ * the device is not dax, -ENOMEM on allocation failure, or the error from
+ * lookup_daxdev(), sget_dev() or famfs_fill_super().
+ */
+static int
+famfs_get_tree(struct fs_context *fc)
+{
+	struct famfs_fs_info *fsi = fc->s_fs_info;
+	struct dax_device *dax_devp;
+	struct super_block *sb;
+	struct inode *inode;
+	dev_t daxdevno;
+	int err;
+
+	/* TODO: clean up chatty messages */
+
+	err = lookup_daxdev(fc->source, &daxdevno);
+	if (err)
+		return err;
+
+	fsi->daxdevno = daxdevno;
+
+	/*
+	 * Remember the device path: famfs_dax_notify_failure() prints it.
+	 * It is freed together with fsi (famfs_free_fc() / famfs_kill_sb()).
+	 */
+	fsi->rootdev = kstrdup(fc->source, GFP_KERNEL);
+	if (!fsi->rootdev)
+		return -ENOMEM;
+
+	/* This will set sb->s_dev=daxdevno */
+	sb = sget_dev(fc, daxdevno);
+	if (IS_ERR(sb)) {
+		pr_err("%s: sget_dev error\n", __func__);
+		return PTR_ERR(sb);
+	}
+
+	if (sb->s_root) {
+		pr_info("%s: found a matching superblock for %s\n",
+			__func__, fc->source);
+
+		/* We don't expect to find a match by dev_t; if we do, it must
+		 * already be mounted, so we bail
+		 */
+		err = -EBUSY;
+		goto deactivate_out;
+	}
+
+	pr_info("%s: initializing new superblock for %s\n",
+		__func__, fc->source);
+	err = famfs_fill_super(sb, fc);
+	if (err)
+		goto deactivate_out;
+
+	/* This will fail if it's not a dax device */
+	dax_devp = dax_dev_get(daxdevno);
+	if (!dax_devp) {
+		pr_warn("%s: device %s not found or not dax\n",
+		       __func__, fc->source);
+		err = -ENODEV;
+		goto deactivate_out;
+	}
+
+	err = fs_dax_get(dax_devp, sb, &famfs_dax_holder_ops);
+	if (err) {
+		pr_err("%s: fs_dax_get(%llu) failed\n", __func__, (u64)daxdevno);
+		/*
+		 * NOTE(review): assumes dax_dev_get() returned a reference
+		 * that must be dropped when we fail to become the holder -
+		 * confirm against the dax_dev_get() export.
+		 */
+		put_dax(dax_devp);
+		err = -EBUSY;
+		goto deactivate_out;
+	}
+	fsi->dax_devp = dax_devp;
+
+	inode = famfs_get_inode(sb, NULL, S_IFDIR | fsi->mount_opts.mode, 0);
+	/* A NULL inode is not checked here; d_make_root() rejects it */
+	sb->s_root = d_make_root(inode);
+	if (!sb->s_root) {
+		pr_err("%s: d_make_root() failed\n", __func__);
+		err = -ENOMEM;
+		/*
+		 * Do not call fs_put_dax() here. deactivate_locked_super()
+		 * ends up in famfs_kill_sb(), which releases the dax device
+		 * because fsi->dax_devp is set; doing it here as well would
+		 * release it twice.
+		 */
+		goto deactivate_out;
+	}
+
+	sb->s_flags |= SB_ACTIVE;
+
+	WARN_ON(fc->root);
+	fc->root = dget(sb->s_root);
+	return 0;
+
+deactivate_out:
+	pr_debug("%s: deactivating sb=%p\n", __func__, sb);
+	deactivate_locked_super(sb);
+	return err;
+}
+
+/*****************************************************************************/
+
+/* Tokens for the mount options listed in famfs_fs_parameters[] */
+enum famfs_param {
+	Opt_mode,	/* "mode": octal permission bits */
+	Opt_dax,	/* "dax": string; only "always" is accepted silently */
+};
+
+/*
+ * Mount options understood by famfs. The table is only referenced from this
+ * file, so keep it out of the global namespace.
+ */
+static const struct fs_parameter_spec famfs_fs_parameters[] = {
+	fsparam_u32oct("mode",	  Opt_mode),
+	fsparam_string("dax",     Opt_dax),
+	{}
+};
+
+/**
+ * famfs_parse_param() - handle one mount parameter
+ * @fc:    the filesystem context being configured
+ * @param: the parameter to parse
+ *
+ * Parameters that are not in famfs_fs_parameters[] are offered to
+ * vfs_parse_fs_param_source(); if that does not recognize them either they
+ * are silently accepted.
+ *
+ * Return: 0 on success or for an ignored parameter, otherwise the negative
+ * error from fs_parse() or vfs_parse_fs_param_source().
+ */
+static int famfs_parse_param(struct fs_context *fc, struct fs_parameter *param)
+{
+	struct famfs_fs_info *fsi = fc->s_fs_info;
+	struct fs_parse_result parsed;
+	int token;
+
+	token = fs_parse(fc, famfs_fs_parameters, param, &parsed);
+	if (token == -ENOPARAM) {
+		/* Not one of ours: it may be "source", else ignore it */
+		token = vfs_parse_fs_param_source(fc, param);
+		return (token == -ENOPARAM) ? 0 : token;
+	}
+	if (token < 0)
+		return token;
+
+	if (token == Opt_mode) {
+		fsi->mount_opts.mode = parsed.uint_32 & S_IALLUGO;
+	} else if (token == Opt_dax) {
+		/* Anything but "always" is reported, but not rejected */
+		if (strcmp(param->string, "always") != 0)
+			pr_notice("%s: invalid dax mode %s\n",
+				  __func__, param->string);
+	}
+
+	return 0;
+}
+
+/*
+ * Release the famfs_fs_info still attached to the fs_context, if any,
+ * together with the device path string it owns.
+ */
+static void famfs_free_fc(struct fs_context *fc)
+{
+	struct famfs_fs_info *info = fc->s_fs_info;
+
+	/* kfree(NULL) is a no-op, so only the info pointer needs a check */
+	if (info)
+		kfree(info->rootdev);
+
+	kfree(info);
+}
+
+/* fs_context callbacks; installed on each context by famfs_init_fs_context() */
+static const struct fs_context_operations famfs_context_ops = {
+	.free		= famfs_free_fc,
+	.parse_param	= famfs_parse_param,
+	.get_tree	= famfs_get_tree,
+};
+
+/**
+ * famfs_init_fs_context() - set up a new fs_context for famfs
+ * @fc: the filesystem context to initialize
+ *
+ * Allocates a zeroed famfs_fs_info carrying the default mode and attaches it
+ * and the famfs context operations to @fc.
+ *
+ * Return: 0 on success, -ENOMEM if the allocation fails.
+ */
+static int famfs_init_fs_context(struct fs_context *fc)
+{
+	struct famfs_fs_info *info = kzalloc(sizeof(*info), GFP_KERNEL);
+
+	if (!info)
+		return -ENOMEM;
+
+	info->mount_opts.mode = FAMFS_DEFAULT_MODE;
+
+	fc->ops = &famfs_context_ops;
+	fc->s_fs_info = info;
+	return 0;
+}
+
+/**
+ * famfs_kill_sb() - tear down a famfs superblock
+ * @sb: the superblock being destroyed
+ *
+ * Releases the dax device if this superblock holds one, frees the fs info
+ * and its device path string, then hands the superblock to
+ * kill_char_super().
+ */
+static void famfs_kill_sb(struct super_block *sb)
+{
+	struct famfs_fs_info *fsi = sb->s_fs_info;
+
+	/* Check fsi once, before any member is touched */
+	if (fsi) {
+		if (fsi->dax_devp)
+			fs_put_dax(fsi->dax_devp, sb);
+		kfree(fsi->rootdev);	/* kfree(NULL) is a no-op */
+		kfree(fsi);
+		sb->s_fs_info = NULL;
+	}
+
+	kill_char_super(sb); /* new */
+}
+
+#define MODULE_NAME "famfs"
+/*
+ * The famfs file system type. FAMFS is tristate, so .owner must be set for
+ * the VFS to pin the module while the file system is in use.
+ */
+static struct file_system_type famfs_fs_type = {
+	.owner		  = THIS_MODULE,
+	.name		  = MODULE_NAME,
+	.init_fs_context  = famfs_init_fs_context,
+	.parameters	  = famfs_fs_parameters,
+	.kill_sb	  = famfs_kill_sb,
+	.fs_flags	  = FS_USERNS_MOUNT,
+};
+
+/******************************************************************************
+ * Module stuff
+ */
+/* Init entry point: register famfs as a file system type. */
+static int __init init_famfs_fs(void)
+{
+	return register_filesystem(&famfs_fs_type);
+}
+
+/* Exit entry point: unregister the famfs file system type and log it. */
+static void __exit famfs_exit(void)
+{
+	unregister_filesystem(&famfs_fs_type);
+	pr_info("%s: unregistered\n", __func__);
+}
+
+fs_initcall(init_famfs_fs);
+module_exit(famfs_exit);
+
+MODULE_AUTHOR("John Groves, Micron Technology");
+MODULE_LICENSE("GPL");
diff --git a/fs/famfs/famfs_internal.h b/fs/famfs/famfs_internal.h
new file mode 100644
index 000000000000..951b32ec4fbd
--- /dev/null
+++ b/fs/famfs/famfs_internal.h
@@ -0,0 +1,36 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * famfs - dax file system for shared fabric-attached memory
+ *
+ * Copyright 2023-2024 Micron Technology, Inc.
+ *
+ * This file system, originally based on ramfs the dax support from xfs,
+ * is intended to allow multiple host systems to mount a common file system
+ * view of dax files that map to shared memory.
+ */
+#ifndef FAMFS_INTERNAL_H
+#define FAMFS_INTERNAL_H
+
+/**
+ * struct famfs_mount_opts - options parsed at mount time
+ *
+ * @mode: permission bits from the "mode" mount option; used as the mode of
+ *        the root directory
+ */
+struct famfs_mount_opts {
+	umode_t mode;
+};
+
+/**
+ * struct famfs_fs_info - famfs private data, reachable through s_fs_info
+ *
+ * @mount_opts: the mount options
+ * @dax_devp:   The underlying character devdax device
+ * @rootdev:    Dax device path used in mount
+ * @daxdevno:   Dax device dev_t
+ * @deverror:   True if the dax device has called our notify_failure entry
+ *              point, or if other "shutdown" conditions exist
+ */
+struct famfs_fs_info {
+	struct famfs_mount_opts  mount_opts;
+	struct dax_device       *dax_devp;
+	char                    *rootdev;
+	dev_t                    daxdevno;
+	bool                     deverror;
+};
+
+#endif /* FAMFS_INTERNAL_H */
diff --git a/fs/namei.c b/fs/namei.c
index c5b2a25be7d0..f24b268473cd 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -3229,6 +3229,7 @@ bool may_open_dev(const struct path *path)
 	return !(path->mnt->mnt_flags & MNT_NODEV) &&
 		!(path->mnt->mnt_sb->s_iflags & SB_I_NODEV);
 }
+EXPORT_SYMBOL(may_open_dev);
 
 static int may_open(struct mnt_idmap *idmap, const struct path *path,
 		    int acc_mode, int flag)
diff --git a/include/uapi/linux/magic.h b/include/uapi/linux/magic.h
index 1b40a968ba91..e9bdd6a415e2 100644
--- a/include/uapi/linux/magic.h
+++ b/include/uapi/linux/magic.h
@@ -37,6 +37,7 @@
 #define HOSTFS_SUPER_MAGIC	0x00c0ffee
 #define OVERLAYFS_SUPER_MAGIC	0x794c7630
 #define FUSE_SUPER_MAGIC	0x65735546
+#define FAMFS_SUPER_MAGIC	0x87b282ff
 
 #define MINIX_SUPER_MAGIC	0x137F		/* minix v1 fs, 14 char names */
 #define MINIX_SUPER_MAGIC2	0x138F		/* minix v1 fs, 30 char names */
-- 
2.43.0





[Index of Archives]     [Linux Ext4 Filesystem]     [Union Filesystem]     [Filesystem Testing]     [Ceph Users]     [Ecryptfs]     [NTFS 3]     [AutoFS]     [Kernel Newbies]     [Share Photos]     [Security]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux Cachefs]     [Reiser Filesystem]     [Linux RAID]     [NTFS 3]     [Samba]     [Device Mapper]     [CEPH Development]

  Powered by Linux