[RFC PATCH 06/17] zuf: mounting

Boaz harrosh <boaz@xxxxxxxxxxxxx> · Tue, 19 Feb 2019 13:51:25 +0200

From: Boaz Harrosh <boazh@xxxxxxxxxx>

In this patch we already establish a mounted filesystem.

These are the steps for mounting a zufs Filesystem:

* All devices (Single or DT) are opened and established in an md
  object. This md-object is given an pmem-id

* mount_bdev is called with the main (first) device, in turn
  fill_supper is called.

* fill_supper despatches a mount_operation(register_fs_info) to the
  server with the pmem-id of the md-object above.

*  The Server at the zus mount routine. Will first thing do
  a GRAB_PMEM(pmem-id) ioctl call to establish a special filehandle
  through which it will have full access to the all of its pmem space.
  With that it will call the zusFS to continue to inspect the content
  of the pmem and mount the FS.

* On return from mount the zusFS returns the root inode info

* fill_supper continues to create a root vfs-inode and returns
  successfully.

* We now have a mounted super_block, with corresponding super_block
  objects in the Server.

Signed-off-by: Boaz Harrosh <boazh@xxxxxxxxxx>
---
 fs/zuf/Makefile  |   2 +-
 fs/zuf/_extern.h |   4 +
 fs/zuf/inode.c   |  22 ++
 fs/zuf/super.c   | 623 ++++++++++++++++++++++++++++++++++++++++++++++-
 fs/zuf/zuf.h     | 122 ++++++++++
 fs/zuf/zus_api.h |  57 +++++
 6 files changed, 828 insertions(+), 2 deletions(-)
 create mode 100644 fs/zuf/inode.c

diff --git a/fs/zuf/Makefile b/fs/zuf/Makefile
index 091bf053a6ed..eaeffc65078f 100644
--- a/fs/zuf/Makefile
+++ b/fs/zuf/Makefile
@@ -17,5 +17,5 @@ zuf-y += md.o t1.o t2.o
 zuf-y += zuf-core.o zuf-root.o
 
 # Main FS
-zuf-y += super.o
+zuf-y += super.o inode.o
 zuf-y += module.o
diff --git a/fs/zuf/_extern.h b/fs/zuf/_extern.h
index 15d632ea5ed2..dc6b41b6410b 100644
--- a/fs/zuf/_extern.h
+++ b/fs/zuf/_extern.h
@@ -20,6 +20,10 @@
  * extern functions declarations
  */
 
+/* inode.c */
+struct inode *zuf_iget(struct super_block *sb, struct zus_inode_info *zus_ii,
+		       zu_dpp_t _zi, bool *exist);
+
 /* super.c */
 int zuf_init_inodecache(void);
 void zuf_destroy_inodecache(void);
diff --git a/fs/zuf/inode.c b/fs/zuf/inode.c
new file mode 100644
index 000000000000..315a273e6f6d
--- /dev/null
+++ b/fs/zuf/inode.c
@@ -0,0 +1,22 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * BRIEF DESCRIPTION
+ *
+ * Inode methods (allocate/free/read/write).
+ *
+ * Copyright (c) 2018 NetApp Inc. All rights reserved.
+ *
+ * ZUFS-License: GPL-2.0. See module.c for LICENSE details.
+ *
+ * Authors:
+ *	Boaz Harrosh <boazh@xxxxxxxxxx>
+ *	Sagi Manole <sagim@xxxxxxxxxx>"
+ */
+
+#include "zuf.h"
+
+struct inode *zuf_iget(struct super_block *sb, struct zus_inode_info *zus_ii,
+		       zu_dpp_t _zi, bool *exist)
+{
+	return ERR_PTR(-ENOTSUPP);
+}
diff --git a/fs/zuf/super.c b/fs/zuf/super.c
index f7f7798425a9..7f819be7056e 100644
--- a/fs/zuf/super.c
+++ b/fs/zuf/super.c
@@ -18,8 +18,575 @@
 
 #include "zuf.h"
 
+static struct super_operations zuf_sops;
 static struct kmem_cache *zuf_inode_cachep;
 
+enum {
+	Opt_uid,
+	Opt_gid,
+	Opt_pedantic,
+	Opt_ephemeral,
+	Opt_dax,
+	Opt_err
+};
+
+static const match_table_t tokens = {
+	{ Opt_pedantic,		"pedantic"		},
+	{ Opt_pedantic,		"pedantic=%d"		},
+	{ Opt_ephemeral,	"ephemeral"		},
+	{ Opt_dax,		"dax"			},
+	{ Opt_err,		NULL			},
+};
+
+static int _parse_options(struct zuf_sb_info *sbi, const char *data,
+			  bool remount, struct zufs_parse_options *po)
+{
+	char *orig_options, *options;
+	char *p;
+	substring_t args[MAX_OPT_ARGS];
+	int err = 0;
+	bool ephemeral = false;
+	size_t mount_options_len = 0;
+
+	/* no options given */
+	if (!data)
+		return 0;
+
+	options = orig_options = kstrdup(data, GFP_KERNEL);
+	if (!options)
+		return -ENOMEM;
+
+	while ((p = strsep(&options, ",")) != NULL) {
+		int token;
+
+		if (!*p)
+			continue;
+
+		/* Initialize args struct so we know whether arg was found */
+		args[0].to = args[0].from = NULL;
+		token = match_token(p, tokens, args);
+		switch (token) {
+		case Opt_pedantic:
+			if (!args[0].from) {
+				po->mount_flags |= ZUFS_M_PEDANTIC;
+				set_opt(sbi, PEDANTIC);
+				continue;
+			}
+			if (match_int(&args[0], &po->pedantic))
+				goto bad_opt;
+			break;
+		case Opt_ephemeral:
+			po->mount_flags |= ZUFS_M_EPHEMERAL;
+			set_opt(sbi, EPHEMERAL);
+			ephemeral = true;
+			break;
+		case Opt_dax:
+			set_opt(sbi, DAX);
+			break;
+		default: {
+			if (mount_options_len != 0) {
+				po->mount_options[mount_options_len] = ',';
+				mount_options_len++;
+			}
+			strcat(po->mount_options, p);
+			mount_options_len += strlen(p);
+		}
+		}
+	}
+
+	if (remount && test_opt(sbi, EPHEMERAL) && (ephemeral == false))
+		clear_opt(sbi, EPHEMERAL);
+out:
+	kfree(orig_options);
+	return err;
+
+bad_opt:
+	zuf_warn_cnd(test_opt(sbi, SILENT), "Bad mount option: \"%s\"\n", p);
+	err = -EINVAL;
+	goto out;
+}
+
+static int _print_tier_info(struct multi_devices *md, char **buff, int start,
+			    int count, int *_space, char *str)
+{
+	int space = *_space;
+	char *b = *buff;
+	int printed;
+	int i;
+
+	printed = snprintf(b, space, str);
+	if (unlikely(printed > space))
+		return -ENOSPC;
+
+	b += printed;
+	space -= printed;
+
+	for (i = start; i < start + count; ++i) {
+		printed = snprintf(b, space, "%s%s", i == start ? "" : ",",
+				   _bdev_name(md_dev_info(md, i)->bdev));
+
+		if (unlikely(printed > space))
+			return -ENOSPC;
+
+		b += printed;
+		space -= printed;
+	}
+	*_space = space;
+	*buff = b;
+
+	return 0;
+}
+
+static void _print_mount_info(struct zuf_sb_info *sbi, char *mount_options)
+{
+	struct multi_devices *md = sbi->md;
+	char buff[992];
+	int space = sizeof(buff);
+	char *b = buff;
+	int err;
+
+	err = _print_tier_info(md, &b, 0, md->t1_count, &space, "t1=");
+	if (unlikely(err))
+		goto no_space;
+
+	if (md->t2_count == 0)
+		goto print_options;
+
+	err = _print_tier_info(md, &b, md->t1_count, md->t2_count, &space,
+			       " t2=");
+	if (unlikely(err))
+		goto no_space;
+
+print_options:
+	if (mount_options) {
+		int printed = snprintf(b, space, " -o %s", mount_options);
+
+		if (unlikely(printed > space))
+			goto no_space;
+	}
+
+print:
+	zuf_info("mounted %s (0x%lx/0x%lx)\n", buff,
+		 md_t1_blocks(sbi->md), md_t2_blocks(sbi->md));
+	return;
+
+no_space:
+	snprintf(buff + sizeof(buff) - 4, 4, "...");
+	goto print;
+}
+
+static void _sb_mwtime_now(struct super_block *sb, struct md_dev_table *zdt)
+{
+	struct timespec64 now = current_time(sb->s_root->d_inode);
+
+	timespec_to_mt(&zdt->s_mtime, &now);
+	zdt->s_wtime = zdt->s_mtime;
+	/* TOZO _persist_md(sb, &zdt->s_mtime, 2*sizeof(zdt->s_mtime)); */
+}
+
+static int _setup_bdi(struct super_block *sb, const char *device_name)
+{
+	int err;
+
+	if (sb->s_bdi && (sb->s_bdi != &noop_backing_dev_info)) {
+		/*
+		 * sb->s_bdi points to blkdev's bdi however we want to redirect
+		 * it to our private bdi...
+		 */
+		bdi_put(sb->s_bdi);
+	}
+	sb->s_bdi = &noop_backing_dev_info;
+
+	err = super_setup_bdi_name(sb, "zuf-%s", device_name);
+	if (unlikely(err)) {
+		zuf_err("Failed to super_setup_bdi\n");
+		return err;
+	}
+
+	sb->s_bdi->ra_pages = ZUFS_READAHEAD_PAGES;
+	sb->s_bdi->capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK;
+	return 0;
+}
+
+static int _sb_add(struct zuf_root_info *zri, struct super_block *sb,
+		   __u64 *sb_id)
+{
+	uint i;
+	int err;
+
+	mutex_lock(&zri->sbl_lock);
+
+	if (zri->sbl.num == zri->sbl.max) {
+		struct super_block **new_array;
+
+		new_array = krealloc(zri->sbl.array,
+				  (zri->sbl.max + SBL_INC) * sizeof(*new_array),
+				  GFP_KERNEL | __GFP_ZERO);
+		if (unlikely(!new_array)) {
+			err = -ENOMEM;
+			goto out;
+		}
+		zri->sbl.max += SBL_INC;
+		zri->sbl.array = new_array;
+	}
+
+	for (i = 0; i < zri->sbl.max; ++i)
+		if (!zri->sbl.array[i])
+			break;
+
+	if (unlikely(i == zri->sbl.max)) {
+		zuf_err("!!!!! can't be! i=%d g_sbl.num=%d g_sbl.max=%d\n",
+			i, zri->sbl.num, zri->sbl.max);
+		err = -EFAULT;
+		goto out;
+	}
+
+	++zri->sbl.num;
+	zri->sbl.array[i] = sb;
+	*sb_id = i + 1;
+	err = 0;
+
+	zuf_dbg_vfs("sb_id=%lld\n", *sb_id);
+out:
+	mutex_unlock(&zri->sbl_lock);
+	return err;
+}
+
+static void _sb_remove(struct zuf_root_info *zri, struct super_block *sb)
+{
+	uint i;
+
+	mutex_lock(&zri->sbl_lock);
+
+	for (i = 0; i < zri->sbl.max; ++i)
+		if (zri->sbl.array[i] == sb)
+			break;
+	if (unlikely(i == zri->sbl.max)) {
+		zuf_err("!!!!! can't be! i=%d g_sbl.num=%d g_sbl.max=%d\n",
+			i, zri->sbl.num, zri->sbl.max);
+		goto out;
+	}
+
+	zri->sbl.array[i] = NULL;
+	--zri->sbl.num;
+out:
+	mutex_unlock(&zri->sbl_lock);
+}
+
+struct super_block *zuf_sb_from_id(struct zuf_root_info *zri, __u64 sb_id,
+				   struct zus_sb_info *zus_sbi)
+{
+	struct super_block *sb;
+
+	--sb_id;
+
+	if (zri->sbl.max <= sb_id) {
+		zuf_err("Invalid SB_ID 0x%llx\n", sb_id);
+		return NULL;
+	}
+
+	sb = zri->sbl.array[sb_id];
+	if (!sb) {
+		zuf_err("Stale SB_ID 0x%llx\n", sb_id);
+		return NULL;
+	}
+
+	return sb;
+}
+
+static void zuf_put_super(struct super_block *sb)
+{
+	struct zuf_sb_info *sbi = SBI(sb);
+
+	if (sbi->zus_sbi) {
+		struct zufs_ioc_mount zim = {
+			.zmi.zus_sbi = sbi->zus_sbi,
+		};
+
+		zufc_dispatch_mount(ZUF_ROOT(sbi), NULL, ZUFS_M_UMOUNT, &zim);
+		sbi->zus_sbi = NULL;
+	}
+
+	/* NOTE!!! this is a HACK! we should not touch the s_umount
+	 * lock but to make lockdep happy we do that since our devices
+	 * are held exclusivly. Need to revisit every kernel version
+	 * change.
+	 */
+	if (sbi->md) {
+		up_write(&sb->s_umount);
+		zuf_rm_pmem(sbi->md);
+		md_fini(sbi->md, sb->s_bdev);
+		down_write(&sb->s_umount);
+	}
+
+	_sb_remove(ZUF_ROOT(sbi), sb);
+	sb->s_fs_info = NULL;
+	if (!test_opt(sbi, FAILED))
+		zuf_info("unmounted /dev/%s\n", _bdev_name(sb->s_bdev));
+	kfree(sbi);
+}
+
+struct __fill_super_params {
+	struct multi_devices *md;
+	char *mount_options;
+};
+
+static int zuf_fill_super(struct super_block *sb, void *data, int silent)
+{
+	struct zuf_sb_info *sbi = NULL;
+	struct __fill_super_params *fsp = data;
+	struct zufs_ioc_mount zim = {};
+	struct zufs_ioc_mount *ioc_mount;
+	enum big_alloc_type bat;
+	struct register_fs_info *rfi;
+	struct inode *root_i;
+	size_t zim_size, mount_options_len;
+	bool exist;
+	int err;
+
+	BUILD_BUG_ON(sizeof(struct md_dev_table) > MDT_SIZE);
+	BUILD_BUG_ON(sizeof(struct zus_inode) != ZUFS_INODE_SIZE);
+
+	mount_options_len = (fsp->mount_options ?
+					strlen(fsp->mount_options) : 0) + 1;
+	zim_size = sizeof(zim) + mount_options_len;
+	ioc_mount = big_alloc(zim_size, sizeof(zim), &zim,
+			      GFP_KERNEL | __GFP_ZERO, &bat);
+	if (unlikely(!ioc_mount))
+		return -ENOMEM;
+
+	ioc_mount->zmi.po.mount_options_len = mount_options_len;
+
+	err = _sb_add(zuf_fst(sb)->zri, sb, &ioc_mount->zmi.sb_id);
+	if (unlikely(err))
+		goto error;
+
+	sbi = kzalloc(sizeof(struct zuf_sb_info), GFP_KERNEL);
+	if (!sbi) {
+		zuf_err_cnd(silent, "Not enough memory to allocate sbi\n");
+		err = -ENOMEM;
+		goto error;
+	}
+	sb->s_fs_info = sbi;
+	sbi->sb = sb;
+
+	/* Initialize embedded objects */
+	spin_lock_init(&sbi->s_mmap_dirty_lock);
+	INIT_LIST_HEAD(&sbi->s_mmap_dirty);
+	if (silent) {
+		ioc_mount->zmi.po.mount_flags |= ZUFS_M_SILENT;
+		set_opt(sbi, SILENT);
+	}
+
+	sbi->md = fsp->md;
+	err = md_set_sb(sbi->md, sb->s_bdev, sb, silent);
+	if (unlikely(err))
+		goto error;
+	zuf_add_pmem(zuf_fst(sb)->zri, sbi->md);
+
+	err = _parse_options(sbi, fsp->mount_options, 0, &ioc_mount->zmi.po);
+	if (err)
+		goto error;
+
+	err = _setup_bdi(sb, _bdev_name(sb->s_bdev));
+	if (err) {
+		zuf_err_cnd(silent, "Failed to setup bdi => %d\n", err);
+		goto error;
+	}
+
+	/* Tell ZUS to mount an FS for us */
+	ioc_mount->zmi.pmem_kern_id = zuf_pmem_id(sbi->md);
+	err = zufc_dispatch_mount(ZUF_ROOT(sbi), zuf_fst(sb)->zus_zfi,
+				  ZUFS_M_MOUNT, ioc_mount);
+	if (unlikely(err))
+		goto error;
+	sbi->zus_sbi = ioc_mount->zmi.zus_sbi;
+
+	/* Init with default values */
+	sb->s_blocksize_bits = ioc_mount->zmi.s_blocksize_bits;
+	sb->s_blocksize = 1 << ioc_mount->zmi.s_blocksize_bits;
+
+	rfi = &zuf_fst(sb)->rfi;
+
+	sb->s_magic = rfi->FS_magic;
+	sb->s_time_gran = rfi->s_time_gran;
+	sb->s_maxbytes = rfi->s_maxbytes;
+	sb->s_flags |= MS_NOSEC | (ioc_mount->zmi.acl_on ? SB_POSIXACL : 0);
+
+	sb->s_op = &zuf_sops;
+
+	root_i = zuf_iget(sb, ioc_mount->zmi.zus_ii, ioc_mount->zmi._zi,
+			  &exist);
+	if (IS_ERR(root_i)) {
+		err = PTR_ERR(root_i);
+		goto error;
+	}
+	WARN_ON(exist);
+
+	sb->s_root = d_make_root(root_i);
+	if (!sb->s_root) {
+		zuf_err_cnd(silent, "get tozu root inode failed\n");
+		iput(root_i); /* undo zuf_iget */
+		err = -ENOMEM;
+		goto error;
+	}
+
+	if (!zuf_rdonly(sb))
+		_sb_mwtime_now(sb, md_zdt(sbi->md));
+
+	mt_to_timespec(&root_i->i_ctime, &zus_zi(root_i)->i_ctime);
+	mt_to_timespec(&root_i->i_mtime, &zus_zi(root_i)->i_mtime);
+
+	_print_mount_info(sbi, fsp->mount_options);
+	clear_opt(sbi, SILENT);
+	big_free(ioc_mount, bat);
+	return 0;
+
+error:
+	zuf_warn("NOT mounting => %d\n", err);
+	if (sbi) {
+		set_opt(sbi, FAILED);
+		zuf_put_super(sb);
+	}
+	big_free(ioc_mount, bat);
+	return err;
+}
+
+static void _zst_to_kst(const struct statfs64 *zst, struct kstatfs *kst)
+{
+	kst->f_type	= zst->f_type;
+	kst->f_bsize	= zst->f_bsize;
+	kst->f_blocks	= zst->f_blocks;
+	kst->f_bfree	= zst->f_bfree;
+	kst->f_bavail	= zst->f_bavail;
+	kst->f_files	= zst->f_files;
+	kst->f_ffree	= zst->f_ffree;
+	kst->f_fsid	= zst->f_fsid;
+	kst->f_namelen	= zst->f_namelen;
+	kst->f_frsize	= zst->f_frsize;
+	kst->f_flags	= zst->f_flags;
+}
+
+static int zuf_statfs(struct dentry *d, struct kstatfs *buf)
+{
+	struct zuf_sb_info *sbi = SBI(d->d_sb);
+	struct zufs_ioc_statfs ioc_statfs = {
+		.hdr.in_len = offsetof(struct zufs_ioc_statfs, statfs_out),
+		.hdr.out_len = sizeof(ioc_statfs),
+		.hdr.operation = ZUFS_OP_STATFS,
+		.zus_sbi = sbi->zus_sbi,
+	};
+	int err;
+
+	err = zufc_dispatch(ZUF_ROOT(sbi), &ioc_statfs.hdr, NULL, 0);
+	if (unlikely(err && err != -EINTR)) {
+		zuf_err("zufc_dispatch failed op=ZUFS_OP_STATFS => %d\n", err);
+		return err;
+	}
+
+	_zst_to_kst(&ioc_statfs.statfs_out, buf);
+	return 0;
+}
+
+static int zuf_show_options(struct seq_file *seq, struct dentry *root)
+{
+	struct zuf_sb_info *sbi = SBI(root->d_sb);
+
+	if (test_opt(sbi, EPHEMERAL))
+		seq_puts(seq, ",ephemeral");
+	if (test_opt(sbi, DAX))
+		seq_puts(seq, ",dax");
+
+	return 0;
+}
+
+static int zuf_show_devname(struct seq_file *seq, struct dentry *root)
+{
+	seq_printf(seq, "/dev/%s", _bdev_name(root->d_sb->s_bdev));
+
+	return 0;
+}
+
+static int zuf_remount(struct super_block *sb, int *mntflags, char *data)
+{
+	struct zuf_sb_info *sbi = SBI(sb);
+	struct zufs_ioc_mount zim = {};
+	struct zufs_ioc_mount *ioc_mount;
+	size_t remount_options_len, zim_size;
+	enum big_alloc_type bat;
+	int err;
+
+	zuf_info("remount... -o %s\n", data);
+
+	remount_options_len = data ? (strlen(data) + 1) : 0;
+	zim_size = sizeof(zim) + remount_options_len;
+	ioc_mount = big_alloc(zim_size, sizeof(zim), &zim,
+			      GFP_KERNEL | __GFP_ZERO, &bat);
+	if (unlikely(!ioc_mount))
+		return -ENOMEM;
+
+	ioc_mount->zmi.zus_sbi = sbi->zus_sbi,
+	ioc_mount->zmi.remount_flags = zuf_rdonly(sb) ? ZUFS_REM_WAS_RO : 0;
+	ioc_mount->zmi.po.mount_options_len = remount_options_len;
+
+	/* Store the old options */
+	ioc_mount->zmi.old_mount_opt = sbi->s_mount_opt;
+
+	err = _parse_options(sbi, data, 1, &ioc_mount->zmi.po);
+	if (unlikely(err))
+		goto fail;
+
+	if (*mntflags & MS_RDONLY) {
+		ioc_mount->zmi.remount_flags |= ZUFS_REM_WILL_RO;
+
+		if (!zuf_rdonly(sb))
+			_sb_mwtime_now(sb, md_zdt(sbi->md));
+	} else if (zuf_rdonly(sb)) {
+		_sb_mwtime_now(sb, md_zdt(sbi->md));
+	}
+
+	err = zufc_dispatch_mount(ZUF_ROOT(sbi), zuf_fst(sb)->zus_zfi,
+				  ZUFS_M_REMOUNT, ioc_mount);
+	if (unlikely(err))
+		goto fail;
+
+	big_free(ioc_mount, bat);
+	return 0;
+
+fail:
+	sbi->s_mount_opt = ioc_mount->zmi.old_mount_opt;
+	big_free(ioc_mount, bat);
+	zuf_dbg_err("remount failed restore option\n");
+	return err;
+}
+
+static int zuf_update_s_wtime(struct super_block *sb)
+{
+	if (!(sb->s_flags & MS_RDONLY)) {
+		struct timespec64 now = current_time(sb->s_root->d_inode);
+
+		timespec_to_mt(&md_zdt(SBI(sb)->md)->s_wtime, &now);
+	}
+	return 0;
+}
+
+static struct inode *zuf_alloc_inode(struct super_block *sb)
+{
+	struct zuf_inode_info *zii;
+
+	zii = kmem_cache_alloc(zuf_inode_cachep, GFP_NOFS);
+	if (!zii)
+		return NULL;
+
+	zii->vfs_inode.i_version.counter = 1;
+	return &zii->vfs_inode;
+}
+
+static void zuf_destroy_inode(struct inode *inode)
+{
+	kmem_cache_free(zuf_inode_cachep, ZUII(inode));
+}
+
 static void _init_once(void *foo)
 {
 	struct zuf_inode_info *zii = foo;
@@ -46,8 +613,62 @@ void zuf_destroy_inodecache(void)
 	kmem_cache_destroy(zuf_inode_cachep);
 }
 
+static struct super_operations zuf_sops = {
+	.alloc_inode	= zuf_alloc_inode,
+	.destroy_inode	= zuf_destroy_inode,
+	.put_super	= zuf_put_super,
+	.freeze_fs	= zuf_update_s_wtime,
+	.unfreeze_fs	= zuf_update_s_wtime,
+	.statfs		= zuf_statfs,
+	.remount_fs	= zuf_remount,
+	.show_options	= zuf_show_options,
+	.show_devname	= zuf_show_devname,
+};
+
 struct dentry *zuf_mount(struct file_system_type *fs_type, int flags,
 			 const char *dev_name, void *data)
 {
-	return ERR_PTR(-ENOTSUPP);
+	int silent = flags & MS_SILENT ? 1 : 0;
+	struct __fill_super_params fsp = {
+		.mount_options = data,
+	};
+	struct zuf_fs_type *fst = ZUF_FST(fs_type);
+	struct register_fs_info *rfi = &fst->rfi;
+	struct mdt_check mc = {
+		.alloc_mask	= ZUFS_ALLOC_MASK,
+		.major_ver	= rfi->FS_ver_major,
+		.minor_ver	= rfi->FS_ver_minor,
+		.magic		= rfi->FS_magic,
+
+		.holder = fs_type,
+		.silent = silent,
+	};
+	struct dentry *ret = NULL;
+	const char *dev_path = NULL;
+	int err;
+
+	zuf_dbg_vfs("dev_name=%s, data=%s\n", dev_name, (const char *)data);
+
+	fsp.md = md_alloc(sizeof(struct zuf_pmem));
+	if (IS_ERR(fsp.md)) {
+		err = PTR_ERR(fsp.md);
+		fsp.md = NULL;
+		goto out;
+	}
+
+	err = md_init(fsp.md, dev_name, &mc, &dev_path);
+	if (unlikely(err)) {
+		zuf_err_cnd(silent, "md_init failed! => %d\n", err);
+		goto out;
+	}
+
+	zuf_dbg_vfs("mounting with dev_path=%s\n", dev_path);
+	ret = mount_bdev(fs_type, flags, dev_path, &fsp, zuf_fill_super);
+
+out:
+	if (unlikely(err) && fsp.md)
+		md_fini(fsp.md, NULL);
+
+	kfree(dev_path);
+	return err ? ERR_PTR(err) : ret;
 }
diff --git a/fs/zuf/zuf.h b/fs/zuf/zuf.h
index 0689f2031ec7..e23907f5e94e 100644
--- a/fs/zuf/zuf.h
+++ b/fs/zuf/zuf.h
@@ -133,11 +133,32 @@ static inline uint zuf_pmem_id(struct multi_devices *md)
 
 // void zuf_del_fs_type(struct zuf_root_info *zri, struct zuf_fs_type *zft);
 
+/*
+ * Private Super-block flags
+ */
+enum {
+	ZUF_MOUNT_PEDANTIC	= 0x000001,	/* Check for memory leaks */
+	ZUF_MOUNT_PEDANTIC_SHADOW = 0x00002,	/* */
+	ZUF_MOUNT_SILENT	= 0x000004,	/* verbosity is silent */
+	ZUF_MOUNT_EPHEMERAL	= 0x000008,	/* Don't persist the data */
+	ZUF_MOUNT_FAILED	= 0x000010,	/* mark a failed-mount */
+	ZUF_MOUNT_DAX		= 0x000020,	/* mounted with dax option */
+	ZUF_MOUNT_POSIXACL	= 0x000040,	/* mounted with posix acls */
+};
+
+#define clear_opt(sbi, opt)       (sbi->s_mount_opt &= ~ZUF_MOUNT_ ## opt)
+#define set_opt(sbi, opt)         (sbi->s_mount_opt |= ZUF_MOUNT_ ## opt)
+#define test_opt(sbi, opt)      (sbi->s_mount_opt & ZUF_MOUNT_ ## opt)
+
 /*
  * ZUF per-inode data in memory
  */
 struct zuf_inode_info {
 	struct inode		vfs_inode;
+
+	/* cookies from Server */
+	struct zus_inode	*zi;
+	struct zus_inode_info	*zus_ii;
 };
 
 static inline struct zuf_inode_info *ZUII(struct inode *inode)
@@ -145,6 +166,28 @@ static inline struct zuf_inode_info *ZUII(struct inode *inode)
 	return container_of(inode, struct zuf_inode_info, vfs_inode);
 }
 
+/*
+ * ZUF super-block data in memory
+ */
+struct zuf_sb_info {
+	struct super_block *sb;
+	struct multi_devices *md;
+
+	/* zus cookie*/
+	struct zus_sb_info *zus_sbi;
+
+	/* Mount options */
+	unsigned long	s_mount_opt;
+
+	spinlock_t		s_mmap_dirty_lock;
+	struct list_head	s_mmap_dirty;
+};
+
+static inline struct zuf_sb_info *SBI(struct super_block *sb)
+{
+	return sb->s_fs_info;
+}
+
 static inline struct zuf_fs_type *ZUF_FST(struct file_system_type *fs_type)
 {
 	return container_of(fs_type, struct zuf_fs_type, vfs_fst);
@@ -155,6 +198,85 @@ static inline struct zuf_fs_type *zuf_fst(struct super_block *sb)
 	return ZUF_FST(sb->s_type);
 }
 
+static inline struct zuf_root_info *ZUF_ROOT(struct zuf_sb_info *sbi)
+{
+	return zuf_fst(sbi->sb)->zri;
+}
+
+static inline bool zuf_rdonly(struct super_block *sb)
+{
+	return sb->s_flags & MS_RDONLY;
+}
+
+static inline struct zus_inode *zus_zi(struct inode *inode)
+{
+	return ZUII(inode)->zi;
+}
+
+static inline void mt_to_timespec(struct timespec64 *t, __le64 *mt)
+{
+	u32 nsec;
+
+	t->tv_sec = div_s64_rem(le64_to_cpu(*mt), NSEC_PER_SEC, &nsec);
+	t->tv_nsec = nsec;
+}
+
+static inline void timespec_to_mt(__le64 *mt, struct timespec64 *t)
+{
+	*mt = cpu_to_le64(t->tv_sec * NSEC_PER_SEC + t->tv_nsec);
+}
+
+/* CAREFUL: Needs an sfence eventually, after this call */
+static inline
+void zus_inode_cmtime_now(struct inode *inode, struct zus_inode *zi)
+{
+	inode->i_mtime = inode->i_ctime = current_time(inode);
+	timespec_to_mt(&zi->i_ctime, &inode->i_ctime);
+	zi->i_mtime = zi->i_ctime;
+}
+
+static inline
+void zus_inode_ctime_now(struct inode *inode, struct zus_inode *zi)
+{
+	inode->i_ctime = current_time(inode);
+	timespec_to_mt(&zi->i_ctime, &inode->i_ctime);
+}
+
+enum big_alloc_type { ba_stack, ba_kmalloc, ba_vmalloc };
+
+static inline
+void *big_alloc(uint bytes, uint local_size, void *local, gfp_t gfp,
+		enum big_alloc_type *bat)
+{
+	void *ptr;
+
+	if (bytes <= local_size) {
+		*bat = ba_stack;
+		ptr = local;
+	} else if (bytes <= PAGE_SIZE) {
+		*bat = ba_kmalloc;
+		ptr = kmalloc(bytes, gfp);
+	} else {
+		*bat = ba_vmalloc;
+		ptr = vmalloc(bytes);
+	}
+
+	return ptr;
+}
+
+static inline void big_free(void *ptr, enum big_alloc_type bat)
+{
+	switch (bat) {
+	case ba_stack:
+		break;
+	case ba_kmalloc:
+		kfree(ptr);
+		break;
+	case ba_vmalloc:
+		vfree(ptr);
+	}
+}
+
 struct zuf_dispatch_op;
 typedef int (*overflow_handler)(struct zuf_dispatch_op *zdo, void *parg,
 				ulong zt_max_bytes);
diff --git a/fs/zuf/zus_api.h b/fs/zuf/zus_api.h
index e0c439b6c8e9..ca8e10a1f5a8 100644
--- a/fs/zuf/zus_api.h
+++ b/fs/zuf/zus_api.h
@@ -77,6 +77,8 @@
 #define EZUF_RETRY_DONE 540
 
 
+#define ZUFS_READAHEAD_PAGES	8
+
 /* All device sizes offsets must align on 2M */
 #define ZUFS_ALLOC_MASK		(1024 * 1024 * 2 - 1)
 
@@ -107,6 +109,45 @@ static inline zu_dpp_t enc_zu_dpp_t(ulong v, uint pool)
 	return v | pool;
 }
 
+/*
+ * Structure of a ZUS inode.
+ * This is all the inode fields
+ */
+
+/* zus_inode size */
+#define ZUFS_INODE_SIZE 128    /* must be power of two */
+#define ZUFS_INODE_BITS   7
+
+struct zus_inode {
+	__le16	i_flags;	/* Inode flags */
+	__le16	i_mode;		/* File mode */
+	__le32	i_nlink;	/* Links count */
+	__le64	i_size;		/* Size of data in bytes */
+/* 16*/	struct __zi_on_disk_desc {
+		__le64	a[2];
+	}	i_on_disk;	/* FS-specific on disc placement */
+/* 32*/	__le64	i_blocks;
+	__le64	i_mtime;	/* Inode/data Modification time */
+	__le64	i_ctime;	/* Inode/data Changed time */
+	__le64	i_atime;	/* Data Access time */
+/* 64 - cache-line boundary */
+	__le64	i_ino;		/* Inode number */
+	__le32	i_uid;		/* Owner Uid */
+	__le32	i_gid;		/* Group Id */
+	__le64	i_xattr;	/* FS-specific Extended attribute block */
+	__le64	i_generation;	/* File version (for NFS) */
+/* 96*/	union {
+		__le32	i_rdev;		/* special-inode major/minor etc ...*/
+		u8	i_symlink[32];	/* if i_size < sizeof(i_symlink) */
+		__le64	i_sym_dpp;	/* Link location if long symlink */
+		struct  _zu_dir {
+			__le64	dir_root;
+			__le64  parent;
+		}	i_dir;
+	};
+	/* Total ZUFS_INODE_SIZE bytes always */
+};
+
 /* ~~~~~ ZUFS API ioctl commands ~~~~~ */
 enum {
 	ZUS_API_MAP_MAX_PAGES	= 1024,
@@ -176,6 +217,11 @@ enum e_mount_operation {
 	ZUFS_M_DDBG_WR,
 };
 
+enum {
+	ZUFS_REM_WAS_RO	= 0x00000001,
+	ZUFS_REM_WILL_RO	= 0x00000002,
+};
+
 struct zufs_mount_info {
 	/* IN */
 	struct zus_fs_info *zus_zfi;
@@ -269,11 +315,22 @@ struct zufs_ioc_wait_operation {
  */
 enum e_zufs_operation {
 	ZUFS_OP_NULL = 0,
+	ZUFS_OP_STATFS,
 
 	ZUFS_OP_BREAK,		/* Kernel telling Server to exit */
 	ZUFS_OP_MAX_OPT,
 };
 
+/* ZUFS_OP_STATFS */
+struct zufs_ioc_statfs {
+	struct zufs_ioc_hdr hdr;
+	/* IN */
+	struct zus_sb_info *zus_sbi;
+
+	/* OUT */
+	struct statfs64 statfs_out;
+};
+
 /* Allocate a special_file that will be a dual-port communication buffer with
  * user mode.
  * Server will access the buffer via the mmap of this file.
-- 
2.20.1