[6/9] pohmelfs: distributed locking and cache coherency protocol.

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



POHMELFS utilizes writeback cache, which is built on top of MO(E)SI-like
coherency protocol. This patch includes its implementation and cache
object processing helpers (like allocation and completion callbacks).

POHMELFS uses scalable cached read/write locking. No additional requests
are performed if lock is granted to the filesystem. The same protocol
is used by the server to on-demand flushing of the client's cache (for
example when server wants to update local data).

Signed-off-by: Evgeniy Polyakov <zbr@xxxxxxxxxxx>

diff --git a/fs/pohmelfs/lock.c b/fs/pohmelfs/lock.c
new file mode 100644
index 0000000..47e7562
--- /dev/null
+++ b/fs/pohmelfs/lock.c
@@ -0,0 +1,186 @@
+/*
+ * 2007+ Copyright (c) Evgeniy Polyakov <zbr@xxxxxxxxxxx>
+ * All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/module.h>
+#include <linux/backing-dev.h>
+#include <linux/fs.h>
+#include <linux/fsnotify.h>
+#include <linux/slab.h>
+#include <linux/mempool.h>
+
+#include "netfs.h"
+
+static int pohmelfs_send_lock_trans(struct pohmelfs_inode *pi,
+		u64 id, u64 start, u32 size, int type)
+{
+	struct inode *inode = &pi->vfs_inode;
+	struct pohmelfs_sb *psb = POHMELFS_SB(inode->i_sb);
+	struct netfs_trans *t;
+	struct netfs_cmd *cmd;
+	int path_len, err;
+	void *data;
+	struct netfs_lock *l;
+	int isize = (type & POHMELFS_LOCK_GRAB) ? 0 : sizeof(struct netfs_inode_info);
+
+	mutex_lock(&psb->path_lock);
+	err = pohmelfs_path_length(pi);
+	mutex_unlock(&psb->path_lock);
+
+	if (err < 0)
+		goto err_out_exit;
+
+	path_len = err;
+
+	err = -ENOMEM;
+	t = netfs_trans_alloc(psb, path_len + sizeof(struct netfs_lock) + isize, 0, 0);
+	if (!t)
+		goto err_out_exit;
+
+	cmd = netfs_trans_current(t);
+	data = cmd + 1;
+	l = data + path_len;
+
+	cmd->cmd = NETFS_LOCK;
+	cmd->start = 0;
+	cmd->id = id;
+	cmd->size = sizeof(struct netfs_lock) + path_len + isize;
+	cmd->ext = path_len;
+	cmd->csize = 0;
+
+	netfs_convert_cmd(cmd);
+
+	mutex_lock(&psb->path_lock);
+	err = pohmelfs_construct_path_string(pi, cmd+1, path_len);
+	mutex_unlock(&psb->path_lock);
+	if (err < 0)
+		goto err_out_free;
+
+	l->start = start;
+	l->size = size;
+	l->type = type;
+	l->ino = pi->ino;
+
+	netfs_convert_lock(l);
+
+	if (isize) {
+		struct netfs_inode_info *info = (struct netfs_inode_info *)(l + 1);
+
+		info->mode = inode->i_mode;
+		info->nlink = inode->i_nlink;
+		info->uid = inode->i_uid;
+		info->gid = inode->i_gid;
+		info->blocks = inode->i_blocks;
+		info->rdev = inode->i_rdev;
+		info->size = inode->i_size;
+		info->version = inode->i_version;
+
+		netfs_convert_inode_info(info);
+	}
+
+	netfs_trans_update(cmd, t, path_len + sizeof(struct netfs_lock) + isize);
+
+	return netfs_trans_finish(t, psb);
+
+err_out_free:
+	netfs_trans_free(t);
+err_out_exit:
+	return err;
+}
+
+int pohmelfs_data_lock(struct pohmelfs_inode *pi, u64 start, u32 size, int type)
+{
+	struct pohmelfs_sb *psb = POHMELFS_SB(pi->vfs_inode.i_sb);
+	struct pohmelfs_mcache *m;
+	int err = -ENOMEM;
+	struct iattr iattr;
+	struct inode *inode = &pi->vfs_inode;
+
+	dprintk("%s: %p: ino: %llu, start: %llu, size: %u, "
+			"type: %d, locked as: %d, owned: %d.\n",
+			__func__, &pi->vfs_inode, pi->ino,
+			start, size, type, pi->lock_type,
+			!!test_bit(NETFS_INODE_OWNED, &pi->state));
+
+	if (test_bit(NETFS_INODE_OWNED, &pi->state) && (type == pi->lock_type))
+		return 0;
+
+	if ((type == POHMELFS_READ_LOCK) && (pi->lock_type == POHMELFS_WRITE_LOCK))
+		return 0;
+
+	clear_bit(NETFS_INODE_OWNED, &pi->state);
+	clear_bit(NETFS_INODE_REMOTE_SYNCED, &pi->state);
+
+	m = pohmelfs_mcache_alloc(psb, start, size, NULL);
+	if (IS_ERR(m))
+		return PTR_ERR(m);
+
+	err = pohmelfs_send_lock_trans(pi, m->gen, start, size,
+			type | POHMELFS_LOCK_GRAB);
+	if (err)
+		goto err_out_put;
+
+	err = wait_for_completion_timeout(&m->complete, psb->mcache_timeout);
+	if (err)
+		err = m->err;
+	else
+		err = -ETIMEDOUT;
+
+	dprintk("%s: %p: ino: %llu, mgen: %llu, start: %llu, size: %u, err: %d.\n",
+		__func__, &pi->vfs_inode, pi->ino, m->gen, start, size, err);
+
+	if (err && (err != -ENOENT))
+		goto err_out_put;
+
+	if (!err) {
+		netfs_convert_inode_info(&m->info);
+
+		iattr.ia_valid = ATTR_MODE | ATTR_UID | ATTR_GID | ATTR_SIZE | ATTR_ATIME;
+		iattr.ia_mode = m->info.mode;
+		iattr.ia_uid = m->info.uid;
+		iattr.ia_gid = m->info.gid;
+		iattr.ia_size = m->info.size;
+		iattr.ia_atime = CURRENT_TIME;
+
+		err = pohmelfs_setattr_raw(inode, &iattr);
+		if (!err) {
+			struct dentry *dentry = d_find_alias(inode);
+			if (dentry) {
+				fsnotify_change(dentry, iattr.ia_valid);
+				dput(dentry);
+			}
+		}
+	}
+
+	pi->lock_type = type;
+	set_bit(NETFS_INODE_OWNED, &pi->state);
+
+	pohmelfs_mcache_put(psb, m);
+
+	return 0;
+
+err_out_put:
+	pohmelfs_mcache_put(psb, m);
+	return err;
+}
+
+int pohmelfs_data_unlock(struct pohmelfs_inode *pi, u64 start, u32 size, int type)
+{
+	dprintk("%s: %p: ino: %llu, start: %llu, size: %u, type: %d.\n",
+			__func__, &pi->vfs_inode, pi->ino, start, size, type);
+	pi->lock_type = 0;
+	clear_bit(NETFS_INODE_OWNED, &pi->state);
+	clear_bit(NETFS_INODE_REMOTE_SYNCED, &pi->state);
+	return pohmelfs_send_lock_trans(pi, pi->ino, start, size, type);
+}
diff --git a/fs/pohmelfs/mcache.c b/fs/pohmelfs/mcache.c
new file mode 100644
index 0000000..2e2d36e
--- /dev/null
+++ b/fs/pohmelfs/mcache.c
@@ -0,0 +1,171 @@
+/*
+ * 2007+ Copyright (c) Evgeniy Polyakov <zbr@xxxxxxxxxxx>
+ * All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/mempool.h>
+
+#include "netfs.h"
+
+static struct kmem_cache *pohmelfs_mcache_cache;
+static mempool_t *pohmelfs_mcache_pool;
+
+static inline int pohmelfs_mcache_cmp(u64 gen, u64 new)
+{
+	if (gen < new)
+		return 1;
+	if (gen > new)
+		return -1;
+	return 0;
+}
+
+struct pohmelfs_mcache *pohmelfs_mcache_search(struct pohmelfs_sb *psb, u64 gen)
+{
+	struct rb_root *root = &psb->mcache_root;
+	struct rb_node *n = root->rb_node;
+	struct pohmelfs_mcache *tmp, *ret = NULL;
+	int cmp;
+
+	while (n) {
+		tmp = rb_entry(n, struct pohmelfs_mcache, mcache_entry);
+
+		cmp = pohmelfs_mcache_cmp(tmp->gen, gen);
+		if (cmp < 0)
+			n = n->rb_left;
+		else if (cmp > 0)
+			n = n->rb_right;
+		else {
+			ret = tmp;
+			pohmelfs_mcache_get(ret);
+			break;
+		}
+	}
+
+	return ret;
+}
+
+static int pohmelfs_mcache_insert(struct pohmelfs_sb *psb, struct pohmelfs_mcache *m)
+{
+	struct rb_root *root = &psb->mcache_root;
+	struct rb_node **n = &root->rb_node, *parent = NULL;
+	struct pohmelfs_mcache *ret = NULL, *tmp;
+	int cmp;
+
+	while (*n) {
+		parent = *n;
+
+		tmp = rb_entry(parent, struct pohmelfs_mcache, mcache_entry);
+
+		cmp = pohmelfs_mcache_cmp(tmp->gen, m->gen);
+		if (cmp < 0)
+			n = &parent->rb_left;
+		else if (cmp > 0)
+			n = &parent->rb_right;
+		else {
+			ret = tmp;
+			break;
+		}
+	}
+
+	if (ret)
+		return -EEXIST;
+
+	rb_link_node(&m->mcache_entry, parent, n);
+	rb_insert_color(&m->mcache_entry, root);
+
+	return 0;
+}
+
+static int pohmelfs_mcache_remove(struct pohmelfs_sb *psb, struct pohmelfs_mcache *m)
+{
+	if (m && m->mcache_entry.rb_parent_color) {
+		rb_erase(&m->mcache_entry, &psb->mcache_root);
+		m->mcache_entry.rb_parent_color = 0;
+		return 1;
+	}
+	return 0;
+}
+
+void pohmelfs_mcache_remove_locked(struct pohmelfs_sb *psb, struct pohmelfs_mcache *m)
+{
+	mutex_lock(&psb->mcache_lock);
+	pohmelfs_mcache_remove(psb, m);
+	mutex_unlock(&psb->mcache_lock);
+}
+
+struct pohmelfs_mcache *pohmelfs_mcache_alloc(struct pohmelfs_sb *psb, u64 start,
+		unsigned int size, void *data)
+{
+	struct pohmelfs_mcache *m;
+	int err = -ENOMEM;
+
+	m = mempool_alloc(pohmelfs_mcache_pool, GFP_KERNEL);
+	if (!m)
+		goto err_out_exit;
+
+	init_completion(&m->complete);
+	m->err = 0;
+	atomic_set(&m->refcnt, 1);
+	m->data = data;
+	m->start = start;
+	m->size = size;
+	m->gen = atomic_long_inc_return(&psb->mcache_gen);
+
+	mutex_lock(&psb->mcache_lock);
+	err = pohmelfs_mcache_insert(psb, m);
+	mutex_unlock(&psb->mcache_lock);
+	if (err)
+		goto err_out_free;
+
+	return m;
+
+err_out_free:
+	mempool_free(m, pohmelfs_mcache_pool);
+err_out_exit:
+	return ERR_PTR(err);
+}
+
+void pohmelfs_mcache_free(struct pohmelfs_sb *psb, struct pohmelfs_mcache *m)
+{
+	pohmelfs_mcache_remove_locked(psb, m);
+
+	mempool_free(m, pohmelfs_mcache_pool);
+}
+
+int __init pohmelfs_mcache_init(void)
+{
+	pohmelfs_mcache_cache = kmem_cache_create("pohmelfs_mcache_cache",
+				sizeof(struct pohmelfs_mcache),
+				0, (SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD), NULL);
+	if (!pohmelfs_mcache_cache)
+		goto err_out_exit;
+
+	pohmelfs_mcache_pool = mempool_create_slab_pool(256, pohmelfs_mcache_cache);
+	if (!pohmelfs_mcache_pool)
+		goto err_out_free;
+
+	return 0;
+
+err_out_free:
+	kmem_cache_destroy(pohmelfs_mcache_cache);
+err_out_exit:
+	return -ENOMEM;
+}
+
+void pohmelfs_mcache_exit(void)
+{
+	mempool_destroy(pohmelfs_mcache_pool);
+	kmem_cache_destroy(pohmelfs_mcache_cache);
+}
--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[Index of Archives]     [Linux Ext4 Filesystem]     [Union Filesystem]     [Filesystem Testing]     [Ceph Users]     [Ecryptfs]     [AutoFS]     [Kernel Newbies]     [Share Photos]     [Security]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux Cachefs]     [Reiser Filesystem]     [Linux RAID]     [Samba]     [Device Mapper]     [CEPH Development]
  Powered by Linux