[PATCH 3/4] Orangefs: hooks and call-outs

hubcap <hubcap@xxxxxxxxxxxx> · Wed, 31 Dec 2014 15:53:06 -0500

From: Mike Marshall <hubcap@xxxxxxxxxxxx>

Signed-off-by: Mike Marshall <hubcap@xxxxxxxxxxxx>
---
 fs/orangefs/acl.c     | 176 ++++++++++++++++
 fs/orangefs/dcache.c  | 140 +++++++++++++
 fs/orangefs/dir.c     | 395 ++++++++++++++++++++++++++++++++++++
 fs/orangefs/inode.c   | 468 ++++++++++++++++++++++++++++++++++++++++++
 fs/orangefs/namei.c   | 473 +++++++++++++++++++++++++++++++++++++++++++
 fs/orangefs/super.c   | 548 ++++++++++++++++++++++++++++++++++++++++++++++++++
 fs/orangefs/symlink.c |  30 +++
 fs/orangefs/xattr.c   | 537 +++++++++++++++++++++++++++++++++++++++++++++++++
 8 files changed, 2767 insertions(+)
 create mode 100644 fs/orangefs/acl.c
 create mode 100644 fs/orangefs/dcache.c
 create mode 100644 fs/orangefs/dir.c
 create mode 100644 fs/orangefs/inode.c
 create mode 100644 fs/orangefs/namei.c
 create mode 100644 fs/orangefs/super.c
 create mode 100644 fs/orangefs/symlink.c
 create mode 100644 fs/orangefs/xattr.c

diff --git a/fs/orangefs/acl.c b/fs/orangefs/acl.c
new file mode 100644
index 0000000..af18cf1
--- /dev/null
+++ b/fs/orangefs/acl.c
@@ -0,0 +1,176 @@
+/*
+ * (C) 2001 Clemson University and The University of Chicago
+ *
+ * See COPYING in top-level directory.
+ */
+
+#include "protocol.h"
+#include "pvfs2-kernel.h"
+#include "pvfs2-bufmap.h"
+#include <linux/posix_acl_xattr.h>
+#include <linux/fs_struct.h>
+
+/*
+ * Fetch the POSIX ACL of the given type for an inode by reading the
+ * matching PVFS2 extended attribute.
+ *
+ * Returns NULL when getxattr reports -ENODATA or -ENOSYS, an ERR_PTR()
+ * on any other failure, else the result of posix_acl_from_xattr().
+ */
+struct posix_acl *pvfs2_get_acl(struct inode *inode, int type)
+{
+	struct posix_acl *result;
+	char *attr_name = NULL;
+	char *buf = NULL;
+	int len;
+
+	if (type == ACL_TYPE_ACCESS) {
+		attr_name = PVFS2_XATTR_NAME_ACL_ACCESS;
+	} else if (type == ACL_TYPE_DEFAULT) {
+		attr_name = PVFS2_XATTR_NAME_ACL_DEFAULT;
+	} else {
+		gossip_err("pvfs2_get_acl: bogus value of type %d\n", type);
+		return ERR_PTR(-EINVAL);
+	}
+
+	/*
+	 * A maximum-sized buffer is allocated up front instead of first
+	 * probing the attribute length, which would cost an extra network
+	 * round trip.
+	 */
+	buf = kmalloc(PVFS_MAX_XATTR_VALUELEN, GFP_KERNEL);
+	if (!buf) {
+		gossip_err("pvfs2_get_acl: Could not allocate value ptr\n");
+		return ERR_PTR(-ENOMEM);
+	}
+
+	gossip_debug(GOSSIP_ACL_DEBUG, "inode %pU, key %s, type %d\n",
+		     get_khandle_from_ino(inode), attr_name, type);
+
+	len = pvfs2_inode_getxattr(inode, "", attr_name, buf,
+				   PVFS_MAX_XATTR_VALUELEN);
+	if (len == -ENODATA || len == -ENOSYS) {
+		/* nothing stored under this key: there is no ACL */
+		result = NULL;
+	} else if (len <= 0) {
+		gossip_err("inode %pU retrieving acl's failed with error %d\n",
+			   get_khandle_from_ino(inode), len);
+		result = ERR_PTR(len);
+	} else {
+		/* the key exists: convert it to an in-memory rep */
+		result = posix_acl_from_xattr(&init_user_ns, buf, len);
+	}
+
+	kfree(buf);
+	return result;
+}
+
+/*
+ * Store @acl as the xattr for ACL @type (NULL @acl removes it); access
+ * ACLs are first folded into i_mode. Returns 0 or a negative errno.
+ */
+int pvfs2_set_acl(struct inode *inode, struct posix_acl *acl, int type)
+{
+	struct pvfs2_inode_s *pvfs2_inode = PVFS2_I(inode);
+	int error = 0;
+	void *value = NULL;
+	size_t size = 0;
+	const char *name = NULL;
+
+	switch (type) {
+	case ACL_TYPE_ACCESS:
+		name = PVFS2_XATTR_NAME_ACL_ACCESS;
+		if (acl) {
+			umode_t mode = inode->i_mode;
+			/*
+			 * can we represent this with the traditional file
+			 * mode permission bits?
+			 */
+			error = posix_acl_equiv_mode(acl, &mode);
+			if (error < 0) {
+				gossip_err("%s: posix_acl_equiv_mode err: %d\n",
+					   __func__, error);
+				return error;
+			}
+
+			if (inode->i_mode != mode)
+				SetModeFlag(pvfs2_inode);
+			inode->i_mode = mode;
+			mark_inode_dirty_sync(inode);
+			/* 0 means the mode bits say it all: store no xattr */
+			if (error == 0)
+				acl = NULL;
+		}
+		break;
+	case ACL_TYPE_DEFAULT:
+		name = PVFS2_XATTR_NAME_ACL_DEFAULT;
+		break;
+	default:
+		gossip_err("%s: invalid type %d!\n", __func__, type);
+		return -EINVAL;
+	}
+
+	gossip_debug(GOSSIP_ACL_DEBUG, "%s: inode %pU, key %s type %d\n",
+		     __func__, get_khandle_from_ino(inode), name, type);
+
+	if (acl) {
+		size = posix_acl_xattr_size(acl->a_count);
+		value = kmalloc(size, GFP_KERNEL);
+		if (!value)
+			return -ENOMEM;
+
+		error = posix_acl_to_xattr(&init_user_ns, acl, value, size);
+		if (error < 0)
+			goto out;
+	}
+
+	gossip_debug(GOSSIP_ACL_DEBUG,
+		     "%s: name %s, value %p, size %zu, acl %p\n",
+		     __func__, name, value, size, acl);
+	/*
+	 * Set the extended attribute now. NOTE: a NULL acl leaves value
+	 * NULL and size 0, which translates to a removexattr; we don't
+	 * want that to complain if the attribute does not exist.
+	 */
+	error = pvfs2_inode_setxattr(inode, "", name, value, size, 0);
+
+out:
+	kfree(value);
+	if (!error)
+		set_cached_acl(inode, type, acl);
+	return error;
+}
+
+/* Apply the ACLs a new inode inherits from @dir, per posix_acl_create(). */
+int pvfs2_init_acl(struct inode *inode, struct inode *dir)
+{
+	struct pvfs2_inode_s *pvfs2_inode = PVFS2_I(inode);
+	struct posix_acl *dflt, *acc;
+	umode_t new_mode = inode->i_mode;
+	int rc;
+
+	ClearModeFlag(pvfs2_inode);
+	rc = posix_acl_create(dir, &new_mode, &dflt, &acc);
+	if (rc)
+		return rc;
+
+	if (dflt) {
+		rc = pvfs2_set_acl(inode, dflt, ACL_TYPE_DEFAULT);
+		posix_acl_release(dflt);
+	}
+	if (acc) {
+		/* not attempted when storing the default ACL failed */
+		if (rc == 0)
+			rc = pvfs2_set_acl(inode, acc, ACL_TYPE_ACCESS);
+		posix_acl_release(acc);
+	}
+
+	/* a changed mode has to be pushed out with a forced ->setattr */
+	if (inode->i_mode != new_mode) {
+		SetModeFlag(pvfs2_inode);
+		inode->i_mode = new_mode;
+		pvfs2_flush_inode(inode);
+	}
+
+	return rc;
+}
diff --git a/fs/orangefs/dcache.c b/fs/orangefs/dcache.c
new file mode 100644
index 0000000..464cddb
--- /dev/null
+++ b/fs/orangefs/dcache.c
@@ -0,0 +1,140 @@
+/*
+ * (C) 2001 Clemson University and The University of Chicago
+ *
+ * See COPYING in top-level directory.
+ */
+
+/*
+ *  Implementation of dentry (directory cache) functions.
+ */
+
+#include "protocol.h"
+#include "pvfs2-kernel.h"
+
+/*
+ * Repeat the lookup of the dentry's name in its parent directory.
+ *
+ * Returns 1 if the lookup succeeds and still resolves to this dentry's
+ * inode, else 0. Once the lookup has been issued, any failure or handle
+ * mismatch also drops the dentry via d_drop().
+ */
+static int pvfs2_revalidate_lookup(struct dentry *dentry)
+{
+	struct dentry *parent_dentry = dget_parent(dentry);
+	struct inode *parent_inode = parent_dentry->d_inode;
+	struct pvfs2_inode_s *parent = PVFS2_I(parent_inode);
+	struct inode *inode = dentry->d_inode;
+	struct pvfs2_kernel_op *op;
+	int trusted = 0;
+
+	gossip_debug(GOSSIP_DCACHE_DEBUG, "%s: attempting lookup.\n", __func__);
+
+	op = op_alloc(PVFS2_VFS_OP_LOOKUP);
+	if (!op)
+		goto put_parent;
+
+	op->upcall.req.lookup.sym_follow = PVFS2_LOOKUP_LINK_NO_FOLLOW;
+	op->upcall.req.lookup.parent_refn = parent->refn;
+	strncpy(op->upcall.req.lookup.d_name, dentry->d_name.name, PVFS2_NAME_LEN);
+
+	gossip_debug(GOSSIP_DCACHE_DEBUG,
+		     "%s:%s:%d interrupt flag [%d]\n",
+		     __FILE__, __func__, __LINE__,
+		     get_interruptible_flag(parent_inode));
+
+	if (service_operation(op, "pvfs2_lookup",
+			      get_interruptible_flag(parent_inode)))
+		goto drop;
+
+	if (op->downcall.status != 0 ||
+	    !match_handle(op->downcall.resp.lookup.refn.khandle, inode)) {
+		gossip_debug(GOSSIP_DCACHE_DEBUG,
+			"%s:%s:%d "
+			"lookup failure |%s| or no match |%s|.\n",
+			__FILE__, __func__, __LINE__,
+			op->downcall.status ? "true" : "false",
+			match_handle(op->downcall.resp.lookup.refn.khandle,
+					inode) ? "false" : "true");
+		gossip_debug(GOSSIP_DCACHE_DEBUG,
+			     "%s:%s:%d revalidate failed\n",
+			     __FILE__, __func__, __LINE__);
+		goto drop;
+	}
+
+	trusted = 1;
+	goto release_op;
+drop:
+	d_drop(dentry);
+release_op:
+	op_release(op);
+put_parent:
+	dput(parent_dentry);
+	return trusted;
+}
+
+/*
+ * Decide whether a cached dentry may still be used: the name is looked
+ * up again (except for the root) and the inode's attributes are fetched.
+ *
+ * Returns 1 if the dentry can be trusted, 0 if it cannot, and -ECHILD
+ * when asked to revalidate in RCU-walk mode (LOOKUP_RCU).
+ */
+static int pvfs2_d_revalidate(struct dentry *dentry, unsigned int flags)
+{
+	struct inode *target;
+	int status;
+
+	if (flags & LOOKUP_RCU)
+		return -ECHILD;
+
+	gossip_debug(GOSSIP_DCACHE_DEBUG, "%s: called on dentry %p.\n",
+		     __func__, dentry);
+
+	/* a dentry without an inode is never reported as valid */
+	target = dentry->d_inode;
+	if (target == NULL) {
+		gossip_debug(GOSSIP_DCACHE_DEBUG, "%s: negative dentry.\n",
+			     __func__);
+		return 0;
+	}
+
+	gossip_debug(GOSSIP_DCACHE_DEBUG, "%s: inode valid.\n", __func__);
+
+	/*
+	 * Except for the root, first repeat the lookup to make sure that
+	 * the object not only exists, but is still in the expected place
+	 * in the name space.
+	 */
+	if (is_root_handle(target)) {
+		gossip_debug(GOSSIP_DCACHE_DEBUG,
+			     "%s: root handle, lookup skipped.\n",
+			     __func__);
+	} else if (!pvfs2_revalidate_lookup(dentry)) {
+		return 0;
+	}
+
+	/* now perform getattr */
+	gossip_debug(GOSSIP_DCACHE_DEBUG,
+		     "%s: doing getattr: inode: %p, handle: %pU\n",
+		     __func__,
+		     target,
+		     get_khandle_from_ino(target));
+	status = pvfs2_inode_getattr(target, PVFS_ATTR_SYS_ALL_NOHINT);
+	gossip_debug(GOSSIP_DCACHE_DEBUG,
+		     "%s: getattr %s (ret = %d), returning %s for dentry i_count=%d\n",
+		     __func__,
+		     status ? "failed" : "succeeded",
+		     status,
+		     status ? "INVALID" : "valid",
+		     atomic_read(&target->i_count));
+
+	/* the dentry is valid only if the getattr went through */
+	if (status != 0)
+		return 0;
+
+	return 1;
+}
+
+const struct dentry_operations pvfs2_dentry_operations = {
+	.d_revalidate = pvfs2_d_revalidate,	/* the only dentry hook set here */
+};
diff --git a/fs/orangefs/dir.c b/fs/orangefs/dir.c
new file mode 100644
index 0000000..91d1e62
--- /dev/null
+++ b/fs/orangefs/dir.c
@@ -0,0 +1,395 @@
+/*
+ * (C) 2001 Clemson University and The University of Chicago
+ *
+ * See COPYING in top-level directory.
+ */
+
+#include "protocol.h"
+#include "pvfs2-kernel.h"
+#include "pvfs2-bufmap.h"
+
+struct readdir_handle_t {
+	int buffer_index;	/* readdir buffer slot in use, -1 when none */
+	struct pvfs2_readdir_response readdir_response;	/* decoded reply */
+	void *dents_buf;	/* raw reply buffer; vfree'd by the dtor */
+};
+
+/*
+ * Decode the readdir response at @ptr into @readdir; the entry array is
+ * allocated here. Returns bytes decoded, or -ENOMEM if that fails.
+ */
+static long decode_dirents(char *ptr, struct pvfs2_readdir_response *readdir)
+{
+	int i;
+	struct pvfs2_readdir_response *rd = (void *) ptr;
+	char *buf = ptr;
+	char **pptr = &buf;
+
+	readdir->token = rd->token;
+	readdir->pvfs_dirent_outcount = rd->pvfs_dirent_outcount;
+	readdir->dirent_array = kmalloc_array(readdir->pvfs_dirent_outcount,
+					      sizeof(*readdir->dirent_array),
+					      GFP_KERNEL);	/* overflow-checked */
+	if (readdir->dirent_array == NULL)
+		return -ENOMEM;
+	*pptr += offsetof(struct pvfs2_readdir_response, dirent_array);
+	for (i = 0; i < readdir->pvfs_dirent_outcount; i++) {
+		dec_string(pptr, &readdir->dirent_array[i].d_name,
+			   &readdir->dirent_array[i].d_length);
+		readdir->dirent_array[i].khandle =
+			*(struct pvfs2_khandle *) *pptr;
+		*pptr += 16;	/* step over the khandle just copied */
+	}
+	return (unsigned long)*pptr - (unsigned long)ptr;
+}
+
+/* Bind @buf to @rhandle and decode it; a failed decode vfree()s @buf. */
+static long readdir_handle_ctor(struct readdir_handle_t *rhandle, void *buf,
+				int buffer_index)
+{
+	long decoded;
+
+	if (!buf) {
+		gossip_err("Invalid NULL buffer specified in readdir_handle_ctor\n");
+		return -ENOMEM;
+	}
+	if (buffer_index < 0) {
+		gossip_err("Invalid buffer index specified in readdir_handle_ctor\n");
+		return -EINVAL;
+	}
+
+	rhandle->buffer_index = buffer_index;
+	rhandle->dents_buf = buf;
+	decoded = decode_dirents(buf, &rhandle->readdir_response);
+	if (decoded < 0) {
+		gossip_err("Could not decode readdir from buffer %ld\n", decoded);
+		rhandle->buffer_index = -1;
+		gossip_debug(GOSSIP_DIR_DEBUG, "vfree %p\n", buf);
+		vfree(buf);
+		rhandle->dents_buf = NULL;
+	}
+	return decoded;
+}
+
+/* Release everything @rhandle holds: entries, buffer index, raw buffer. */
+static void readdir_handle_dtor(struct pvfs2_bufmap *bufmap,
+		struct readdir_handle_t *rhandle)
+{
+	if (!rhandle)
+		return;
+
+	/* kfree(NULL) is safe */
+	kfree(rhandle->readdir_response.dirent_array);
+	rhandle->readdir_response.dirent_array = NULL;
+
+	if (rhandle->buffer_index >= 0) {
+		readdir_index_put(bufmap, rhandle->buffer_index);
+		rhandle->buffer_index = -1;
+	}
+	if (rhandle->dents_buf) {
+		gossip_debug(GOSSIP_DIR_DEBUG, "vfree %p\n",
+			     rhandle->dents_buf);
+		vfree(rhandle->dents_buf);
+		rhandle->dents_buf = NULL;
+	}
+}
+
+/*
+ * Read directory entries from an instance of an open directory.
+ *
+ * Each call issues a readdir upcall starting at the token saved in
+ * file->private_data, emits "." and ".." when ctx->pos is 0 and 1, and
+ * then passes every decoded entry to dir_emit(). ctx->pos is set to
+ * PVFS_READDIR_END when the returned token marks the end.
+ *
+ * \retval <0 on error
+ * \retval >=0 otherwise (0, or the value of the last dir_emit() call)
+ *
+ * NOTE(review): dir_emit() is declared in <linux/fs.h> as returning
+ *       bool, false meaning "stop", so the "ret < 0" tests below look
+ *       like they can never fire and the rewind code like dead code.
+ *       Acting on a full buffer also needs a way to resume inside a
+ *       batch, because the token only advances per batch; that rework
+ *       is not attempted here. Please confirm.
+ */
+static int pvfs2_readdir(struct file *file, struct dir_context *ctx)
+{
+	struct pvfs2_bufmap *bufmap = NULL;
+	int ret = 0;
+	int buffer_index;
+	uint64_t *ptoken = file->private_data;
+	uint64_t pos = 0;
+	ino_t ino = 0;
+	struct dentry *dentry = file->f_path.dentry;
+	struct pvfs2_kernel_op *new_op = NULL;
+	struct pvfs2_inode_s *pvfs2_inode = PVFS2_I(dentry->d_inode);
+	int buffer_full = 0;
+	struct readdir_handle_t rhandle;
+	int i = 0;
+	int len = 0;
+	ino_t current_ino = 0;
+	char *current_entry = NULL;
+	long bytes_decoded;
+
+	gossip_ldebug(GOSSIP_DIR_DEBUG,
+		      "%s: ctx->pos:%lld, token = %llu\n",
+		      __func__,
+		      lld(ctx->pos),
+		      llu(*ptoken));
+
+	pos = (uint64_t) ctx->pos;
+
+	/* are we done? */
+	if (pos == PVFS_READDIR_END) {
+		gossip_debug(GOSSIP_DIR_DEBUG,
+			     "Skipping to termination path\n");
+		return 0;
+	}
+
+	gossip_debug(GOSSIP_DIR_DEBUG,
+		     "pvfs2_readdir called on %s (pos=%llu)\n",
+		     dentry->d_name.name, llu(pos));
+
+	rhandle.buffer_index = -1;
+	rhandle.dents_buf = NULL;
+	memset(&rhandle.readdir_response, 0, sizeof(rhandle.readdir_response));
+
+	new_op = op_alloc(PVFS2_VFS_OP_READDIR);
+	if (!new_op)
+		return -ENOMEM;
+
+	new_op->uses_shared_memory = 1;
+	new_op->upcall.req.readdir.refn = pvfs2_inode->refn;
+	new_op->upcall.req.readdir.max_dirent_count = MAX_DIRENT_COUNT_READDIR;
+
+	gossip_debug(GOSSIP_DIR_DEBUG,
+		     "%s: upcall.req.readdir.refn.khandle: %pU\n",
+		     __func__,
+		     &new_op->upcall.req.readdir.refn.khandle);
+
+	/*
+	 * NOTE: the position we send to the readdir upcall is out of
+	 * sync with ctx->pos since:
+	 * 1. pvfs2 doesn't include the "." and ".." entries that are
+	 *    added below.
+	 * 2. the introduction of distributed directory logic makes token no
+	 *    longer be related to f_pos and pos. Instead an independent
+	 *    variable is used inside the function and stored in the
+	 *    private_data of the file structure.
+	 */
+	new_op->upcall.req.readdir.token = *ptoken;
+
+get_new_buffer_index:
+	ret = readdir_index_get(&bufmap, &buffer_index);
+	if (ret < 0) {
+		gossip_lerr("pvfs2_readdir: readdir_index_get() failure (%d)\n",
+			    ret);
+		goto out_free_op;
+	}
+	new_op->upcall.req.readdir.buf_index = buffer_index;
+
+	ret = service_operation(new_op,
+				"pvfs2_readdir",
+				get_interruptible_flag(dentry->d_inode));
+
+	gossip_debug(GOSSIP_DIR_DEBUG,
+		     "Readdir downcall status is %d.  ret:%d\n",
+		     new_op->downcall.status,
+		     ret);
+
+	if (ret == -EAGAIN && op_state_purged(new_op)) {
+		/*
+		 * readdir shared memory area has been wiped due to
+		 * pvfs2-client-core restarting, so we must get a new
+		 * index into the shared memory.
+		 */
+		gossip_debug(GOSSIP_DIR_DEBUG,
+			"%s: Getting new buffer_index for retry of readdir..\n",
+			 __func__);
+		readdir_index_put(bufmap, buffer_index);
+		goto get_new_buffer_index;
+	}
+
+	if (ret == -EIO && op_state_purged(new_op)) {
+		gossip_err("%s: Client is down. Aborting readdir call.\n",
+			__func__);
+		readdir_index_put(bufmap, buffer_index);
+		goto out_free_op;
+	}
+
+	if (ret < 0 || new_op->downcall.status != 0) {
+		gossip_debug(GOSSIP_DIR_DEBUG,
+			     "Readdir request failed.  Status:%d\n",
+			     new_op->downcall.status);
+		readdir_index_put(bufmap, buffer_index);
+		if (ret >= 0)
+			ret = new_op->downcall.status;
+		goto out_free_op;
+	}
+
+	bytes_decoded =
+		readdir_handle_ctor(&rhandle,
+				    new_op->downcall.trailer_buf,
+				    buffer_index);
+	if (bytes_decoded < 0) {
+		ret = bytes_decoded;
+		gossip_err("pvfs2_readdir: Could not decode trailer buffer into a readdir response %d\n",
+			ret);
+		readdir_index_put(bufmap, buffer_index);
+		goto out_free_op;
+	}
+
+	if (bytes_decoded != new_op->downcall.trailer_size) {
+		gossip_err("pvfs2_readdir: # bytes decoded (%ld) != trailer size (%ld)\n",
+			bytes_decoded,
+			(long)new_op->downcall.trailer_size);
+		ret = -EINVAL;
+		goto out_destroy_handle;
+	}
+
+	if (pos == 0) {
+		ino = get_ino_from_khandle(dentry->d_inode);
+		gossip_debug(GOSSIP_DIR_DEBUG,
+			     "%s: calling dir_emit of \".\" with pos = %llu\n",
+			     __func__,
+			     llu(pos));
+		ret = dir_emit(ctx, ".", 1, ino, DT_DIR);
+		if (ret < 0)
+			goto out_destroy_handle;
+		ctx->pos++;
+		gossip_ldebug(GOSSIP_DIR_DEBUG,
+			      "%s: ctx->pos:%lld\n",
+			      __func__,
+			      lld(ctx->pos));
+		pos++;
+	}
+
+	if (pos == 1) {
+		ino = get_parent_ino_from_dentry(dentry);
+		gossip_debug(GOSSIP_DIR_DEBUG,
+			     "%s: calling dir_emit of \"..\" with pos = %llu\n",
+			     __func__,
+			     llu(pos));
+		ret = dir_emit(ctx, "..", 2, ino, DT_DIR);
+		if (ret < 0)
+			goto out_destroy_handle;
+		ctx->pos++;
+		gossip_ldebug(GOSSIP_DIR_DEBUG,
+			      "%s: ctx->pos:%lld\n",
+			      __func__,
+			      lld(ctx->pos));
+		pos++;
+	}
+
+	for (i = 0; i < rhandle.readdir_response.pvfs_dirent_outcount; i++) {
+		len = rhandle.readdir_response.dirent_array[i].d_length;
+		current_entry = rhandle.readdir_response.dirent_array[i].d_name;
+		current_ino = pvfs2_khandle_to_ino(
+			&(rhandle.readdir_response.dirent_array[i].khandle));
+
+		gossip_debug(GOSSIP_DIR_DEBUG,
+			     "calling dir_emit for %s with len %d, pos %ld\n",
+			     current_entry,
+			     len,
+			     (unsigned long)pos);
+		ret =
+		    dir_emit(ctx, current_entry, len, current_ino, DT_UNKNOWN);
+		if (ret < 0) {
+			gossip_debug(GOSSIP_DIR_DEBUG,
+				     "dir_emit() failed. ret:%d\n",
+				     ret);
+			if (i < 2) {
+				gossip_err("dir_emit failed on one of the first two true PVFS directory entries.\n");
+				gossip_err("Duplicate entries may appear.\n");
+			}
+			buffer_full = 1;
+			break;
+		}
+		ctx->pos++;
+		gossip_ldebug(GOSSIP_DIR_DEBUG,
+			      "%s: ctx->pos:%lld\n",
+			      __func__,
+			      lld(ctx->pos));
+
+		pos++;
+	}
+
+	/* this means that all of the dir_emit calls succeeded */
+	if (i == rhandle.readdir_response.pvfs_dirent_outcount) {
+		/* update token */
+		*ptoken = rhandle.readdir_response.token;
+	} else {
+		/* this means a dir_emit call failed */
+		if (rhandle.readdir_response.token == PVFS_READDIR_END) {
+			/*
+			 * If PVFS hit end of directory, then there
+			 * is no way to do math on the token that it
+			 * returned. Instead we go by ctx->pos but
+			 * back up to account for the artificial .
+			 * and .. entries.
+			 */
+			ctx->pos -= 3;
+		} else {
+			/*
+			 * this means a dir_emit call failed. !!! need to set
+			 * back to previous ctx->pos, no middle value allowed
+			 */
+			pos -= (i - 1);
+			ctx->pos -= (i - 1);
+		}
+		gossip_debug(GOSSIP_DIR_DEBUG,
+			"at least one dir_emit call failed. Setting ctx->pos to: %lld\n",
+			lld(ctx->pos));
+	}
+
+	/*
+	 * Did we hit the end of the directory?
+	 */
+	if (rhandle.readdir_response.token == PVFS_READDIR_END &&
+	    !buffer_full) {
+		gossip_debug(GOSSIP_DIR_DEBUG, "End of dir detected; setting ctx->pos to PVFS_READDIR_END.\n");
+		ctx->pos = PVFS_READDIR_END;
+	}
+
+	gossip_debug(GOSSIP_DIR_DEBUG,
+		     "pos = %llu, token = %llu"
+		     ", ctx->pos should have been %lld\n",
+		     llu(pos),
+		     llu(*ptoken),
+		     lld(ctx->pos));
+
+out_destroy_handle:
+	readdir_handle_dtor(bufmap, &rhandle);
+out_free_op:
+	op_release(new_op);
+	gossip_debug(GOSSIP_DIR_DEBUG, "pvfs2_readdir returning %d\n", ret);
+	return ret;
+}
+
+/* Give each open directory its own readdir token, set to the start. */
+static int pvfs2_dir_open(struct inode *inode, struct file *file)
+{
+	uint64_t *token = kmalloc(sizeof(uint64_t), GFP_KERNEL);
+
+	file->private_data = token;
+	if (token == NULL)
+		return -ENOMEM;
+
+	*token = PVFS_READDIR_START;
+	return 0;
+}
+
+static int pvfs2_dir_release(struct inode *inode, struct file *file)
+{
+	pvfs2_flush_inode(inode);
+	kfree(file->private_data);	/* readdir token from pvfs2_dir_open() */
+	return 0;
+}
+
+/* PVFS2 file_operations for directories; .read is generic_read_dir */
+const struct file_operations pvfs2_dir_operations = {
+	.read = generic_read_dir,
+	.iterate = pvfs2_readdir,
+	.open = pvfs2_dir_open,
+	.release = pvfs2_dir_release,
+};
diff --git a/fs/orangefs/inode.c b/fs/orangefs/inode.c
new file mode 100644
index 0000000..5da2a20
--- /dev/null
+++ b/fs/orangefs/inode.c
@@ -0,0 +1,468 @@
+/*
+ * (C) 2001 Clemson University and The University of Chicago
+ *
+ * See COPYING in top-level directory.
+ */
+
+/*
+ *  Linux VFS inode operations.
+ */
+
+#include "protocol.h"
+#include "pvfs2-kernel.h"
+#include "pvfs2-bufmap.h"
+
+/*
+ * Fill one locked page cache page with file data read through
+ * pvfs2_inode_read(), zero whatever was not read, set the page
+ * uptodate or in error, and unlock it. Returns 0 or a negative errno.
+ */
+static int read_one_page(struct page *page)
+{
+	void *page_data;
+	int ret;
+	pgoff_t max_block;
+	ssize_t bytes_read = 0;
+	struct inode *inode = page->mapping->host;
+	const uint32_t blocksize = PAGE_CACHE_SIZE;	/* inode->i_blksize */
+	const uint32_t blockbits = PAGE_CACHE_SHIFT;	/* inode->i_blkbits */
+
+	gossip_debug(GOSSIP_INODE_DEBUG,
+		    "pvfs2_readpage called with page %p\n", page);
+	page_data = pvfs2_kmap(page);
+	/* kept in the page-index type: an int truncates for huge files */
+	max_block = ((inode->i_size / blocksize) + 1);
+	if (page->index < max_block) {
+		loff_t blockptr_offset = (((loff_t) page->index) << blockbits);
+		bytes_read = pvfs2_inode_read(inode, page_data, blocksize,
+					      &blockptr_offset, inode->i_size);
+	}
+	/* only zero remaining unread portions of the page data */
+	if (bytes_read > 0)
+		memset(page_data + bytes_read, 0, blocksize - bytes_read);
+	else
+		memset(page_data, 0, blocksize);
+	/* takes care of potential aliasing */
+	flush_dcache_page(page);
+	if (bytes_read < 0) {
+		ret = bytes_read;
+		SetPageError(page);
+	} else {
+		SetPageUptodate(page);
+		if (PageError(page))
+			ClearPageError(page);
+		ret = 0;
+	}
+	pvfs2_kunmap(page);
+	/* unlock the page after the ->readpage() routine completes */
+	unlock_page(page);
+	return ret;
+}
+
+static int pvfs2_readpage(struct file *file, struct page *page)
+{
+	return read_one_page(page);	/* @file is not needed for the read */
+}
+
+/* Readahead: add each page of @pages to the page cache and read it in. */
+static int pvfs2_readpages(struct file *file,
+			   struct address_space *mapping,
+			   struct list_head *pages,
+			   unsigned nr_pages)
+{
+	int page_idx;
+	int ret;
+
+	gossip_debug(GOSSIP_INODE_DEBUG, "pvfs2_readpages called\n");
+
+	for (page_idx = 0; page_idx < nr_pages; page_idx++) {
+		struct page *page;
+
+		page = list_entry(pages->prev, struct page, lru);
+		list_del(&page->lru);
+		if (!add_to_page_cache(page, mapping, page->index,
+				       GFP_KERNEL)) {
+			ret = read_one_page(page);
+			gossip_debug(GOSSIP_INODE_DEBUG,
+				"page added to cache, read_one_page returned: %d\n",
+				ret);
+		}
+		/* drop the list's reference; the cache holds its own */
+		page_cache_release(page);
+	}
+	BUG_ON(!list_empty(pages));
+	return 0;
+}
+
+/*
+ * Invalidating a page only clears its uptodate and mapped-to-disk
+ * flags; @length is not looked at.
+ */
+static void pvfs2_invalidatepage(struct page *page,
+				 unsigned int offset,
+				 unsigned int length)
+{
+	gossip_debug(GOSSIP_INODE_DEBUG,
+		     "pvfs2_invalidatepage called on page %p (offset is %u)\n",
+		     page, offset);
+
+	ClearPageUptodate(page);
+	ClearPageMappedToDisk(page);
+}
+
+/* Only logs the call and returns 0; the gfp argument is unused. */
+static int pvfs2_releasepage(struct page *page, gfp_t foo)
+{
+	gossip_debug(GOSSIP_INODE_DEBUG,
+		     "pvfs2_releasepage called on page %p\n", page);
+	return 0;
+}
+
+/*
+ * Having a direct_IO entry point in the address_space_operations
+ * struct causes the kernel to allow us to use O_DIRECT on
+ * open. Nothing will ever call this thing, but in the future we
+ * will need to be able to use O_DIRECT on open in order to support
+ * AIO. Modeled after NFS, which does this too.
+ */
+/*
+static ssize_t pvfs2_direct_IO(int rw,
+			struct kiocb *iocb,
+			struct iov_iter *iter,
+                        loff_t offset)
+{
+        gossip_debug(GOSSIP_INODE_DEBUG,
+		     "pvfs2_direct_IO: %s\n",
+                      iocb->ki_filp->f_path.dentry->d_name.name);
+
+        return -EINVAL;
+}
+*/
+
+struct backing_dev_info pvfs2_backing_dev_info = {
+	.name = "pvfs2",
+	.ra_pages = 0,	/* zero readahead pages requested */
+	.capabilities = BDI_CAP_NO_ACCT_DIRTY | BDI_CAP_NO_WRITEBACK,
+};
+
+/* PVFS2 address_space operations; .direct_IO stays disabled for now */
+const struct address_space_operations pvfs2_address_operations = {
+	.readpage = pvfs2_readpage,
+	.readpages = pvfs2_readpages,
+	.invalidatepage = pvfs2_invalidatepage,
+	.releasepage = pvfs2_releasepage,
+/*	.direct_IO = pvfs2_direct_IO */
+};
+
+/*
+ * Change the file size to iattr->ia_size, locally and through a
+ * truncate upcall, and request a c/mtime update when the VFS did not.
+ * Returns 0 or a negative errno.
+ */
+static int pvfs2_setattr_size(struct inode *inode, struct iattr *iattr)
+{
+	struct pvfs2_inode_s *pvfs2_inode = PVFS2_I(inode);
+	struct pvfs2_kernel_op *new_op;
+	loff_t orig_size = i_size_read(inode);
+	int ret = -EINVAL;
+
+	gossip_debug(GOSSIP_UTILS_DEBUG,
+		     "%s: %pU: Handle is %pU | fs_id %d | size is %llu\n",
+		     __func__, get_khandle_from_ino(inode),
+		     &pvfs2_inode->refn.khandle, pvfs2_inode->refn.fs_id,
+		     iattr->ia_size);
+
+	/* get the op first so -ENOMEM cannot leave a local-only truncate */
+	new_op = op_alloc(PVFS2_VFS_OP_TRUNCATE);
+	if (!new_op)
+		return -ENOMEM;
+
+	truncate_setsize(inode, iattr->ia_size);
+	new_op->upcall.req.truncate.refn = pvfs2_inode->refn;
+	new_op->upcall.req.truncate.size = (int64_t) iattr->ia_size;
+
+	ret = service_operation(new_op, __func__,
+				get_interruptible_flag(inode));
+
+	/*
+	 * the truncate has no downcall members to retrieve, but
+	 * the status value tells us if it went through ok or not
+	 */
+	gossip_debug(GOSSIP_UTILS_DEBUG,
+		     "pvfs2: pvfs2_truncate got return value of %d\n", ret);
+	op_release(new_op);
+	if (ret != 0)
+		return ret;
+
+	/*
+	 * Only change the c/mtime if we are changing the size or we are
+	 * explicitly asked to change it.  This handles the semantic difference
+	 * between truncate() and ftruncate() as implemented in the VFS.
+	 *
+	 * The regular truncate() case without ATTR_CTIME and ATTR_MTIME is a
+	 * special case where we need to update the times despite not having
+	 * these flags set.  For all other operations the VFS set these flags
+	 * explicitly if it wants a timestamp update.
+	 */
+	if (orig_size != i_size_read(inode) &&
+	    !(iattr->ia_valid & (ATTR_CTIME | ATTR_MTIME))) {
+		iattr->ia_ctime = iattr->ia_mtime =
+			current_fs_time(inode->i_sb);
+		iattr->ia_valid |= ATTR_CTIME | ATTR_MTIME;
+	}
+
+	return ret;
+}
+
+/*
+ * Change attributes of an object referenced by dentry; a differing
+ * ATTR_SIZE goes through pvfs2_setattr_size() first.
+ */
+int pvfs2_setattr(struct dentry *dentry, struct iattr *iattr)
+{
+	struct inode *inode = dentry->d_inode;
+	int err;
+
+	gossip_debug(GOSSIP_INODE_DEBUG, "pvfs2_setattr: called on %s\n",
+		     dentry->d_name.name);
+
+	err = inode_change_ok(inode, iattr);
+	if (err)
+		goto done;
+
+	if (iattr->ia_valid & ATTR_SIZE) {
+		if (iattr->ia_size != i_size_read(inode)) {
+			err = pvfs2_setattr_size(inode, iattr);
+			if (err)
+				goto done;
+		}
+	}
+
+	setattr_copy(inode, iattr);
+	mark_inode_dirty(inode);
+
+	err = pvfs2_inode_setattr(inode, iattr);
+	gossip_debug(GOSSIP_INODE_DEBUG,
+		     "pvfs2_setattr: inode_setattr returned %d\n", err);
+
+	/* change mod on a file that has ACLs */
+	if (err == 0 && (iattr->ia_valid & ATTR_MODE))
+		err = posix_acl_chmod(inode, inode->i_mode);
+
+done:
+	gossip_debug(GOSSIP_INODE_DEBUG, "pvfs2_setattr: returning %d\n", err);
+	return err;
+}
+
+/*
+ * Obtain attributes of an object given a dentry. All attributes are
+ * refreshed through pvfs2_inode_getattr() first; if that fails the
+ * inode is marked bad and the error is returned.
+ */
+int pvfs2_getattr(struct vfsmount *mnt,
+		  struct dentry *dentry,
+		  struct kstat *kstat)
+{
+	struct inode *inode = dentry->d_inode;
+	int err;
+
+	gossip_debug(GOSSIP_INODE_DEBUG,
+		     "pvfs2_getattr: called on %s\n",
+		     dentry->d_name.name);
+
+	/*
+	 * A getattr expects every field of the inode to be current, so
+	 * there is not much choice but to refresh all of the attributes.
+	 */
+	err = pvfs2_inode_getattr(inode, PVFS_ATTR_SYS_ALL_NOHINT);
+	if (err != 0) {
+		/* assume an I/O error and flag inode as bad */
+		gossip_debug(GOSSIP_INODE_DEBUG,
+			     "%s:%s:%d calling make bad inode\n",
+			     __FILE__,
+			     __func__,
+			     __LINE__);
+		pvfs2_make_bad_inode(inode);
+		return err;
+	}
+
+	generic_fillattr(inode, kstat);
+	/* override block size reported to stat */
+	kstat->blksize = PVFS2_I(inode)->blksize;
+	return err;
+}
+
+/* inode_operations installed on regular files by pvfs2_init_iops() */
+struct inode_operations pvfs2_file_inode_operations = {
+	.get_acl = pvfs2_get_acl,
+	.set_acl = pvfs2_set_acl,
+	.setattr = pvfs2_setattr,
+	.getattr = pvfs2_getattr,
+	.setxattr = generic_setxattr,
+	.getxattr = generic_getxattr,
+	.listxattr = pvfs2_listxattr,
+	.removexattr = generic_removexattr,
+};
+
+/*
+ * Install the address space ops plus the inode/file ops matching the
+ * inode's type; -EINVAL unless it is a regular file, symlink or dir.
+ */
+static int pvfs2_init_iops(struct inode *inode)
+{
+	const umode_t kind = inode->i_mode;
+
+	inode->i_mapping->a_ops = &pvfs2_address_operations;
+	inode->i_mapping->backing_dev_info = &pvfs2_backing_dev_info;
+
+	if (S_ISREG(kind)) {
+		inode->i_op = &pvfs2_file_inode_operations;
+		inode->i_fop = &pvfs2_file_operations;
+		inode->i_blkbits = PAGE_CACHE_SHIFT;
+	} else if (S_ISLNK(kind)) {
+		inode->i_op = &pvfs2_symlink_inode_operations;
+	} else if (S_ISDIR(kind)) {
+		inode->i_op = &pvfs2_dir_inode_operations;
+		inode->i_fop = &pvfs2_dir_operations;
+	} else {
+		gossip_debug(GOSSIP_INODE_DEBUG, "%s: unsupported mode\n",
+			     __func__);
+		return -EINVAL;
+	}
+	return 0;
+}
+
+/*
+ * Turn a PVFS2 object reference into the ino_t used as the hash value
+ * when its inode is looked up in the VFS inode hash. Only the khandle
+ * takes part in the hash; a NULL reference hashes to 0.
+ */
+static inline ino_t pvfs2_handle_hash(PVFS_object_kref *ref)
+{
+	if (ref != NULL)
+		return pvfs2_khandle_to_ino(&ref->khandle);
+	return 0;
+}
+
+/*
+ * Called to set up an inode from iget5_locked: stores the object
+ * reference passed in @data. Always returns 0.
+ */
+static int pvfs2_set_inode(struct inode *inode, void *data)
+{
+	PVFS_object_kref *ref = data;
+	struct pvfs2_inode_s *pvfs2_inode;
+
+	if (inode && ref) {
+		pvfs2_inode = PVFS2_I(inode);
+		if (pvfs2_inode) {
+			pvfs2_inode->refn.fs_id = ref->fs_id;
+			pvfs2_inode->refn.khandle = ref->khandle;
+		}
+	}
+	return 0;
+}
+
+/*
+ * Called to determine if handles match (same khandle and same fs_id).
+ */
+static int pvfs2_test_inode(struct inode *inode, void *data)
+{
+	PVFS_object_kref *wanted = data;
+	struct pvfs2_inode_s *have = PVFS2_I(inode);
+
+	if (PVFS_khandle_cmp(&have->refn.khandle, &wanted->khandle))
+		return 0;
+	return have->refn.fs_id == wanted->fs_id;
+}
+
+/*
+ * Front-end to lookup the inode-cache maintained by the VFS using the
+ * PVFS2 file handle. A newly allocated inode gets its attributes and
+ * operations set up before it is unlocked.
+ *
+ * @sb: the file system super block instance.
+ * @ref: The PVFS2 object for which we are trying to locate an inode.
+ * Returns the inode, NULL if iget5_locked() did, or an ERR_PTR() when
+ * the attributes could not be fetched.
+ */
+struct inode *pvfs2_iget(struct super_block *sb, PVFS_object_kref *ref)
+{
+	unsigned long hash = pvfs2_handle_hash(ref);
+	struct inode *inode;
+	int error;
+
+	inode = iget5_locked(sb, hash, pvfs2_test_inode, pvfs2_set_inode, ref);
+	if (!inode)
+		return NULL;
+	if (!(inode->i_state & I_NEW))
+		return inode;	/* already set up */
+
+	error = pvfs2_inode_getattr(inode, PVFS_ATTR_SYS_ALL_NOHINT);
+	if (error) {
+		iget_failed(inode);
+		return ERR_PTR(error);
+	}
+
+	inode->i_ino = hash;	/* needed for stat etc */
+	pvfs2_init_iops(inode);
+	unlock_new_inode(inode);
+
+	gossip_debug(GOSSIP_INODE_DEBUG,
+		     "iget handle %pU, fsid %d hash %ld i_ino %lu\n",
+		     &ref->khandle, ref->fs_id, hash, inode->i_ino);
+	return inode;
+}
+
+/*
+ * Allocate an inode for a newly created file and insert it, locked,
+ * into the inode hash.
+ * Returns the inode or an ERR_PTR(), never NULL: callers such as
+ * pvfs2_create() check the result with IS_ERR() only.
+ */
+struct inode *pvfs2_new_inode(struct super_block *sb, struct inode *dir,
+		int mode, dev_t dev, PVFS_object_kref *ref)
+{
+	unsigned long hash = pvfs2_handle_hash(ref);
+	struct inode *inode;
+	int error;
+
+	gossip_debug(GOSSIP_INODE_DEBUG,
+		     "pvfs2_get_custom_inode_common: called\n"
+		     "(sb is %p | MAJOR(dev)=%u | MINOR(dev)=%u mode=%o)\n",
+		     sb, MAJOR(dev), MINOR(dev), mode);
+
+	inode = new_inode(sb);
+	if (!inode)
+		return ERR_PTR(-ENOMEM);
+
+	pvfs2_set_inode(inode, ref);
+	inode->i_ino = hash;	/* needed for stat etc */
+
+	error = pvfs2_inode_getattr(inode, PVFS_ATTR_SYS_ALL_NOHINT);
+	if (error)
+		goto out_iput;
+
+	pvfs2_init_iops(inode);
+
+	inode->i_mode = mode;
+	inode->i_uid = current_fsuid();
+	inode->i_gid = current_fsgid();
+	inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
+	inode->i_size = PAGE_CACHE_SIZE;
+	inode->i_rdev = dev;
+
+	error = insert_inode_locked4(inode, hash, pvfs2_test_inode, ref);
+	if (error < 0)
+		goto out_iput;
+
+	gossip_debug(GOSSIP_ACL_DEBUG,
+		     "Initializing ACL's for inode %pU\n",
+		     get_khandle_from_ino(inode));
+	pvfs2_init_acl(inode, dir);
+	return inode;
+
+out_iput:
+	iput(inode);
+	return ERR_PTR(error);
+}
diff --git a/fs/orangefs/namei.c b/fs/orangefs/namei.c
new file mode 100644
index 0000000..8f69dd2
--- /dev/null
+++ b/fs/orangefs/namei.c
@@ -0,0 +1,473 @@
+/*
+ * (C) 2001 Clemson University and The University of Chicago
+ *
+ * See COPYING in top-level directory.
+ */
+
+/*
+ *  Linux VFS namei operations.
+ */
+
+#include "protocol.h"
+#include "pvfs2-kernel.h"
+
+/*
+ * Get a newly allocated inode to go with a negative dentry.
+ *
+ * Sends a PVFS2_VFS_OP_CREATE upcall carrying the parent's object reference,
+ * default attributes for a PVFS_TYPE_METAFILE with @mode, and the name from
+ * @dentry.  On success an inode is built around the reference returned in
+ * the downcall, attached to @dentry, and the parent's mtime/ctime are
+ * refreshed.
+ *
+ * @exclusive is not consulted by this function.
+ *
+ * Returns 0 on success or a negative errno.
+ */
+static int pvfs2_create(struct inode *dir,
+			struct dentry *dentry,
+			umode_t mode,
+			bool exclusive)
+{
+	struct pvfs2_inode_s *parent = PVFS2_I(dir);
+	struct pvfs2_kernel_op *new_op;
+	struct inode *inode;
+	int ret;
+
+	gossip_debug(GOSSIP_NAME_DEBUG, "%s: called\n", __func__);
+
+	new_op = op_alloc(PVFS2_VFS_OP_CREATE);
+	if (!new_op)
+		return -ENOMEM;
+
+	new_op->upcall.req.create.parent_refn = parent->refn;
+
+	fill_default_sys_attrs(new_op->upcall.req.create.attributes,
+			       PVFS_TYPE_METAFILE, mode);
+
+	/*
+	 * NOTE(review): strncpy() leaves d_name unterminated if the name is
+	 * PVFS2_NAME_LEN bytes or longer; presumably pvfs2_lookup()'s length
+	 * check has already rejected such names -- confirm.
+	 */
+	strncpy(new_op->upcall.req.create.d_name,
+		dentry->d_name.name, PVFS2_NAME_LEN);
+
+	ret = service_operation(new_op, __func__, get_interruptible_flag(dir));
+
+	gossip_debug(GOSSIP_UTILS_DEBUG,
+		     "Create Got PVFS2 handle %pU on fsid %d (ret=%d)\n",
+		     &new_op->downcall.resp.create.refn.khandle,
+		     new_op->downcall.resp.create.refn.fs_id, ret);
+
+	if (ret < 0) {
+		gossip_debug(GOSSIP_UTILS_DEBUG,
+			     "%s: failed with error code %d\n",
+			     __func__, ret);
+		goto out;
+	}
+
+	inode = pvfs2_new_inode(dir->i_sb, dir, S_IFREG | mode, 0,
+				&new_op->downcall.resp.create.refn);
+	if (IS_ERR(inode)) {
+		gossip_err("*** Failed to allocate pvfs2 file inode\n");
+		ret = PTR_ERR(inode);
+		goto out;
+	}
+
+	gossip_debug(GOSSIP_UTILS_DEBUG,
+		     "Assigned file inode new number of %pU\n",
+		     get_khandle_from_ino(inode));
+
+	/*
+	 * pvfs2_new_inode() inserted the inode with insert_inode_locked4();
+	 * attach it to the dentry first, then unlock it.
+	 */
+	d_instantiate(dentry, inode);
+	unlock_new_inode(inode);
+
+	gossip_debug(GOSSIP_UTILS_DEBUG,
+		     "Inode (Regular File) %pU -> %s\n",
+		     get_khandle_from_ino(inode),
+		     dentry->d_name.name);
+
+	/* the directory gained an entry: flag and refresh its timestamps */
+	SetMtimeFlag(parent);
+	dir->i_mtime = dir->i_ctime = current_fs_time(dir->i_sb);
+	mark_inode_dirty_sync(dir);
+	ret = 0;
+out:
+	op_release(new_op);
+	gossip_debug(GOSSIP_NAME_DEBUG, "%s: returning %d\n", __func__, ret);
+	return ret;
+}
+
+/*
+ * Attempt to resolve an object name (dentry->d_name), parent handle, and
+ * fsid into a handle for the object.
+ *
+ * Return value:
+ *   - ERR_PTR(-ENAMETOOLONG) if the name is longer than PVFS2_NAME_LEN - 1
+ *   - ERR_PTR(-ENOMEM) if no op could be allocated
+ *   - NULL after adding a negative dentry when the upcall returns -ENOENT
+ *   - ERR_PTR(ret) for any other upcall failure, or the error from
+ *     pvfs2_iget()
+ *   - otherwise whatever d_splice_alias() returns
+ */
+static struct dentry *pvfs2_lookup(struct inode *dir, struct dentry *dentry,
+				   unsigned int flags)
+{
+	struct pvfs2_inode_s *parent = PVFS2_I(dir);
+	struct pvfs2_kernel_op *new_op;
+	struct inode *inode;
+	struct dentry *res;
+	int ret = -EINVAL;
+
+	/*
+	 * in theory we could skip a lookup here (if the intent is to
+	 * create) in order to avoid a potentially failed lookup, but
+	 * leaving it in can skip a valid lookup and try to create a file
+	 * that already exists (e.g. the vfs already handles checking for
+	 * -EEXIST on O_EXCL opens, which is broken if we skip this lookup
+	 * in the create path)
+	 */
+	gossip_debug(GOSSIP_NAME_DEBUG, "%s called on %s\n",
+		     __func__, dentry->d_name.name);
+
+	/* reject names that would not fit, with a terminator, in d_name */
+	if (dentry->d_name.len > (PVFS2_NAME_LEN - 1))
+		return ERR_PTR(-ENAMETOOLONG);
+
+	new_op = op_alloc(PVFS2_VFS_OP_LOOKUP);
+	if (!new_op)
+		return ERR_PTR(-ENOMEM);
+
+	/*
+	 * NOTE(review): this stores the raw LOOKUP_FOLLOW bit; the debug
+	 * print below compares it with PVFS2_LOOKUP_LINK_FOLLOW, so the two
+	 * constants presumably share a value -- confirm.
+	 */
+	new_op->upcall.req.lookup.sym_follow = flags & LOOKUP_FOLLOW;
+
+	gossip_debug(GOSSIP_NAME_DEBUG, "%s:%s:%d using parent %pU\n",
+		     __FILE__,
+		     __func__,
+		     __LINE__,
+		     &parent->refn.khandle);
+	new_op->upcall.req.lookup.parent_refn = parent->refn;
+
+	strncpy(new_op->upcall.req.lookup.d_name, dentry->d_name.name,
+		PVFS2_NAME_LEN);
+
+	gossip_debug(GOSSIP_NAME_DEBUG,
+		     "%s: doing lookup on %s under %pU,%d (follow=%s)\n",
+		     __func__,
+		     new_op->upcall.req.lookup.d_name,
+		     &new_op->upcall.req.lookup.parent_refn.khandle,
+		     new_op->upcall.req.lookup.parent_refn.fs_id,
+		     ((new_op->upcall.req.lookup.sym_follow ==
+		       PVFS2_LOOKUP_LINK_FOLLOW) ? "yes" : "no"));
+
+	ret = service_operation(new_op, __func__, get_interruptible_flag(dir));
+
+	gossip_debug(GOSSIP_NAME_DEBUG,
+		     "Lookup Got %pU, fsid %d (ret=%d)\n",
+		     &new_op->downcall.resp.lookup.refn.khandle,
+		     new_op->downcall.resp.lookup.refn.fs_id,
+		     ret);
+
+	if (ret < 0) {
+		if (ret == -ENOENT) {
+			/*
+			 * if no inode was found, add a negative dentry to
+			 * dcache anyway; if we don't, we don't hold expected
+			 * lookup semantics and we most noticeably break
+			 * during directory renames.
+			 *
+			 * however, if the operation failed or exited, do not
+			 * add the dentry (e.g. in the case that a touch is
+			 * issued on a file that already exists that was
+			 * interrupted during this lookup -- no need to add
+			 * another negative dentry for an existing file)
+			 */
+
+			gossip_debug(GOSSIP_NAME_DEBUG,
+				     "pvfs2_lookup: Adding *negative* dentry "
+				     "%p for %s\n",
+				     dentry,
+				     dentry->d_name.name);
+
+			d_add(dentry, NULL);
+			res = NULL;
+			goto out;
+		}
+
+		/* must be a non-recoverable error */
+		res = ERR_PTR(ret);
+		goto out;
+	}
+
+	/* find or build the VFS inode for the reference the lookup returned */
+	inode = pvfs2_iget(dir->i_sb, &new_op->downcall.resp.lookup.refn);
+	if (IS_ERR(inode)) {
+		gossip_debug(GOSSIP_NAME_DEBUG,
+			"error %ld from iget\n", PTR_ERR(inode));
+		res = ERR_CAST(inode);
+		goto out;
+	}
+
+	gossip_debug(GOSSIP_NAME_DEBUG,
+		     "%s:%s:%d "
+		     "Found good inode [%lu] with count [%d]\n",
+		     __FILE__,
+		     __func__,
+		     __LINE__,
+		     inode->i_ino,
+		     (int)atomic_read(&inode->i_count));
+
+	/* update dentry/inode pair into dcache */
+	res = d_splice_alias(inode, dentry);
+
+	gossip_debug(GOSSIP_NAME_DEBUG,
+		     "Lookup success (inode ct = %d)\n",
+		     (int)atomic_read(&inode->i_count));
+out:
+	op_release(new_op);
+	return res;
+}
+
+/*
+ * Remove the directory entry named by @dentry from @dir by sending a
+ * PVFS2_VFS_OP_REMOVE upcall.  Also installed as the .rmdir handler.
+ *
+ * Returns 0 on success; non-zero otherwise.
+ */
+static int pvfs2_unlink(struct inode *dir, struct dentry *dentry)
+{
+	struct inode *victim = dentry->d_inode;
+	struct pvfs2_inode_s *parent_pi = PVFS2_I(dir);
+	struct pvfs2_kernel_op *op;
+	int rc;
+
+	gossip_debug(GOSSIP_NAME_DEBUG,
+		     "%s: called on %s\n"
+		     "  (inode %pU): Parent is %pU | fs_id %d\n",
+		     __func__,
+		     dentry->d_name.name,
+		     get_khandle_from_ino(victim),
+		     &parent_pi->refn.khandle,
+		     parent_pi->refn.fs_id);
+
+	op = op_alloc(PVFS2_VFS_OP_REMOVE);
+	if (!op)
+		return -ENOMEM;
+
+	op->upcall.req.remove.parent_refn = parent_pi->refn;
+	strncpy(op->upcall.req.remove.d_name,
+		dentry->d_name.name,
+		PVFS2_NAME_LEN);
+
+	rc = service_operation(op, "pvfs2_unlink",
+			       get_interruptible_flag(victim));
+
+	/* the op is no longer needed whatever the outcome */
+	op_release(op);
+
+	if (rc)
+		return rc;
+
+	/* removal succeeded: drop a link and refresh the parent's times */
+	drop_nlink(victim);
+
+	SetMtimeFlag(parent_pi);
+	dir->i_mtime = dir->i_ctime = current_fs_time(dir->i_sb);
+	mark_inode_dirty_sync(dir);
+	return rc;
+}
+
+/*
+ * pvfs2_link() is only implemented here to make sure that we return a
+ * reasonable error code (the kernel will return a misleading EPERM
+ * otherwise).  PVFS2 does not support hard links.
+ *
+ * None of the arguments are examined.  Always returns -EOPNOTSUPP.
+ */
+static int pvfs2_link(struct dentry *old_dentry,
+		      struct inode *dir,
+		      struct dentry *dentry)
+{
+	return -EOPNOTSUPP;
+}
+
+/*
+ * pvfs2_mknod() is only implemented here to make sure that we return a
+ * reasonable error code (the kernel will return a misleading EPERM
+ * otherwise).  PVFS2 does not support special files such as fifos or devices.
+ *
+ * None of the arguments are examined.  Always returns -EOPNOTSUPP.
+ */
+static int pvfs2_mknod(struct inode *dir,
+		       struct dentry *dentry,
+		       umode_t mode,
+		       dev_t rdev)
+{
+	return -EOPNOTSUPP;
+}
+
+/*
+ * Create a symbolic link named by @dentry in @dir that points at @symname.
+ *
+ * Sends a PVFS2_VFS_OP_SYMLINK upcall carrying the parent's object
+ * reference, default attributes for a PVFS_TYPE_SYMLINK, the entry name and
+ * the target.  On success an inode is built around the reference returned in
+ * the downcall and attached to @dentry.
+ *
+ * Returns 0 on success, -EINVAL if @symname is NULL, -ENAMETOOLONG if the
+ * target does not fit (with its terminator) in PVFS2_NAME_LEN bytes, -ENOMEM
+ * if no op could be allocated, or the error from the upcall or from
+ * pvfs2_new_inode().
+ */
+static int pvfs2_symlink(struct inode *dir,
+			 struct dentry *dentry,
+			 const char *symname)
+{
+	struct pvfs2_inode_s *parent = PVFS2_I(dir);
+	struct pvfs2_kernel_op *new_op;
+	struct inode *inode;
+	/* octal: a decimal 755 would be 01363, i.e. sticky + odd rwx bits */
+	int mode = 0755;
+	int ret;
+
+	gossip_debug(GOSSIP_NAME_DEBUG, "%s: called\n", __func__);
+
+	if (!symname)
+		return -EINVAL;
+
+	/*
+	 * The target is copied with a PVFS2_NAME_LEN bound below; refuse
+	 * anything that would be truncated or left unterminated rather than
+	 * silently creating a link to the wrong path.
+	 */
+	if (strlen(symname) > (PVFS2_NAME_LEN - 1))
+		return -ENAMETOOLONG;
+
+	new_op = op_alloc(PVFS2_VFS_OP_SYMLINK);
+	if (!new_op)
+		return -ENOMEM;
+
+	new_op->upcall.req.sym.parent_refn = parent->refn;
+
+	fill_default_sys_attrs(new_op->upcall.req.sym.attributes,
+			       PVFS_TYPE_SYMLINK,
+			       mode);
+
+	strncpy(new_op->upcall.req.sym.entry_name,
+		dentry->d_name.name,
+		PVFS2_NAME_LEN);
+	strncpy(new_op->upcall.req.sym.target, symname, PVFS2_NAME_LEN);
+
+	ret = service_operation(new_op, __func__, get_interruptible_flag(dir));
+
+	gossip_debug(GOSSIP_UTILS_DEBUG,
+		     "Symlink Got PVFS2 handle %pU on fsid %d (ret=%d)\n",
+		     &new_op->downcall.resp.sym.refn.khandle,
+		     new_op->downcall.resp.sym.refn.fs_id, ret);
+
+	if (ret < 0) {
+		gossip_debug(GOSSIP_UTILS_DEBUG,
+			     "%s: failed with error code %d\n",
+			     __func__, ret);
+		goto out;
+	}
+
+	inode = pvfs2_new_inode(dir->i_sb, dir, S_IFLNK | mode, 0,
+				&new_op->downcall.resp.sym.refn);
+	if (IS_ERR(inode)) {
+		gossip_err
+		    ("*** Failed to allocate pvfs2 symlink inode\n");
+		ret = PTR_ERR(inode);
+		goto out;
+	}
+
+	gossip_debug(GOSSIP_UTILS_DEBUG,
+		     "Assigned symlink inode new number of %pU\n",
+		     get_khandle_from_ino(inode));
+
+	/* attach the still-locked new inode to the dentry, then unlock it */
+	d_instantiate(dentry, inode);
+	unlock_new_inode(inode);
+
+	gossip_debug(GOSSIP_UTILS_DEBUG,
+		     "Inode (Symlink) %pU -> %s\n",
+		     get_khandle_from_ino(inode),
+		     dentry->d_name.name);
+
+	/* the directory gained an entry: flag and refresh its timestamps */
+	SetMtimeFlag(parent);
+	dir->i_mtime = dir->i_ctime = current_fs_time(dir->i_sb);
+	mark_inode_dirty_sync(dir);
+	ret = 0;
+out:
+	op_release(new_op);
+	return ret;
+}
+
+/*
+ * Create the directory named by @dentry inside @dir via a
+ * PVFS2_VFS_OP_MKDIR upcall, then build and attach an inode for it.
+ *
+ * Returns -ENOMEM if no op could be allocated, a negative errno from the
+ * upcall or from pvfs2_new_inode(), and otherwise the (non-negative) value
+ * service_operation() returned.
+ */
+static int pvfs2_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
+{
+	struct pvfs2_inode_s *parent_pi = PVFS2_I(dir);
+	struct pvfs2_kernel_op *op;
+	struct inode *new_dir;
+	int rc;
+
+	op = op_alloc(PVFS2_VFS_OP_MKDIR);
+	if (!op)
+		return -ENOMEM;
+
+	/* describe the request: where, what kind of object, and its name */
+	op->upcall.req.mkdir.parent_refn = parent_pi->refn;
+	fill_default_sys_attrs(op->upcall.req.mkdir.attributes,
+			       PVFS_TYPE_DIRECTORY, mode);
+	strncpy(op->upcall.req.mkdir.d_name,
+		dentry->d_name.name,
+		PVFS2_NAME_LEN);
+
+	rc = service_operation(op, __func__, get_interruptible_flag(dir));
+
+	gossip_debug(GOSSIP_UTILS_DEBUG,
+		     "Mkdir Got PVFS2 handle %pU on fsid %d\n",
+		     &op->downcall.resp.mkdir.refn.khandle,
+		     op->downcall.resp.mkdir.refn.fs_id);
+
+	if (rc < 0) {
+		gossip_debug(GOSSIP_UTILS_DEBUG,
+			     "%s: failed with error code %d\n",
+			     __func__, rc);
+		goto release;
+	}
+
+	new_dir = pvfs2_new_inode(dir->i_sb, dir, S_IFDIR | mode, 0,
+				  &op->downcall.resp.mkdir.refn);
+	if (IS_ERR(new_dir)) {
+		gossip_err("*** Failed to allocate pvfs2 dir inode\n");
+		rc = PTR_ERR(new_dir);
+		goto release;
+	}
+
+	gossip_debug(GOSSIP_UTILS_DEBUG,
+		     "Assigned dir inode new number of %pU\n",
+		     get_khandle_from_ino(new_dir));
+
+	/* attach the still-locked new inode to the dentry, then unlock it */
+	d_instantiate(dentry, new_dir);
+	unlock_new_inode(new_dir);
+
+	gossip_debug(GOSSIP_UTILS_DEBUG,
+		     "Inode (Directory) %pU -> %s\n",
+		     get_khandle_from_ino(new_dir),
+		     dentry->d_name.name);
+
+	/*
+	 * NOTE: we have no good way to keep nlink consistent for directories
+	 * across clients; keep constant at 1.
+	 */
+	SetMtimeFlag(parent_pi);
+	dir->i_mtime = dir->i_ctime = current_fs_time(dir->i_sb);
+	mark_inode_dirty_sync(dir);
+release:
+	op_release(op);
+	return rc;
+}
+
+/*
+ * Rename @old_dentry in @old_dir to @new_dentry in @new_dir by sending a
+ * PVFS2_VFS_OP_RENAME upcall carrying both parent references and both
+ * names.
+ *
+ * Returns the result of service_operation(), or -ENOMEM if no op could be
+ * allocated (the same code every other operation in this file uses for an
+ * op_alloc() failure).
+ */
+static int pvfs2_rename(struct inode *old_dir,
+			struct dentry *old_dentry,
+			struct inode *new_dir,
+			struct dentry *new_dentry)
+{
+	struct pvfs2_kernel_op *new_op;
+	int ret;
+
+	gossip_debug(GOSSIP_NAME_DEBUG,
+		     "pvfs2_rename: called (%s/%s => %s/%s) ct=%d\n",
+		     old_dentry->d_parent->d_name.name,
+		     old_dentry->d_name.name,
+		     new_dentry->d_parent->d_name.name,
+		     new_dentry->d_name.name,
+		     d_count(new_dentry));
+
+	new_op = op_alloc(PVFS2_VFS_OP_RENAME);
+	if (!new_op)
+		return -ENOMEM;
+
+	new_op->upcall.req.rename.old_parent_refn = PVFS2_I(old_dir)->refn;
+	new_op->upcall.req.rename.new_parent_refn = PVFS2_I(new_dir)->refn;
+
+	strncpy(new_op->upcall.req.rename.d_old_name,
+		old_dentry->d_name.name,
+		PVFS2_NAME_LEN);
+	strncpy(new_op->upcall.req.rename.d_new_name,
+		new_dentry->d_name.name,
+		PVFS2_NAME_LEN);
+
+	ret = service_operation(new_op,
+				"pvfs2_rename",
+				get_interruptible_flag(old_dentry->d_inode));
+
+	gossip_debug(GOSSIP_NAME_DEBUG,
+		     "pvfs2_rename: got downcall status %d\n",
+		     ret);
+
+	/*
+	 * NOTE(review): the target's ctime is touched whether or not the
+	 * rename succeeded -- confirm that is intended.
+	 */
+	if (new_dentry->d_inode)
+		new_dentry->d_inode->i_ctime = CURRENT_TIME;
+
+	op_release(new_op);
+	return ret;
+}
+
+/*
+ * PVFS2 implementation of VFS inode operations for directories.
+ *
+ * .link and .mknod are stubs that return -EOPNOTSUPP, and .rmdir shares
+ * its handler with .unlink.
+ */
+struct inode_operations pvfs2_dir_inode_operations = {
+	.lookup = pvfs2_lookup,
+	.get_acl = pvfs2_get_acl,
+	.set_acl = pvfs2_set_acl,
+	.create = pvfs2_create,
+	.link = pvfs2_link,
+	.unlink = pvfs2_unlink,
+	.symlink = pvfs2_symlink,
+	.mkdir = pvfs2_mkdir,
+	.rmdir = pvfs2_unlink,
+	.mknod = pvfs2_mknod,
+	.rename = pvfs2_rename,
+	.setattr = pvfs2_setattr,
+	.getattr = pvfs2_getattr,
+	.setxattr = generic_setxattr,
+	.getxattr = generic_getxattr,
+	.removexattr = generic_removexattr,
+	.listxattr = pvfs2_listxattr,
+};
diff --git a/fs/orangefs/super.c b/fs/orangefs/super.c
new file mode 100644
index 0000000..298a85e
--- /dev/null
+++ b/fs/orangefs/super.c
@@ -0,0 +1,548 @@
+/*
+ * (C) 2001 Clemson University and The University of Chicago
+ *
+ * See COPYING in top-level directory.
+ */
+
+#include "protocol.h"
+#include "pvfs2-kernel.h"
+#include "pvfs2-bufmap.h"
+#include <linux/parser.h>
+
+/* a cache for pvfs2-inode objects (i.e. pvfs2 inode private data) */
+static struct kmem_cache *pvfs2_inode_cache;
+
+/* list for storing pvfs2 specific superblocks in use */
+LIST_HEAD(pvfs2_superblocks);
+
+/* NOTE(review): presumably guards pvfs2_superblocks -- confirm with users */
+DEFINE_SPINLOCK(pvfs2_superblocks_lock);
+
+/* Mount option identifiers returned by match_token(); Opt_err = no match. */
+enum {
+	Opt_intr,
+	Opt_acl,
+
+	Opt_err
+};
+
+/* Option strings recognized by parse_mount_options(). */
+static const match_table_t tokens = {
+	{ Opt_acl,	"acl" },
+	{ Opt_intr,	"intr" },
+	{ Opt_err,	NULL }
+};
+
+
+/*
+ * Parse the comma-separated mount option string @options and record the
+ * result on @sb.
+ *
+ * MS_POSIXACL and PVFS2_OPT_INTR are cleared first, so only options present
+ * in @options end up set.  Empty fields between commas are skipped.  Note
+ * that strsep() modifies @options in place.
+ *
+ * Returns 0 on success, or -EINVAL at the first unrecognized option (logged
+ * unless @silent is non-zero).
+ */
+static int parse_mount_options(struct super_block *sb, char *options,
+		int silent)
+{
+	struct pvfs2_sb_info_s *sbi = PVFS2_SB(sb);
+	substring_t args[MAX_OPT_ARGS];
+	char *opt;
+
+	sb->s_flags &= ~MS_POSIXACL;
+	sbi->flags &= ~PVFS2_OPT_INTR;
+
+	for (opt = strsep(&options, ",");
+	     opt != NULL;
+	     opt = strsep(&options, ",")) {
+		if (*opt == '\0')
+			continue;
+
+		switch (match_token(opt, tokens, args)) {
+		case Opt_acl:
+			sb->s_flags |= MS_POSIXACL;
+			break;
+		case Opt_intr:
+			sbi->flags |= PVFS2_OPT_INTR;
+			break;
+		default:
+			if (!silent)
+				gossip_err("Error: mount option [%s] is not supported.\n", opt);
+			return -EINVAL;
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * Slab constructor for pvfs2_inode_cache (registered in
+ * pvfs2_inode_cache_initialize()).  Initializes the embedded VFS inode and
+ * the xattr rw-semaphore, and sets i_version to 1.
+ */
+static void pvfs2_inode_cache_ctor(void *req)
+{
+	struct pvfs2_inode_s *pvfs2_inode = req;
+
+	inode_init_once(&pvfs2_inode->vfs_inode);
+	init_rwsem(&pvfs2_inode->xattr_sem);
+
+	pvfs2_inode->vfs_inode.i_version = 1;
+}
+
+/*
+ * super_operations.alloc_inode: take a pvfs2_inode_s from the slab cache,
+ * reset its PVFS2-private fields and hand back the embedded VFS inode.
+ *
+ * Returns NULL if the slab allocation fails.
+ */
+static struct inode *pvfs2_alloc_inode(struct super_block *sb)
+{
+	struct pvfs2_inode_s *pvfs2_inode;
+
+	pvfs2_inode = kmem_cache_alloc(pvfs2_inode_cache,
+				       PVFS2_CACHE_ALLOC_FLAGS);
+	if (pvfs2_inode == NULL) {
+		gossip_err("Failed to allocate pvfs2_inode\n");
+		return NULL;
+	}
+
+	/*
+	 * We want to clear everything except for rw_semaphore and the
+	 * vfs_inode.
+	 */
+	/* NOTE(review): 16 is presumably sizeof(refn.khandle) -- confirm */
+	memset(&pvfs2_inode->refn.khandle, 0, 16);
+	pvfs2_inode->refn.fs_id = PVFS_FS_ID_NULL;
+	pvfs2_inode->last_failed_block_index_read = 0;
+	memset(pvfs2_inode->link_target, 0, sizeof(pvfs2_inode->link_target));
+	pvfs2_inode->pinode_flags = 0;
+
+	gossip_debug(GOSSIP_SUPER_DEBUG,
+		     "pvfs2_alloc_inode: allocated %p\n",
+		     &pvfs2_inode->vfs_inode);
+	return &pvfs2_inode->vfs_inode;
+}
+
+/*
+ * super_operations.destroy_inode: return the pvfs2_inode_s that contains
+ * @inode to the slab cache.
+ */
+static void pvfs2_destroy_inode(struct inode *inode)
+{
+	struct pvfs2_inode_s *pvfs2_inode = PVFS2_I(inode);
+
+	gossip_debug(GOSSIP_SUPER_DEBUG,
+			"%s: deallocated %p destroying inode %pU\n",
+			__func__, pvfs2_inode, get_khandle_from_ino(inode));
+
+	kmem_cache_free(pvfs2_inode_cache, pvfs2_inode);
+}
+
+/*
+ * NOTE: information filled in here is typically reflected in the
+ * output of the system command 'df'
+ *
+ * Sends a PVFS2_VFS_OP_STATFS upcall for the superblock's fs_id and copies
+ * the block and file counts from the downcall into @buf.  The upcall is
+ * interruptible only when the filesystem was mounted with "intr".
+ *
+ * Returns -ENOMEM if no op could be allocated, otherwise the value returned
+ * by service_operation().
+ */
+static int pvfs2_statfs(struct dentry *dentry, struct kstatfs *buf)
+{
+	int ret = -ENOMEM;
+	struct pvfs2_kernel_op *new_op = NULL;
+	int flags = 0;
+	struct super_block *sb = NULL;
+
+	sb = dentry->d_sb;
+
+	gossip_debug(GOSSIP_SUPER_DEBUG,
+		     "pvfs2_statfs: called on sb %p (fs_id is %d)\n",
+		     sb,
+		     (int)(PVFS2_SB(sb)->fs_id));
+
+	new_op = op_alloc(PVFS2_VFS_OP_STATFS);
+	if (!new_op)
+		return ret;
+	new_op->upcall.req.statfs.fs_id = PVFS2_SB(sb)->fs_id;
+
+	if (PVFS2_SB(sb)->flags & PVFS2_OPT_INTR)
+		flags = PVFS2_OP_INTERRUPTIBLE;
+
+	ret = service_operation(new_op, "pvfs2_statfs", flags);
+
+	/*
+	 * NOTE(review): this tests downcall.status rather than ret.  If
+	 * service_operation() can fail without setting a negative status,
+	 * @buf is filled from a response that was never received (the error
+	 * in ret is still returned) -- confirm whether "ret < 0" should be
+	 * checked as well.
+	 */
+	if (new_op->downcall.status < 0)
+		goto out_op_release;
+
+	gossip_debug(GOSSIP_SUPER_DEBUG,
+		     "pvfs2_statfs: got %ld blocks available | "
+		     "%ld blocks total | %ld block size\n",
+		     (long)new_op->downcall.resp.statfs.blocks_avail,
+		     (long)new_op->downcall.resp.statfs.blocks_total,
+		     (long)new_op->downcall.resp.statfs.block_size);
+
+	buf->f_type = sb->s_magic;
+	memcpy(&buf->f_fsid, &PVFS2_SB(sb)->fs_id, sizeof(buf->f_fsid));
+	buf->f_bsize = new_op->downcall.resp.statfs.block_size;
+	buf->f_namelen = PVFS2_NAME_LEN;
+
+	/* f_bfree and f_bavail are both taken from blocks_avail */
+	buf->f_blocks = (sector_t) new_op->downcall.resp.statfs.blocks_total;
+	buf->f_bfree = (sector_t) new_op->downcall.resp.statfs.blocks_avail;
+	buf->f_bavail = (sector_t) new_op->downcall.resp.statfs.blocks_avail;
+	buf->f_files = (sector_t) new_op->downcall.resp.statfs.files_total;
+	buf->f_ffree = (sector_t) new_op->downcall.resp.statfs.files_avail;
+	buf->f_frsize = sb->s_blocksize;
+
+out_op_release:
+	op_release(new_op);
+	gossip_debug(GOSSIP_SUPER_DEBUG, "pvfs2_statfs: returning %d\n", ret);
+	return ret;
+}
+
+/*
+ * Remount as initiated by VFS layer.  We just need to reparse the mount
+ * options, no need to signal pvfs2-client-core about it.
+ *
+ * @flags is not examined.  Options are parsed with silent = 1, so an
+ * unsupported option yields -EINVAL without a log message.
+ */
+static int pvfs2_remount_fs(struct super_block *sb, int *flags, char *data)
+{
+	gossip_debug(GOSSIP_SUPER_DEBUG, "pvfs2_remount_fs: called\n");
+	return parse_mount_options(sb, data, 1);
+}
+
+/*
+ * Remount as initiated by pvfs2-client-core on restart.  This is used to
+ * repopulate mount information left from previous pvfs2-client-core.
+ *
+ * the idea here is that given a valid superblock, we're
+ * re-initializing the user space client with the initial mount
+ * information specified when the super block was first initialized.
+ * this is very different than the first initialization/creation of a
+ * superblock.  we use the special service_priority_operation to make
+ * sure that the mount gets ahead of any other pending operation that
+ * is waiting for servicing.  this means that the pvfs2-client won't
+ * fail to start several times for all other pending operations before
+ * the client regains all of the mount information from us.
+ * NOTE: this function assumes that the request_mutex is already acquired!
+ *
+ * Returns -ENOMEM if no op could be allocated, otherwise the value returned
+ * by service_operation().
+ */
+int pvfs2_remount(struct super_block *sb)
+{
+	struct pvfs2_kernel_op *new_op;
+	int ret = -EINVAL;
+
+	gossip_debug(GOSSIP_SUPER_DEBUG, "pvfs2_remount: called\n");
+
+	new_op = op_alloc(PVFS2_VFS_OP_FS_MOUNT);
+	if (!new_op)
+		return -ENOMEM;
+	/* replay the device name saved by pvfs2_mount() */
+	strncpy(new_op->upcall.req.fs_mount.pvfs2_config_server,
+		PVFS2_SB(sb)->devname,
+		PVFS_MAX_SERVER_ADDR_LEN);
+
+	gossip_debug(GOSSIP_SUPER_DEBUG,
+		     "Attempting PVFS2 Remount via host %s\n",
+		     new_op->upcall.req.fs_mount.pvfs2_config_server);
+
+	/*
+	 * we assume that the calling function has already acquired the
+	 * request_mutex to prevent other operations from bypassing
+	 * this one
+	 */
+	ret = service_operation(new_op, "pvfs2_remount",
+		PVFS2_OP_PRIORITY | PVFS2_OP_NO_SEMAPHORE);
+	gossip_debug(GOSSIP_SUPER_DEBUG,
+		     "pvfs2_remount: mount got return value of %d\n",
+		     ret);
+	if (ret == 0) {
+		/*
+		 * store the id assigned to this sb -- it's just a
+		 * short-lived mapping that the system interface uses
+		 * to map this superblock to a particular mount entry
+		 */
+		PVFS2_SB(sb)->id = new_op->downcall.resp.fs_mount.id;
+		PVFS2_SB(sb)->mount_pending = 0;
+	}
+
+	op_release(new_op);
+	return ret;
+}
+
+/* Stub: performs no initialization and always returns 0. */
+int fsid_key_table_initialize(void)
+{
+	return 0;
+}
+
+/* Stub counterpart of fsid_key_table_initialize(): nothing to tear down. */
+void fsid_key_table_finalize(void)
+{
+}
+
+/*
+ * Called whenever the VFS dirties the inode in response to atime updates.
+ *
+ * Sets the atime flag on the pvfs2 inode; @flags is not examined.
+ */
+static void pvfs2_dirty_inode(struct inode *inode, int flags)
+{
+	struct pvfs2_inode_s *pvfs2_inode = PVFS2_I(inode);
+
+	gossip_debug(GOSSIP_SUPER_DEBUG,
+		     "pvfs2_dirty_inode: %pU\n",
+		     get_khandle_from_ino(inode));
+	SetAtimeFlag(pvfs2_inode);
+}
+
+/* Superblock operations; installed on the sb by pvfs2_fill_sb(). */
+struct super_operations pvfs2_s_ops = {
+	.alloc_inode = pvfs2_alloc_inode,
+	.destroy_inode = pvfs2_destroy_inode,
+	.dirty_inode = pvfs2_dirty_inode,
+	.drop_inode = generic_delete_inode,
+	.statfs = pvfs2_statfs,
+	.remount_fs = pvfs2_remount_fs,
+	.show_options = generic_show_options,
+};
+
+/*
+ * export_operations.fh_to_dentry: rebuild an object reference from a file
+ * handle and return a dentry for the corresponding inode.
+ *
+ * The handle layout mirrors pvfs2_encode_fh(): 16 bytes of khandle starting
+ * at fid->raw, followed by the fs_id in raw[4].
+ *
+ * Returns NULL if @fh_len is below 5 words or @fh_type is greater than 2;
+ * otherwise the result of d_obtain_alias().
+ */
+struct dentry *pvfs2_fh_to_dentry(struct super_block *sb,
+				  struct fid *fid,
+				  int fh_len,
+				  int fh_type)
+{
+	PVFS_object_kref refn;
+
+	if (fh_len < 5 || fh_type > 2)
+		return NULL;
+
+	PVFS_khandle_from(&(refn.khandle), fid->raw, 16);
+	refn.fs_id = (u32) fid->raw[4];
+	gossip_debug(GOSSIP_SUPER_DEBUG,
+		     "fh_to_dentry: handle %pU, fs_id %d\n",
+		     &refn.khandle,
+		     refn.fs_id);
+
+	/* the pvfs2_iget() result, error pointer included, goes straight in */
+	return d_obtain_alias(pvfs2_iget(sb, &refn));
+}
+
+/*
+ * export_operations.encode_fh: serialize @inode (and @parent, if given)
+ * into the file handle buffer @fh.
+ *
+ * Layout in 32-bit words: fh[0..3] khandle, fh[4] fs_id; with a parent,
+ * fh[5..8] parent khandle and fh[9] parent fs_id.
+ *
+ * @max_len holds the buffer capacity in words on entry and the number of
+ * words required/used on return.
+ *
+ * Returns 1 (object only), 2 (object and parent), or 255 when the buffer
+ * is too small.
+ */
+int pvfs2_encode_fh(struct inode *inode,
+		    __u32 *fh,
+		    int *max_len,
+		    struct inode *parent)
+{
+	int needed = parent ? 10 : 5;
+	PVFS_object_kref kref;
+
+	if (*max_len < needed) {
+		gossip_lerr("fh buffer is too small for encoding\n");
+		*max_len = needed;
+		return 255;
+	}
+
+	kref = PVFS2_I(inode)->refn;
+	PVFS_khandle_to(&kref.khandle, fh, 16);
+	fh[4] = kref.fs_id;
+
+	gossip_debug(GOSSIP_SUPER_DEBUG,
+		     "Encoding fh: handle %pU, fsid %u\n",
+		     &kref.khandle,
+		     kref.fs_id);
+
+	if (parent) {
+		/* the parent reference starts 20 bytes (5 words) in */
+		kref = PVFS2_I(parent)->refn;
+		PVFS_khandle_to(&kref.khandle, (char *) fh + 20, 16);
+		fh[9] = kref.fs_id;
+
+		gossip_debug(GOSSIP_SUPER_DEBUG,
+			     "Encoding parent: handle %pU, fsid %u\n",
+			     &kref.khandle,
+			     kref.fs_id);
+	}
+
+	*max_len = needed;
+	return parent ? 2 : 1;
+}
+
+/* File handle export operations; installed on the sb by pvfs2_fill_sb(). */
+static struct export_operations pvfs2_export_ops = {
+	.encode_fh = pvfs2_encode_fh,
+	.fh_to_dentry = pvfs2_fh_to_dentry,
+};
+
+/*
+ * Fill in a freshly allocated superblock; passed to mount_nodev() by
+ * pvfs2_mount().
+ *
+ * @sb:     superblock to initialize
+ * @data:   struct pvfs2_mount_sb_info_t prepared by pvfs2_mount() (root
+ *          khandle, fs_id, id and the raw mount option string)
+ * @silent: forwarded to parse_mount_options()
+ *
+ * Allocates the private sb info, parses the mount options, installs the
+ * operation tables and creates the root inode and root dentry.
+ *
+ * Returns 0 on success or a negative errno.  The private sb info is not
+ * freed here on failure; pvfs2_kill_sb() kfree()s it.
+ */
+int pvfs2_fill_sb(struct super_block *sb, void *data, int silent)
+{
+	int ret;
+	struct inode *root;
+	struct dentry *root_dentry;
+	struct pvfs2_mount_sb_info_t *mount_sb_info = data;
+	PVFS_object_kref root_object;
+
+	/* alloc and zero our private pvfs2 sb info */
+	sb->s_fs_info =
+		kzalloc(sizeof(struct pvfs2_sb_info_s), PVFS2_GFP_FLAGS);
+	if (!PVFS2_SB(sb))
+		return -ENOMEM;
+	PVFS2_SB(sb)->sb = sb;
+
+	PVFS2_SB(sb)->root_khandle = mount_sb_info->root_khandle;
+	PVFS2_SB(sb)->fs_id = mount_sb_info->fs_id;
+	PVFS2_SB(sb)->id = mount_sb_info->id;
+
+	if (mount_sb_info->data) {
+		ret = parse_mount_options(sb, mount_sb_info->data,
+					  silent);
+		if (ret)
+			return ret;
+	}
+
+	/* Hang the xattr handlers off the superblock */
+	sb->s_xattr = pvfs2_xattr_handlers;
+	sb->s_magic = PVFS2_SUPER_MAGIC;
+	sb->s_op = &pvfs2_s_ops;
+	sb->s_d_op = &pvfs2_dentry_operations;
+
+	sb->s_blocksize = pvfs_bufmap_size_query();
+	sb->s_blocksize_bits = pvfs_bufmap_shift_query();
+	sb->s_maxbytes = MAX_LFS_FILESIZE;
+
+	root_object.khandle = PVFS2_SB(sb)->root_khandle;
+	root_object.fs_id = PVFS2_SB(sb)->fs_id;
+	gossip_debug(GOSSIP_SUPER_DEBUG,
+		     "get inode %pU, fsid %d\n",
+		     &root_object.khandle,
+		     root_object.fs_id);
+
+	root = pvfs2_iget(sb, &root_object);
+	if (IS_ERR(root))
+		return PTR_ERR(root);
+
+	gossip_debug(GOSSIP_SUPER_DEBUG,
+		     "Allocated root inode [%p] with mode %x\n",
+		     root,
+		     root->i_mode);
+
+	/*
+	 * allocates and places root dentry in dcache.  d_make_root() drops
+	 * the inode reference itself when it fails, so there must be no
+	 * iput(root) on the error path.
+	 */
+	root_dentry = d_make_root(root);
+	if (!root_dentry)
+		return -ENOMEM;
+
+	sb->s_export_op = &pvfs2_export_ops;
+	sb->s_root = root_dentry;
+	return 0;
+}
+
+/*
+ * file_system_type.mount: mount a PVFS2 file system.
+ *
+ * @fst:     file system type, forwarded to mount_nodev()
+ * @flags:   mount flags, forwarded to mount_nodev()
+ * @devname: copied into the FS_MOUNT upcall as the config server address
+ * @data:    raw mount option string, parsed later by pvfs2_fill_sb()
+ *
+ * Sends a PVFS2_VFS_OP_FS_MOUNT upcall, builds the superblock with
+ * mount_nodev()/pvfs2_fill_sb() from the reply, remembers @devname on the
+ * private sb info and adds the sb to the list of known pvfs2 superblocks.
+ *
+ * Returns the root dentry on success.  On failure returns an ERR_PTR()
+ * carrying the error that actually occurred: the upcall's, mount_nodev()'s,
+ * or -EINVAL for a missing @devname or a null fs_id.
+ */
+struct dentry *pvfs2_mount(struct file_system_type *fst,
+			   int flags,
+			   const char *devname,
+			   void *data)
+{
+	int ret;
+	struct super_block *sb;
+	struct pvfs2_kernel_op *new_op;
+	struct pvfs2_mount_sb_info_t mount_sb_info;
+	struct dentry *mnt_sb_d;
+
+	gossip_debug(GOSSIP_SUPER_DEBUG,
+		     "pvfs2_mount: called with devname %s\n",
+		     devname);
+
+	if (!devname) {
+		gossip_err("ERROR: device name not specified.\n");
+		return ERR_PTR(-EINVAL);
+	}
+
+	new_op = op_alloc(PVFS2_VFS_OP_FS_MOUNT);
+	if (!new_op)
+		return ERR_PTR(-ENOMEM);
+
+	strncpy(new_op->upcall.req.fs_mount.pvfs2_config_server,
+		devname,
+		PVFS_MAX_SERVER_ADDR_LEN);
+
+	gossip_debug(GOSSIP_SUPER_DEBUG,
+		     "Attempting PVFS2 Mount via host %s\n",
+		     new_op->upcall.req.fs_mount.pvfs2_config_server);
+
+	ret = service_operation(new_op, "pvfs2_mount", 0);
+	gossip_debug(GOSSIP_SUPER_DEBUG,
+		     "pvfs2_mount: mount got return value of %d\n", ret);
+	if (ret)
+		goto free_op;
+
+	if (new_op->downcall.resp.fs_mount.fs_id == PVFS_FS_ID_NULL) {
+		gossip_err("ERROR: Retrieved null fs_id\n");
+		ret = -EINVAL;
+		goto free_op;
+	}
+
+	/* fill in temporary structure passed to fill_sb method */
+	mount_sb_info.data = data;
+	mount_sb_info.root_khandle =
+		new_op->downcall.resp.fs_mount.root_khandle;
+	mount_sb_info.fs_id = new_op->downcall.resp.fs_mount.fs_id;
+	mount_sb_info.id = new_op->downcall.resp.fs_mount.id;
+
+	/*
+	 * the mount_sb_info structure looks odd, but it's used because
+	 * the private sb info isn't allocated until we call
+	 * pvfs2_fill_sb, yet we have the info we need to fill it with
+	 * here.  so we store it temporarily and pass all of the info
+	 * to fill_sb where it's properly copied out
+	 */
+	mnt_sb_d = mount_nodev(fst,
+			       flags,
+			       (void *)&mount_sb_info,
+			       pvfs2_fill_sb);
+	if (IS_ERR(mnt_sb_d)) {
+		/* report mount_nodev()'s error, not the stale ret == 0 */
+		ret = PTR_ERR(mnt_sb_d);
+		goto free_op;
+	}
+
+	sb = mnt_sb_d->d_sb;
+
+	/*
+	 * on successful mount, store the devname and data
+	 * used
+	 */
+	strncpy(PVFS2_SB(sb)->devname,
+		devname,
+		PVFS_MAX_SERVER_ADDR_LEN);
+
+	/* mount_pending must be cleared */
+	PVFS2_SB(sb)->mount_pending = 0;
+
+	/*
+	 * finally, add this sb to our list of known pvfs2
+	 * sb's
+	 */
+	add_pvfs2_sb(sb);
+	op_release(new_op);
+	return mnt_sb_d;
+
+free_op:
+	gossip_err("pvfs2_mount: mount request failed with %d\n", ret);
+	if (ret == -EINVAL) {
+		gossip_err("Ensure that all pvfs2-servers have the same FS configuration files\n");
+		gossip_err("Look at pvfs2-client-core log file (typically /tmp/pvfs2-client.log) for more details\n");
+	}
+
+	op_release(new_op);
+
+	/*
+	 * Hand the real error back to the VFS.  A non-negative ret cannot
+	 * be encoded as an error pointer, so fall back to -EINVAL for it.
+	 */
+	mnt_sb_d = ERR_PTR(ret < 0 ? ret : -EINVAL);
+
+	gossip_debug(GOSSIP_SUPER_DEBUG,
+		     "pvfs2_mount: returning dentry %p\n",
+		     mnt_sb_d);
+	return mnt_sb_d;
+}
+
+/*
+ * Tear down a pvfs2 superblock: tell userspace about the unmount, drop the
+ * sb from the pvfs2 superblock list, run the generic teardown, and finally
+ * free the private sb info.
+ */
+void pvfs2_kill_sb(struct super_block *sb)
+{
+	gossip_debug(GOSSIP_SUPER_DEBUG, "pvfs2_kill_sb: called\n");
+
+	/*
+	 * issue the unmount to userspace to tell it to remove the
+	 * dynamic mount info it has for this superblock
+	 */
+	pvfs2_unmount_sb(sb);
+
+	/* remove the sb from our list of pvfs2 specific sb's */
+	remove_pvfs2_sb(sb);
+
+	/* provided sb cleanup */
+	kill_anon_super(sb);
+
+	/* free the pvfs2 superblock private data */
+	kfree(PVFS2_SB(sb));
+}
+
+/*
+ * Create the slab cache that backs pvfs2_alloc_inode().
+ *
+ * Returns 0 on success or -ENOMEM if the cache could not be created.
+ */
+int pvfs2_inode_cache_initialize(void)
+{
+	pvfs2_inode_cache = kmem_cache_create("pvfs2_inode_cache",
+					      sizeof(struct pvfs2_inode_s),
+					      0,
+					      PVFS2_CACHE_CREATE_FLAGS,
+					      pvfs2_inode_cache_ctor);
+	if (pvfs2_inode_cache)
+		return 0;
+
+	gossip_err("Cannot create pvfs2_inode_cache\n");
+	return -ENOMEM;
+}
+
+/* Destroy the pvfs2 inode slab cache.  Always returns 0. */
+int pvfs2_inode_cache_finalize(void)
+{
+	kmem_cache_destroy(pvfs2_inode_cache);
+	return 0;
+}
diff --git a/fs/orangefs/symlink.c b/fs/orangefs/symlink.c
new file mode 100644
index 0000000..7fed227
--- /dev/null
+++ b/fs/orangefs/symlink.c
@@ -0,0 +1,30 @@
+/*
+ * (C) 2001 Clemson University and The University of Chicago
+ *
+ * See COPYING in top-level directory.
+ */
+
+#include "protocol.h"
+#include "pvfs2-kernel.h"
+#include "pvfs2-bufmap.h"
+
+/*
+ * inode_operations.follow_link: hand the VFS the link target stored in the
+ * pvfs2 inode's link_target buffer.
+ *
+ * Always returns NULL.
+ */
+static void *pvfs2_follow_link(struct dentry *dentry, struct nameidata *nd)
+{
+	char *target =  PVFS2_I(dentry->d_inode)->link_target;
+
+	gossip_debug(GOSSIP_INODE_DEBUG,
+		     "pvfs2: %s called on %s (target is %p)\n",
+		     __func__, (char *)dentry->d_name.name, target);
+
+	nd_set_link(nd, target);
+	return NULL;
+}
+
+/*
+ * PVFS2 implementation of VFS inode operations for symbolic links.
+ *
+ * NOTE(review): .setxattr and .listxattr are wired up but .getxattr and
+ * .removexattr are not, unlike the directory operations -- confirm that
+ * this is intentional.
+ */
+struct inode_operations pvfs2_symlink_inode_operations = {
+	.readlink = generic_readlink,
+	.follow_link = pvfs2_follow_link,
+	.setattr = pvfs2_setattr,
+	.getattr = pvfs2_getattr,
+	.listxattr = pvfs2_listxattr,
+	.setxattr = generic_setxattr,
+};
diff --git a/fs/orangefs/xattr.c b/fs/orangefs/xattr.c
new file mode 100644
index 0000000..d87301c
--- /dev/null
+++ b/fs/orangefs/xattr.c
@@ -0,0 +1,537 @@
+/*
+ * (C) 2001 Clemson University and The University of Chicago
+ *
+ * See COPYING in top-level directory.
+ */
+
+/*
+ *  Linux VFS extended attribute operations.
+ */
+
+#include "protocol.h"
+#include "pvfs2-kernel.h"
+#include "pvfs2-bufmap.h"
+#include <linux/posix_acl_xattr.h>
+#include <linux/xattr.h>
+
+
+/*
+ * Prefix of the keys that are hidden from listxattr output.
+ * SYSTEM_PVFS2_KEY_LEN must stay equal to strlen(SYSTEM_PVFS2_KEY).
+ */
+#define SYSTEM_PVFS2_KEY "system.pvfs2."
+#define SYSTEM_PVFS2_KEY_LEN 13
+
+/*
+ * Decide whether a key may be reported by listxattr.
+ *
+ * NOTE: despite its name, this function returns
+ *   1 if the key should be listed, i.e. it is shorter than
+ *     SYSTEM_PVFS2_KEY or does not start with it, and
+ *   0 if the key starts with SYSTEM_PVFS2_KEY and must be hidden.
+ *
+ * @key:  key as returned by userspace
+ * @size: length of @key as reported alongside it
+ */
+static int is_reserved_key(const char *key, size_t size)
+{
+	/* too short to carry the reserved prefix at all */
+	if (size < SYSTEM_PVFS2_KEY_LEN)
+		return 1;
+
+	if (strncmp(key, SYSTEM_PVFS2_KEY, SYSTEM_PVFS2_KEY_LEN) == 0)
+		return 0;
+
+	return 1;
+}
+
+/*
+ * Translate the VFS setxattr flags into the pvfs2 protocol flag.
+ *
+ * XATTR_REPLACE (attribute must exist) is tested first, so it wins when
+ * both bits are set; XATTR_CREATE (attribute must not exist) comes next.
+ * Returns 0 when neither bit is set.
+ */
+static inline int convert_to_internal_xattr_flags(int setxattr_flags)
+{
+	if (setxattr_flags & XATTR_REPLACE)
+		return PVFS_XATTR_REPLACE;
+
+	if (setxattr_flags & XATTR_CREATE)
+		return PVFS_XATTR_CREATE;
+
+	return 0;
+}
+
+
+/*
+ * Fetch the value of the extended attribute "<prefix><name>" of @inode
+ * into a caller-supplied buffer.
+ *
+ * @inode:  inode whose attribute is queried
+ * @prefix: namespace prefix prepended to @name; NULL is treated as ""
+ * @name:   attribute name, must not be NULL
+ * @buffer: destination for the value; may be NULL only when @size is 0
+ * @size:   capacity of @buffer; 0 only probes for the value length
+ *
+ * Returns the length of the attribute value on success.  On failure:
+ *   -EINVAL  bad pointers or a key that does not fit the protocol,
+ *   -ENOMEM  no op structure could be allocated,
+ *   -ENODATA the attribute does not exist,
+ *   -ERANGE  @buffer is too small for the value,
+ *   or whatever other error service_operation() reported.
+ */
+ssize_t pvfs2_inode_getxattr(struct inode *inode, const char *prefix,
+		const char *name, void *buffer, size_t size)
+{
+	struct pvfs2_inode_s *pvfs2_inode = PVFS2_I(inode);
+	struct pvfs2_kernel_op *new_op = NULL;
+	ssize_t ret = -ENOMEM;
+	ssize_t length = 0;
+	size_t key_len;
+	int fsuid;
+	int fsgid;
+
+	/*
+	 * A missing prefix means "no prefix", matching what
+	 * pvfs2_inode_setxattr() and pvfs2_inode_removexattr() accept.
+	 */
+	if (prefix == NULL)
+		prefix = "";
+
+	gossip_debug(GOSSIP_XATTR_DEBUG,
+		     "%s: prefix %s name %s, buffer_size %zu\n",
+		     __func__, prefix, name, size);
+
+	if (name == NULL || (size > 0 && buffer == NULL)) {
+		gossip_err("pvfs2_inode_getxattr: bogus NULL pointers\n");
+		return -EINVAL;
+	}
+
+	/* size is unsigned, so only the key length can be out of range */
+	key_len = strlen(name) + strlen(prefix);
+	if (key_len >= PVFS_MAX_XATTR_NAMELEN) {
+		gossip_err("Invalid size (%d) or key length (%d)\n",
+			   (int)size,
+			   (int)key_len);
+		return -EINVAL;
+	}
+
+	fsuid = from_kuid(current_user_ns(), current_fsuid());
+	fsgid = from_kgid(current_user_ns(), current_fsgid());
+
+	gossip_debug(GOSSIP_XATTR_DEBUG,
+		     "getxattr on inode %pU, name %s "
+		     "(uid %o, gid %o)\n",
+		     get_khandle_from_ino(inode),
+		     name,
+		     fsuid,
+		     fsgid);
+
+	down_read(&pvfs2_inode->xattr_sem);
+
+	new_op = op_alloc(PVFS2_VFS_OP_GETXATTR);
+	if (!new_op)
+		goto out_unlock;
+
+	new_op->upcall.req.getxattr.refn = pvfs2_inode->refn;
+	ret = snprintf((char *)new_op->upcall.req.getxattr.key,
+		       PVFS_MAX_XATTR_NAMELEN, "%s%s", prefix, name);
+
+	/*
+	 * NOTE: Although keys are meant to be NULL terminated textual
+	 * strings, I am going to explicitly pass the length just in case
+	 * we change this later on...
+	 */
+	new_op->upcall.req.getxattr.key_sz = ret + 1;
+
+	ret = service_operation(new_op, "pvfs2_inode_getxattr",
+				get_interruptible_flag(inode));
+	if (ret != 0) {
+		if (ret == -ENOENT) {
+			ret = -ENODATA;
+			gossip_debug(GOSSIP_XATTR_DEBUG,
+				     "pvfs2_inode_getxattr: inode %pU key %s"
+				     " does not exist!\n",
+				     get_khandle_from_ino(inode),
+				     (char *)new_op->upcall.req.getxattr.key);
+		}
+		goto out_release_op;
+	}
+
+	/*
+	 * Length returned includes null terminator.
+	 * NOTE(review): val_sz comes from userspace and is not validated
+	 * here; a value of 0 would make length negative -- confirm that
+	 * the daemon never sends that.
+	 */
+	length = new_op->downcall.resp.getxattr.val_sz - 1;
+
+	/*
+	 * Just return the length of the queried attribute.
+	 */
+	if (size == 0) {
+		ret = length;
+		goto out_release_op;
+	}
+
+	/*
+	 * Check to see if the value length is > provided buffer size.
+	 */
+	if (length > size) {
+		ret = -ERANGE;
+		goto out_release_op;
+	}
+
+	memset(buffer, 0, size);
+	memcpy(buffer, new_op->downcall.resp.getxattr.val, length);
+
+	/* report the value length, not the (zero) service status */
+	gossip_debug(GOSSIP_XATTR_DEBUG,
+		     "pvfs2_inode_getxattr: inode %pU "
+		     "key %s key_sz %d, val_len %d\n",
+		     get_khandle_from_ino(inode),
+		     (char *)new_op->upcall.req.getxattr.key,
+		     (int)new_op->upcall.req.getxattr.key_sz,
+		     (int)length);
+
+	ret = length;
+
+out_release_op:
+	op_release(new_op);
+out_unlock:
+	up_read(&pvfs2_inode->xattr_sem);
+	return ret;
+}
+
+/*
+ * Ask userspace to remove the extended attribute "<prefix><name>" from
+ * @inode.  A NULL @prefix is treated as the empty string.
+ *
+ * -ENOENT from the service call is mapped to -ENODATA when @flags has
+ * XATTR_REPLACE set and to 0 (success) otherwise.  Returns -ENOMEM if
+ * no op structure could be allocated; any other service result is
+ * returned unchanged.
+ */
+static int pvfs2_inode_removexattr(struct inode *inode,
+			    const char *prefix,
+			    const char *name,
+			    int flags)
+{
+	struct pvfs2_inode_s *pvfs2_inode = PVFS2_I(inode);
+	struct pvfs2_kernel_op *op;
+	const char *pfx = prefix ? prefix : "";
+	int key_len;
+	int rc = -ENOMEM;
+
+	down_write(&pvfs2_inode->xattr_sem);
+
+	op = op_alloc(PVFS2_VFS_OP_REMOVEXATTR);
+	if (!op)
+		goto out_unlock;
+
+	op->upcall.req.removexattr.refn = pvfs2_inode->refn;
+
+	/*
+	 * Keys are NUL terminated text, but the length (terminator
+	 * included) is passed explicitly as well.
+	 */
+	key_len = snprintf((char *)op->upcall.req.removexattr.key,
+			   PVFS_MAX_XATTR_NAMELEN,
+			   "%s%s",
+			   pfx,
+			   name);
+	op->upcall.req.removexattr.key_sz = key_len + 1;
+
+	gossip_debug(GOSSIP_XATTR_DEBUG,
+		     "pvfs2_inode_removexattr: key %s, key_sz %d\n",
+		     (char *)op->upcall.req.removexattr.key,
+		     (int)op->upcall.req.removexattr.key_sz);
+
+	rc = service_operation(op,
+			       "pvfs2_inode_removexattr",
+			       get_interruptible_flag(inode));
+
+	/* Replacing a non-existent attribute is an error; removing is not. */
+	if (rc == -ENOENT)
+		rc = (flags & XATTR_REPLACE) ? -ENODATA : 0;
+
+	gossip_debug(GOSSIP_XATTR_DEBUG,
+		     "pvfs2_inode_removexattr: returning %d\n", rc);
+
+	op_release(op);
+out_unlock:
+	up_write(&pvfs2_inode->xattr_sem);
+	return rc;
+}
+
+/*
+ * Set the extended attribute "<prefix><name>" of @inode to @value.
+ *
+ * @inode:  inode to modify
+ * @prefix: namespace prefix prepended to @name; NULL is treated as ""
+ * @name:   attribute name, must not be NULL
+ * @value:  attribute value; the key is text, but the value can be binary
+ * @size:   number of bytes in @value, must be < PVFS_MAX_XATTR_VALUELEN
+ * @flags:  XATTR_CREATE / XATTR_REPLACE as passed to setxattr
+ *
+ * A call with @size == 0 and @value == NULL is treated as a request to
+ * remove the attribute.
+ *
+ * Returns 0 on success and a negative errno on error: -EINVAL for bad
+ * arguments, -ENOMEM if no op structure could be allocated, or the
+ * result of the service/remove operation.
+ */
+int pvfs2_inode_setxattr(struct inode *inode, const char *prefix,
+		const char *name, const void *value, size_t size, int flags)
+{
+	struct pvfs2_inode_s *pvfs2_inode = PVFS2_I(inode);
+	struct pvfs2_kernel_op *new_op;
+	size_t key_len;
+	int internal_flag = 0;
+	int ret = -ENOMEM;
+
+	/*
+	 * Normalise a missing prefix once, so that the length check, the
+	 * debug output and the key sent to userspace all agree.  Passing
+	 * NULL straight to the "%s%s" format below would otherwise build
+	 * a key starting with the formatter's NULL placeholder.
+	 */
+	if (prefix == NULL)
+		prefix = "";
+
+	gossip_debug(GOSSIP_XATTR_DEBUG,
+		     "%s: prefix %s, name %s, buffer_size %zu\n",
+		     __func__, prefix, name, size);
+
+	/* size is unsigned, so only the upper bound needs checking */
+	if (size >= PVFS_MAX_XATTR_VALUELEN ||
+	    flags < 0) {
+		gossip_err("pvfs2_inode_setxattr: bogus values of size(%d), flags(%d)\n",
+			   (int)size,
+			   flags);
+		return -EINVAL;
+	}
+
+	if (name == NULL ||
+	    (size > 0 && value == NULL)) {
+		gossip_err("pvfs2_inode_setxattr: bogus NULL pointers!\n");
+		return -EINVAL;
+	}
+
+	internal_flag = convert_to_internal_xattr_flags(flags);
+
+	key_len = strlen(name) + strlen(prefix);
+	if (key_len >= PVFS_MAX_XATTR_NAMELEN) {
+		gossip_err
+		    ("pvfs2_inode_setxattr: bogus key size (%d)\n",
+		     (int)key_len);
+		return -EINVAL;
+	}
+
+	/* This is equivalent to a removexattr */
+	if (size == 0 && value == NULL) {
+		gossip_debug(GOSSIP_XATTR_DEBUG,
+			     "removing xattr (%s%s)\n",
+			     prefix,
+			     name);
+		return pvfs2_inode_removexattr(inode, prefix, name, flags);
+	}
+
+	gossip_debug(GOSSIP_XATTR_DEBUG,
+		     "setxattr on inode %pU, name %s\n",
+		     get_khandle_from_ino(inode),
+		     name);
+
+	down_write(&pvfs2_inode->xattr_sem);
+	new_op = op_alloc(PVFS2_VFS_OP_SETXATTR);
+	if (!new_op)
+		goto out_unlock;
+
+	new_op->upcall.req.setxattr.refn = pvfs2_inode->refn;
+	new_op->upcall.req.setxattr.flags = internal_flag;
+	/*
+	 * NOTE: Although keys are meant to be NULL terminated textual
+	 * strings, I am going to explicitly pass the length just in
+	 * case we change this later on...
+	 */
+	ret = snprintf((char *)new_op->upcall.req.setxattr.keyval.key,
+		       PVFS_MAX_XATTR_NAMELEN,
+		       "%s%s",
+		       prefix, name);
+	new_op->upcall.req.setxattr.keyval.key_sz = ret + 1;
+	memcpy(new_op->upcall.req.setxattr.keyval.val, value, size);
+	new_op->upcall.req.setxattr.keyval.val[size] = '\0';
+	/* For some reason, val_sz should include the \0 at the end
+	 * as well.
+	 */
+	new_op->upcall.req.setxattr.keyval.val_sz = size + 1;
+
+	gossip_debug(GOSSIP_XATTR_DEBUG,
+		     "pvfs2_inode_setxattr: key %s, key_sz %d "
+		     " value size %zu\n",
+		     (char *)new_op->upcall.req.setxattr.keyval.key,
+		     (int)new_op->upcall.req.setxattr.keyval.key_sz,
+		     size + 1);
+
+	ret = service_operation(new_op,
+				"pvfs2_inode_setxattr",
+				get_interruptible_flag(inode));
+
+	gossip_debug(GOSSIP_XATTR_DEBUG,
+		     "pvfs2_inode_setxattr: returning %d\n",
+		     ret);
+
+	/* when request is serviced properly, free req op struct */
+	op_release(new_op);
+out_unlock:
+	up_write(&pvfs2_inode->xattr_sem);
+	return ret;
+}
+
+/*
+ * List the extended attribute keys of the inode behind @dentry.
+ *
+ * @dentry: dentry whose inode is queried
+ * @buffer: destination for the keys; may be NULL only when @size is 0
+ * @size:   capacity of @buffer; 0 only probes for a buffer size
+ *
+ * With @size == 0 the return value is an upper bound
+ * (returned_count * PVFS_MAX_XATTR_NAMELEN), not the exact size.
+ * Otherwise keys are fetched in batches of at most
+ * PVFS_MAX_XATTR_LISTLEN and copied into @buffer; keys for which
+ * is_reserved_key() returns 0 are skipped.  Only whole keys are copied,
+ * and copying stops at the first key that no longer fits.
+ *
+ * Returns the number of bytes placed in @buffer (or the size estimate),
+ * -EINVAL for a NULL buffer with non-zero size, -ENOMEM if no op could
+ * be allocated, -EIO if userspace reports an impossible key count, or
+ * the error returned by service_operation().
+ *
+ * NOTE(review): a too-small buffer yields a short list rather than
+ * -ERANGE; confirm this is the intended contract.
+ */
+ssize_t pvfs2_listxattr(struct dentry *dentry, char *buffer, size_t size)
+{
+	struct inode *inode = dentry->d_inode;
+	struct pvfs2_inode_s *pvfs2_inode = PVFS2_I(inode);
+	struct pvfs2_kernel_op *new_op;
+	uint64_t token = PVFS_ITERATE_START;
+	ssize_t ret = -ENOMEM;
+	ssize_t total = 0;
+	ssize_t length = 0;
+	int count_keys = 0;
+	int returned_count;
+	int key_size;
+	int i = 0;
+
+	/* size is unsigned, so there is no negative size to reject */
+	if (size > 0 && buffer == NULL) {
+		gossip_err("%s: bogus NULL pointers\n", __func__);
+		return -EINVAL;
+	}
+
+	down_read(&pvfs2_inode->xattr_sem);
+	new_op = op_alloc(PVFS2_VFS_OP_LISTXATTR);
+	if (!new_op)
+		goto out_unlock;
+
+	if (buffer && size > 0)
+		memset(buffer, 0, size);
+
+try_again:
+	key_size = 0;
+	new_op->upcall.req.listxattr.refn = pvfs2_inode->refn;
+	new_op->upcall.req.listxattr.token = token;
+	new_op->upcall.req.listxattr.requested_count =
+	    (size == 0) ? 0 : PVFS_MAX_XATTR_LISTLEN;
+	ret = service_operation(new_op, __func__,
+				get_interruptible_flag(inode));
+	if (ret != 0)
+		goto done;
+
+	if (size == 0) {
+		/*
+		 * This is a bit of a big upper limit, but I did not want to
+		 * spend too much time getting this correct, since users end
+		 * up allocating memory rather than us...
+		 */
+		total = new_op->downcall.resp.listxattr.returned_count *
+			PVFS_MAX_XATTR_NAMELEN;
+		goto done;
+	}
+
+	length = new_op->downcall.resp.listxattr.keylen;
+	if (length == 0)
+		goto done;
+
+	/*
+	 * We never ask for more than PVFS_MAX_XATTR_LISTLEN keys, so any
+	 * count outside 0..PVFS_MAX_XATTR_LISTLEN is a bogus reply from
+	 * userspace.  Reject it before it is used to walk lengths[].
+	 * NOTE(review): individual lengths[i] values are still trusted.
+	 */
+	returned_count = new_op->downcall.resp.listxattr.returned_count;
+	if (returned_count < 0 || returned_count > PVFS_MAX_XATTR_LISTLEN) {
+		gossip_err("%s: impossible value for returned_count:%d:\n",
+			   __func__,
+			   returned_count);
+		ret = -EIO;
+		goto done;
+	}
+
+	/*
+	 * Check to see how much can be fit in the buffer. Fit only whole keys.
+	 */
+	for (i = 0; i < returned_count; i++) {
+		if (total + new_op->downcall.resp.listxattr.lengths[i] > size)
+			goto done;
+
+		/*
+		 * Since many dumb programs try to setxattr() on our reserved
+		 * xattrs this is a feeble attempt at defeating those by not
+		 * listing them in the output of listxattr.. sigh
+		 *
+		 * is_reserved_key() returns non-zero for keys that are to
+		 * be listed.
+		 */
+		if (is_reserved_key(new_op->downcall.resp.listxattr.key + key_size,
+				new_op->downcall.resp.listxattr.lengths[i])) {
+			gossip_debug(GOSSIP_XATTR_DEBUG, "Copying key %d -> %s\n",
+					i, new_op->downcall.resp.listxattr.key +
+						key_size);
+			memcpy(buffer + total,
+				new_op->downcall.resp.listxattr.key + key_size,
+				new_op->downcall.resp.listxattr.lengths[i]);
+			total += new_op->downcall.resp.listxattr.lengths[i];
+			count_keys++;
+		} else {
+			gossip_debug(GOSSIP_XATTR_DEBUG, "[RESERVED] key %d -> %s\n",
+					i, new_op->downcall.resp.listxattr.key +
+						key_size);
+		}
+		/* advance past this key whether or not it was copied */
+		key_size += new_op->downcall.resp.listxattr.lengths[i];
+	}
+
+	/*
+	 * Since the buffer was large enough, we might have to continue
+	 * fetching more keys!
+	 */
+	token = new_op->downcall.resp.listxattr.token;
+	if (token != PVFS_ITERATE_END)
+		goto try_again;
+
+done:
+	gossip_debug(GOSSIP_XATTR_DEBUG, "%s: returning %d"
+		     " [size of buffer %ld] (filled in %d keys)\n",
+		     __func__,
+		     ret ? (int)ret : (int)total,
+		     (long)size,
+		     count_keys);
+	op_release(new_op);
+	if (ret == 0)
+		ret = total;
+out_unlock:
+	up_read(&pvfs2_inode->xattr_sem);
+	return ret;
+}
+
+/*
+ * xattr handler ->set for the default namespace: forwards to
+ * pvfs2_inode_setxattr() with PVFS2_XATTR_NAME_DEFAULT_PREFIX.
+ * @handler_flags is not used.
+ */
+int pvfs2_xattr_set_default(struct dentry *dentry,
+			    const char *name,
+			    const void *buffer,
+			    size_t size,
+			    int flags,
+			    int handler_flags)
+{
+	struct inode *inode = dentry->d_inode;
+
+	return pvfs2_inode_setxattr(inode, PVFS2_XATTR_NAME_DEFAULT_PREFIX,
+				    name, buffer, size, flags);
+}
+
+/*
+ * xattr handler ->get for the default namespace: forwards to
+ * pvfs2_inode_getxattr() with PVFS2_XATTR_NAME_DEFAULT_PREFIX.
+ * @handler_flags is not used.
+ */
+int pvfs2_xattr_get_default(struct dentry *dentry,
+			    const char *name,
+			    void *buffer,
+			    size_t size,
+			    int handler_flags)
+{
+	struct inode *inode = dentry->d_inode;
+
+	return pvfs2_inode_getxattr(inode, PVFS2_XATTR_NAME_DEFAULT_PREFIX,
+				    name, buffer, size);
+}
+
+/*
+ * xattr handler ->set for the trusted namespace: forwards to
+ * pvfs2_inode_setxattr() with PVFS2_XATTR_NAME_TRUSTED_PREFIX.
+ * @handler_flags is not used.
+ */
+static int pvfs2_xattr_set_trusted(struct dentry *dentry,
+			    const char *name,
+			    const void *buffer,
+			    size_t size,
+			    int flags,
+			    int handler_flags)
+{
+	struct inode *inode = dentry->d_inode;
+
+	return pvfs2_inode_setxattr(inode, PVFS2_XATTR_NAME_TRUSTED_PREFIX,
+				    name, buffer, size, flags);
+}
+
+/*
+ * xattr handler ->get for the trusted namespace: forwards to
+ * pvfs2_inode_getxattr() with PVFS2_XATTR_NAME_TRUSTED_PREFIX.
+ * @handler_flags is not used.
+ */
+static int pvfs2_xattr_get_trusted(struct dentry *dentry,
+			    const char *name,
+			    void *buffer,
+			    size_t size,
+			    int handler_flags)
+{
+	struct inode *inode = dentry->d_inode;
+
+	return pvfs2_inode_getxattr(inode, PVFS2_XATTR_NAME_TRUSTED_PREFIX,
+				    name, buffer, size);
+}
+
+/*
+ * Handler for xattrs carrying PVFS2_XATTR_NAME_TRUSTED_PREFIX.
+ * It is only referenced through the const pointer table
+ * pvfs2_xattr_handlers[] and never modified, hence const.
+ */
+static const struct xattr_handler pvfs2_xattr_trusted_handler = {
+	.prefix = PVFS2_XATTR_NAME_TRUSTED_PREFIX,
+	.get = pvfs2_xattr_get_trusted,
+	.set = pvfs2_xattr_set_trusted,
+};
+
+/*
+ * Catch-all handler for xattrs that no other handler claims.
+ * It is only referenced through the const pointer table
+ * pvfs2_xattr_handlers[] and never modified, hence const.
+ */
+static const struct xattr_handler pvfs2_xattr_default_handler = {
+	/*
+	 * NOTE: this is set to be the empty string.
+	 * so that all un-prefixed xattrs keys get caught
+	 * here!
+	 */
+	.prefix = PVFS2_XATTR_NAME_DEFAULT_PREFIX,
+	.get = pvfs2_xattr_get_default,
+	.set = pvfs2_xattr_set_default,
+};
+
+/*
+ * NULL-terminated table of the xattr handlers used by pvfs2: the two
+ * POSIX ACL handlers, then the trusted handler, then the default
+ * handler whose prefix is documented as the empty string.
+ * NOTE(review): the default handler presumably has to stay last so
+ * that the more specific prefixes are tried first -- confirm.
+ */
+const struct xattr_handler *pvfs2_xattr_handlers[] = {
+	&posix_acl_access_xattr_handler,
+	&posix_acl_default_xattr_handler,
+	&pvfs2_xattr_trusted_handler,
+	&pvfs2_xattr_default_handler,
+	NULL
+};
-- 
1.8.3.1

--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html