[PATCH V2 3/5] Orangefs: hooks and call-outs

Mike Marshall <hubcap@xxxxxxxxxxxx> · Tue, 20 Jan 2015 15:51:49 -0500

From: Mike Marshall <hubcap@xxxxxxxxxxxx>

Signed-off-by: Mike Marshall <hubcap@xxxxxxxxxxxx>
---
 fs/orangefs/acl.c         | 176 +++++++++
 fs/orangefs/dir.c         | 395 ++++++++++++++++++++
 fs/orangefs/inode.c       | 468 ++++++++++++++++++++++++
 fs/orangefs/namei.c       | 473 ++++++++++++++++++++++++
 fs/orangefs/pvfs2-utils.c | 914 ++++++++++++++++++++++++++++++++++++++++++++++
 fs/orangefs/super.c       | 548 +++++++++++++++++++++++++++
 fs/orangefs/symlink.c     |  30 ++
 7 files changed, 3004 insertions(+)
 create mode 100644 fs/orangefs/acl.c
 create mode 100644 fs/orangefs/dir.c
 create mode 100644 fs/orangefs/inode.c
 create mode 100644 fs/orangefs/namei.c
 create mode 100644 fs/orangefs/pvfs2-utils.c
 create mode 100644 fs/orangefs/super.c
 create mode 100644 fs/orangefs/symlink.c

diff --git a/fs/orangefs/acl.c b/fs/orangefs/acl.c
new file mode 100644
index 0000000..af18cf1
--- /dev/null
+++ b/fs/orangefs/acl.c
@@ -0,0 +1,176 @@
+/*
+ * (C) 2001 Clemson University and The University of Chicago
+ *
+ * See COPYING in top-level directory.
+ */
+
+#include "protocol.h"
+#include "pvfs2-kernel.h"
+#include "pvfs2-bufmap.h"
+#include <linux/posix_acl_xattr.h>
+#include <linux/fs_struct.h>
+
+/*
+ * Fetch the POSIX ACL of the requested type for an inode.
+ *
+ * The ACL is read from the matching extended attribute through
+ * pvfs2_inode_getxattr() and converted with posix_acl_from_xattr().
+ *
+ * Returns the converted ACL when the attribute has a positive length,
+ * NULL when getxattr reports -ENODATA or -ENOSYS, ERR_PTR(-EINVAL) for
+ * an unknown type, ERR_PTR(-ENOMEM) when the buffer cannot be allocated
+ * and ERR_PTR() of any other getxattr result.
+ */
+struct posix_acl *pvfs2_get_acl(struct inode *inode, int type)
+{
+	struct posix_acl *result;
+	char *xattr_name = NULL;
+	char *buf = NULL;
+	int len;
+
+	if (type == ACL_TYPE_ACCESS) {
+		xattr_name = PVFS2_XATTR_NAME_ACL_ACCESS;
+	} else if (type == ACL_TYPE_DEFAULT) {
+		xattr_name = PVFS2_XATTR_NAME_ACL_DEFAULT;
+	} else {
+		gossip_err("pvfs2_get_acl: bogus value of type %d\n", type);
+		return ERR_PTR(-EINVAL);
+	}
+
+	/*
+	 * A maximum-size buffer is allocated up front so that no extra
+	 * call is needed just to learn the exact attribute length.
+	 * Passing NULL to pvfs2_inode_getxattr() to probe the length
+	 * would be the alternative; it is not done for now.
+	 */
+	buf = kmalloc(PVFS_MAX_XATTR_VALUELEN, GFP_KERNEL);
+	if (!buf) {
+		gossip_err("pvfs2_get_acl: Could not allocate value ptr\n");
+		return ERR_PTR(-ENOMEM);
+	}
+
+	gossip_debug(GOSSIP_ACL_DEBUG,
+		     "inode %pU, key %s, type %d\n",
+		     get_khandle_from_ino(inode), xattr_name, type);
+
+	len = pvfs2_inode_getxattr(inode, "", xattr_name, buf,
+				   PVFS_MAX_XATTR_VALUELEN);
+
+	if (len == -ENODATA || len == -ENOSYS) {
+		/* no such attribute: report "no ACL" rather than an error */
+		result = NULL;
+	} else if (len > 0) {
+		/* attribute present: build the in-memory representation */
+		result = posix_acl_from_xattr(&init_user_ns, buf, len);
+	} else {
+		gossip_err("inode %pU retrieving acl's failed with error %d\n",
+			   get_khandle_from_ino(inode),
+			   len);
+		result = ERR_PTR(len);
+	}
+
+	kfree(buf);
+	return result;
+}
+
+/*
+ * Store the ACL of the given type on an inode, or remove it when no ACL
+ * remains to be stored.
+ *
+ * For ACL_TYPE_ACCESS the inode mode is first brought in line with the
+ * ACL via posix_acl_equiv_mode(); when that call returns 0 the ACL is
+ * dropped and only the mode is kept. The ACL is then serialized and
+ * written with pvfs2_inode_setxattr(); a NULL value of size 0 is passed
+ * when there is nothing to store. On success the cached ACL of the
+ * inode is updated.
+ *
+ * Returns 0 on success or a negative errno.
+ */
+int pvfs2_set_acl(struct inode *inode, struct posix_acl *acl, int type)
+{
+	struct pvfs2_inode_s *pvfs2_inode = PVFS2_I(inode);
+	const char *xattr_name = NULL;
+	void *xattr_buf = NULL;
+	size_t xattr_len = 0;
+	int rc = 0;
+
+	switch (type) {
+	case ACL_TYPE_DEFAULT:
+		xattr_name = PVFS2_XATTR_NAME_ACL_DEFAULT;
+		break;
+	case ACL_TYPE_ACCESS:
+		xattr_name = PVFS2_XATTR_NAME_ACL_ACCESS;
+		if (acl) {
+			umode_t new_mode = inode->i_mode;
+
+			/* can plain mode bits express this ACL? */
+			rc = posix_acl_equiv_mode(acl, &new_mode);
+			if (rc < 0) {
+				gossip_err("%s: posix_acl_equiv_mode err: %d\n",
+					   __func__,
+					   rc);
+				return rc;
+			}
+
+			if (inode->i_mode != new_mode)
+				SetModeFlag(pvfs2_inode);
+			inode->i_mode = new_mode;
+			mark_inode_dirty_sync(inode);
+			/* a result of 0 means the ACL itself is not stored */
+			if (rc == 0)
+				acl = NULL;
+		}
+		break;
+	default:
+		gossip_err("%s: invalid type %d!\n", __func__, type);
+		return -EINVAL;
+	}
+
+	gossip_debug(GOSSIP_ACL_DEBUG,
+		     "%s: inode %pU, key %s type %d\n",
+		     __func__, get_khandle_from_ino(inode),
+		     xattr_name,
+		     type);
+
+	if (acl) {
+		xattr_len = posix_acl_xattr_size(acl->a_count);
+		xattr_buf = kmalloc(xattr_len, GFP_KERNEL);
+		if (!xattr_buf)
+			return -ENOMEM;
+
+		rc = posix_acl_to_xattr(&init_user_ns, acl,
+					xattr_buf, xattr_len);
+		if (rc < 0) {
+			kfree(xattr_buf);
+			return rc;
+		}
+	}
+
+	gossip_debug(GOSSIP_ACL_DEBUG,
+		     "%s: name %s, value %p, size %zd, acl %p\n",
+		     __func__, xattr_name, xattr_buf, xattr_len, acl);
+
+	/*
+	 * With a NULL acl the buffer is NULL and the length 0, which
+	 * translates to a removexattr.
+	 */
+	rc = pvfs2_inode_setxattr(inode, "", xattr_name,
+				  xattr_buf, xattr_len, 0);
+
+	kfree(xattr_buf);
+	if (rc == 0)
+		set_cached_acl(inode, type, acl);
+	return rc;
+}
+
+/*
+ * Set up the ACLs of a new inode from its parent directory.
+ *
+ * posix_acl_create() supplies the default and access ACLs (either may
+ * be NULL) together with a possibly adjusted mode. Each ACL obtained is
+ * stored with pvfs2_set_acl() and released; the access ACL is only
+ * stored if storing the default ACL did not fail. If the mode differs
+ * from the inode's current mode, the new mode is installed and the
+ * inode flushed.
+ *
+ * Returns 0 on success or the first error encountered.
+ */
+int pvfs2_init_acl(struct inode *inode, struct inode *dir)
+{
+	struct pvfs2_inode_s *pvfs2_inode = PVFS2_I(inode);
+	struct posix_acl *dflt_acl;
+	struct posix_acl *access_acl;
+	umode_t new_mode = inode->i_mode;
+	int rc;
+
+	ClearModeFlag(pvfs2_inode);
+
+	rc = posix_acl_create(dir, &new_mode, &dflt_acl, &access_acl);
+	if (rc != 0)
+		return rc;
+
+	if (dflt_acl != NULL) {
+		rc = pvfs2_set_acl(inode, dflt_acl, ACL_TYPE_DEFAULT);
+		posix_acl_release(dflt_acl);
+	}
+
+	if (access_acl != NULL) {
+		if (rc == 0)
+			rc = pvfs2_set_acl(inode, access_acl,
+					   ACL_TYPE_ACCESS);
+		posix_acl_release(access_acl);
+	}
+
+	/* a changed mode has to be pushed out with a forced ->setattr */
+	if (inode->i_mode != new_mode) {
+		SetModeFlag(pvfs2_inode);
+		inode->i_mode = new_mode;
+		pvfs2_flush_inode(inode);
+	}
+
+	return rc;
+}
diff --git a/fs/orangefs/dir.c b/fs/orangefs/dir.c
new file mode 100644
index 0000000..91d1e62
--- /dev/null
+++ b/fs/orangefs/dir.c
@@ -0,0 +1,395 @@
+/*
+ * (C) 2001 Clemson University and The University of Chicago
+ *
+ * See COPYING in top-level directory.
+ */
+
+#include "protocol.h"
+#include "pvfs2-kernel.h"
+#include "pvfs2-bufmap.h"
+
+/*
+ * Per-call state of one readdir request: the readdir buffer slot in use,
+ * the decoded response and the raw buffer the response was decoded from.
+ */
+struct readdir_handle_t {
+	/* slot handed back via readdir_index_put(); negative when unset */
+	int buffer_index;
+	/* decoded response; its dirent_array is kmalloc'ed by the decoder */
+	struct pvfs2_readdir_response readdir_response;
+	/* raw trailer buffer; released with vfree() by ctor/dtor */
+	void *dents_buf;
+};
+
+/*
+ * decode routine needed by kmod to make sense of the shared page for readdirs.
+ */
+/*
+ * Decode the readdir response found at ptr into *readdir.
+ *
+ * The token and entry count are copied from the header at ptr, an array
+ * of that many entries is allocated, and each entry is filled from the
+ * encoded (name, khandle) records that follow the header. The decoded
+ * names are set up by dec_string() and the khandle is copied by value.
+ *
+ * The caller owns readdir->dirent_array and must kfree() it.
+ *
+ * Returns the number of bytes consumed from ptr, or -ENOMEM.
+ */
+static long decode_dirents(char *ptr, struct pvfs2_readdir_response *readdir)
+{
+	int i;
+	struct pvfs2_readdir_response *rd =
+		(struct pvfs2_readdir_response *) ptr;
+	char *buf = ptr;
+	char **pptr = &buf;
+
+	readdir->token = rd->token;
+	readdir->pvfs_dirent_outcount = rd->pvfs_dirent_outcount;
+	/*
+	 * The count comes straight out of the shared buffer, so let
+	 * kmalloc_array() reject a count * size product that overflows
+	 * instead of silently allocating a too-small array.
+	 */
+	readdir->dirent_array = kmalloc_array(readdir->pvfs_dirent_outcount,
+					      sizeof(*readdir->dirent_array),
+					      GFP_KERNEL);
+	if (readdir->dirent_array == NULL)
+		return -ENOMEM;
+	/* the encoded entries start where dirent_array sits in the header */
+	*pptr += offsetof(struct pvfs2_readdir_response, dirent_array);
+	for (i = 0; i < readdir->pvfs_dirent_outcount; i++) {
+		dec_string(pptr, &readdir->dirent_array[i].d_name,
+			   &readdir->dirent_array[i].d_length);
+		readdir->dirent_array[i].khandle =
+			*(struct pvfs2_khandle *) *pptr;
+		/* NOTE(review): 16 is presumably the encoded khandle size */
+		*pptr += 16;
+	}
+	return (unsigned long)*pptr - (unsigned long)ptr;
+}
+
+/*
+ * Initialize rhandle from a raw readdir buffer and its buffer index.
+ *
+ * Returns the number of bytes decoded, -ENOMEM for a NULL buffer,
+ * -EINVAL for a negative index, or the decoder's error. When decoding
+ * fails the buffer is vfree()d here and the handle is reset.
+ */
+static long readdir_handle_ctor(struct readdir_handle_t *rhandle, void *buf,
+				int buffer_index)
+{
+	long decoded;
+
+	if (!buf) {
+		gossip_err
+		    ("Invalid NULL buffer specified in readdir_handle_ctor\n");
+		return -ENOMEM;
+	}
+	if (buffer_index < 0) {
+		gossip_err
+		    ("Invalid buffer index specified in readdir_handle_ctor\n");
+		return -EINVAL;
+	}
+
+	rhandle->dents_buf = buf;
+	rhandle->buffer_index = buffer_index;
+
+	decoded = decode_dirents(buf, &rhandle->readdir_response);
+	if (decoded >= 0)
+		return decoded;
+
+	/* decode failed: give the buffer back and leave the handle empty */
+	gossip_err("Could not decode readdir from buffer %ld\n", decoded);
+	rhandle->buffer_index = -1;
+	gossip_debug(GOSSIP_DIR_DEBUG, "vfree %p\n", buf);
+	vfree(buf);
+	rhandle->dents_buf = NULL;
+	return decoded;
+}
+
+/*
+ * Release everything a readdir handle holds: the decoded entry array,
+ * the readdir buffer index (if one is set) and the raw buffer (if any).
+ * A NULL handle is accepted and ignored.
+ */
+static void readdir_handle_dtor(struct pvfs2_bufmap *bufmap,
+		struct readdir_handle_t *rhandle)
+{
+	if (!rhandle)
+		return;
+
+	/* kfree() tolerates NULL, so no check is needed */
+	kfree(rhandle->readdir_response.dirent_array);
+	rhandle->readdir_response.dirent_array = NULL;
+
+	if (rhandle->buffer_index >= 0) {
+		readdir_index_put(bufmap, rhandle->buffer_index);
+		rhandle->buffer_index = -1;
+	}
+
+	if (rhandle->dents_buf == NULL)
+		return;
+
+	gossip_debug(GOSSIP_DIR_DEBUG, "vfree %p\n",
+		     rhandle->dents_buf);
+	vfree(rhandle->dents_buf);
+	rhandle->dents_buf = NULL;
+}
+
+/*
+ * Read directory entries from an instance of an open directory.
+ *
+ * \note This routine was converted for the readdir to iterate change
+ *       in "struct file_operations". "converted" mostly amounts to
+ *       changing occurrences of "readdir" and "filldir" in the
+ *       comments to "iterate" and "dir_emit". Also filldir calls
+ *       were changed to dir_emit calls.
+ *
+ * \param dir_emit callback function called for each entry read.
+ *
+ * \retval <0 on error
+ * \retval 0  when directory has been completely traversed
+ * \retval >0 if we don't call dir_emit for all entries
+ *
+ * \note If the dir_emit call-back returns non-zero, then iterate should
+ *       assume that it has had enough, and should return as well.
+ */
+static int pvfs2_readdir(struct file *file, struct dir_context *ctx)
+{
+	struct pvfs2_bufmap *bufmap = NULL;
+	int ret = 0;
+	int buffer_index;
+	/* server-side continuation token, kept across calls per open file */
+	uint64_t *ptoken = file->private_data;
+	uint64_t pos = 0;
+	ino_t ino = 0;
+	struct dentry *dentry = file->f_path.dentry;
+	struct pvfs2_kernel_op *new_op = NULL;
+	struct pvfs2_inode_s *pvfs2_inode = PVFS2_I(dentry->d_inode);
+	int buffer_full = 0;
+	struct readdir_handle_t rhandle;
+	int i = 0;
+	int len = 0;
+	ino_t current_ino = 0;
+	char *current_entry = NULL;
+	long bytes_decoded;
+
+	gossip_ldebug(GOSSIP_DIR_DEBUG,
+		      "%s: ctx->pos:%lld, token = %llu\n",
+		      __func__,
+		      lld(ctx->pos),
+		      llu(*ptoken));
+
+	pos = (uint64_t) ctx->pos;
+
+	/* are we done? */
+	if (pos == PVFS_READDIR_END) {
+		gossip_debug(GOSSIP_DIR_DEBUG,
+			     "Skipping to termination path\n");
+		return 0;
+	}
+
+	gossip_debug(GOSSIP_DIR_DEBUG,
+		     "pvfs2_readdir called on %s (pos=%llu)\n",
+		     dentry->d_name.name, llu(pos));
+
+	/* start from an empty handle so the destructor is always safe */
+	rhandle.buffer_index = -1;
+	rhandle.dents_buf = NULL;
+	memset(&rhandle.readdir_response, 0, sizeof(rhandle.readdir_response));
+
+	new_op = op_alloc(PVFS2_VFS_OP_READDIR);
+	if (!new_op)
+		return -ENOMEM;
+
+	new_op->uses_shared_memory = 1;
+	new_op->upcall.req.readdir.refn = pvfs2_inode->refn;
+	new_op->upcall.req.readdir.max_dirent_count = MAX_DIRENT_COUNT_READDIR;
+
+	gossip_debug(GOSSIP_DIR_DEBUG,
+		     "%s: upcall.req.readdir.refn.khandle: %pU\n",
+		     __func__,
+		     &new_op->upcall.req.readdir.refn.khandle);
+
+	/*
+	 * NOTE: the position we send to the readdir upcall is out of
+	 * sync with ctx->pos since:
+	 * 1. pvfs2 doesn't include the "." and ".." entries that are
+	 *    added below.
+	 * 2. the introduction of distributed directory logic makes token no
+	 *    longer be related to f_pos and pos. Instead an independent
+	 *    variable is used inside the function and stored in the
+	 *    private_data of the file structure.
+	 */
+	new_op->upcall.req.readdir.token = *ptoken;
+
+get_new_buffer_index:
+	ret = readdir_index_get(&bufmap, &buffer_index);
+	if (ret < 0) {
+		gossip_lerr("pvfs2_readdir: readdir_index_get() failure (%d)\n",
+			    ret);
+		goto out_free_op;
+	}
+	new_op->upcall.req.readdir.buf_index = buffer_index;
+
+	ret = service_operation(new_op,
+				"pvfs2_readdir",
+				get_interruptible_flag(dentry->d_inode));
+
+	gossip_debug(GOSSIP_DIR_DEBUG,
+		     "Readdir downcall status is %d.  ret:%d\n",
+		     new_op->downcall.status,
+		     ret);
+
+	if (ret == -EAGAIN && op_state_purged(new_op)) {
+		/*
+		 * readdir shared memory area has been wiped due to
+		 * pvfs2-client-core restarting, so we must get a new
+		 * index into the shared memory.
+		 */
+		gossip_debug(GOSSIP_DIR_DEBUG,
+			"%s: Getting new buffer_index for retry of readdir..\n",
+			 __func__);
+		readdir_index_put(bufmap, buffer_index);
+		goto get_new_buffer_index;
+	}
+
+	if (ret == -EIO && op_state_purged(new_op)) {
+		gossip_err("%s: Client is down. Aborting readdir call.\n",
+			__func__);
+		readdir_index_put(bufmap, buffer_index);
+		goto out_free_op;
+	}
+
+	if (ret < 0 || new_op->downcall.status != 0) {
+		gossip_debug(GOSSIP_DIR_DEBUG,
+			     "Readdir request failed.  Status:%d\n",
+			     new_op->downcall.status);
+		readdir_index_put(bufmap, buffer_index);
+		if (ret >= 0)
+			ret = new_op->downcall.status;
+		goto out_free_op;
+	}
+
+	bytes_decoded =
+		readdir_handle_ctor(&rhandle,
+				    new_op->downcall.trailer_buf,
+				    buffer_index);
+	if (bytes_decoded < 0) {
+		/* report the decoder's error, not the stale service status */
+		gossip_err("pvfs2_readdir: Could not decode trailer buffer into a readdir response %ld\n",
+			bytes_decoded);
+		ret = bytes_decoded;
+		/*
+		 * The constructor has not kept the index on this path,
+		 * so it is handed back here and the destructor is skipped.
+		 */
+		readdir_index_put(bufmap, buffer_index);
+		goto out_free_op;
+	}
+
+	if (bytes_decoded != new_op->downcall.trailer_size) {
+		gossip_err("pvfs2_readdir: # bytes decoded (%ld) != trailer size (%ld)\n",
+			bytes_decoded,
+			(long)new_op->downcall.trailer_size);
+		ret = -EINVAL;
+		goto out_destroy_handle;
+	}
+
+	if (pos == 0) {
+		ino = get_ino_from_khandle(dentry->d_inode);
+		gossip_debug(GOSSIP_DIR_DEBUG,
+			     "%s: calling dir_emit of \".\" with pos = %llu\n",
+			     __func__,
+			     llu(pos));
+		/*
+		 * dir_emit() returns a bool (false when the caller's
+		 * buffer cannot take the entry), never a negative errno.
+		 * Stop without advancing so "." is offered again next time.
+		 */
+		ret = dir_emit(ctx, ".", 1, ino, DT_DIR);
+		if (!ret)
+			goto out_destroy_handle;
+		ctx->pos++;
+		gossip_ldebug(GOSSIP_DIR_DEBUG,
+			      "%s: ctx->pos:%lld\n",
+			      __func__,
+			      lld(ctx->pos));
+		pos++;
+	}
+
+	if (pos == 1) {
+		ino = get_parent_ino_from_dentry(dentry);
+		gossip_debug(GOSSIP_DIR_DEBUG,
+			     "%s: calling dir_emit of \"..\" with pos = %llu\n",
+			     __func__,
+			     llu(pos));
+		/* same bool contract as above: retry ".." on the next call */
+		ret = dir_emit(ctx, "..", 2, ino, DT_DIR);
+		if (!ret)
+			goto out_destroy_handle;
+		ctx->pos++;
+		gossip_ldebug(GOSSIP_DIR_DEBUG,
+			      "%s: ctx->pos:%lld\n",
+			      __func__,
+			      lld(ctx->pos));
+		pos++;
+	}
+
+	for (i = 0; i < rhandle.readdir_response.pvfs_dirent_outcount; i++) {
+		len = rhandle.readdir_response.dirent_array[i].d_length;
+		current_entry = rhandle.readdir_response.dirent_array[i].d_name;
+		current_ino = pvfs2_khandle_to_ino(
+			&(rhandle.readdir_response.dirent_array[i].khandle));
+
+		gossip_debug(GOSSIP_DIR_DEBUG,
+			     "calling dir_emit for %s with len %d, pos %ld\n",
+			     current_entry,
+			     len,
+			     (unsigned long)pos);
+		ret =
+		    dir_emit(ctx, current_entry, len, current_ino, DT_UNKNOWN);
+		/*
+		 * NOTE(review): dir_emit() returns a bool, so this test can
+		 * never be true and an entry that does not fit is skipped.
+		 * It is left as is on purpose: the rewind logic below cannot
+		 * resume in the middle of a batch (the token only identifies
+		 * the start of the batch), so switching the test to !ret
+		 * would trade skipped entries for duplicated ones. A real
+		 * fix needs to remember the position within the batch.
+		 */
+		if (ret < 0) {
+			gossip_debug(GOSSIP_DIR_DEBUG,
+				     "dir_emit() failed. ret:%d\n",
+				     ret);
+			if (i < 2) {
+				gossip_err("dir_emit failed on one of the first two true PVFS directory entries.\n");
+				gossip_err("Duplicate entries may appear.\n");
+			}
+			buffer_full = 1;
+			break;
+		}
+		ctx->pos++;
+		gossip_ldebug(GOSSIP_DIR_DEBUG,
+			      "%s: ctx->pos:%lld\n",
+			      __func__,
+			      lld(ctx->pos));
+
+		pos++;
+	}
+
+	/* this means that all of the dir_emit calls succeeded */
+	if (i == rhandle.readdir_response.pvfs_dirent_outcount) {
+		/* update token */
+		*ptoken = rhandle.readdir_response.token;
+	} else {
+		/* this means a dir_emit call failed */
+		if (rhandle.readdir_response.token == PVFS_READDIR_END) {
+			/*
+			 * If PVFS hit end of directory, then there
+			 * is no way to do math on the token that it
+			 * returned. Instead we go by ctx->pos but
+			 * back up to account for the artificial .
+			 * and .. entries.
+			 */
+			ctx->pos -= 3;
+		} else {
+			/*
+			 * this means a dir_emit call failed. !!! need to set
+			 * back to previous ctx->pos, no middle value allowed
+			 */
+			pos -= (i - 1);
+			ctx->pos -= (i - 1);
+		}
+		gossip_debug(GOSSIP_DIR_DEBUG,
+			"at least one dir_emit call failed. Setting ctx->pos to: %lld\n",
+			lld(ctx->pos));
+	}
+
+	/*
+	 * Did we hit the end of the directory?
+	 */
+	if (rhandle.readdir_response.token == PVFS_READDIR_END &&
+	    !buffer_full) {
+		gossip_debug(GOSSIP_DIR_DEBUG, "End of dir detected; setting ctx->pos to PVFS_READDIR_END.\n");
+		ctx->pos = PVFS_READDIR_END;
+	}
+
+	gossip_debug(GOSSIP_DIR_DEBUG,
+		     "pos = %llu, token = %llu"
+		     ", ctx->pos should have been %lld\n",
+		     llu(pos),
+		     llu(*ptoken),
+		     lld(ctx->pos));
+
+out_destroy_handle:
+	/* releases the entry array, the buffer index and the raw buffer */
+	readdir_handle_dtor(bufmap, &rhandle);
+out_free_op:
+	op_release(new_op);
+	gossip_debug(GOSSIP_DIR_DEBUG, "pvfs2_readdir returning %d\n", ret);
+	return ret;
+}
+
+/*
+ * Open a directory: allocate the per-open readdir token, stored in
+ * file->private_data, and set it to PVFS_READDIR_START.
+ *
+ * Returns 0 on success or -ENOMEM.
+ */
+static int pvfs2_dir_open(struct inode *inode, struct file *file)
+{
+	uint64_t *token = kmalloc(sizeof(uint64_t), GFP_KERNEL);
+
+	file->private_data = token;
+	if (token == NULL)
+		return -ENOMEM;
+
+	*token = PVFS_READDIR_START;
+	return 0;
+}
+
+/*
+ * Release a directory: flush the inode, then free the readdir token
+ * that pvfs2_dir_open() stored in file->private_data. Always returns 0.
+ */
+static int pvfs2_dir_release(struct inode *inode, struct file *file)
+{
+	void *token;
+
+	pvfs2_flush_inode(inode);
+
+	token = file->private_data;
+	kfree(token);
+	return 0;
+}
+
+/** PVFS2 implementation of VFS directory operations */
+const struct file_operations pvfs2_dir_operations = {
+	/* per-open setup and teardown of the readdir token */
+	.open = pvfs2_dir_open,
+	.release = pvfs2_dir_release,
+	/* directory listing */
+	.iterate = pvfs2_readdir,
+	/* read() on a directory is answered by the generic helper */
+	.read = generic_read_dir,
+};
diff --git a/fs/orangefs/inode.c b/fs/orangefs/inode.c
new file mode 100644
index 0000000..5da2a20
--- /dev/null
+++ b/fs/orangefs/inode.c
@@ -0,0 +1,468 @@
+/*
+ * (C) 2001 Clemson University and The University of Chicago
+ *
+ * See COPYING in top-level directory.
+ */
+
+/*
+ *  Linux VFS inode operations.
+ */
+
+#include "protocol.h"
+#include "pvfs2-kernel.h"
+#include "pvfs2-bufmap.h"
+
+/*
+ * Fill one page cache page with file data and unlock it.
+ *
+ * The page is mapped, filled through pvfs2_inode_read() when its index
+ * lies below the block count derived from the inode size, and whatever
+ * was not read is zeroed. On a read error the page is flagged with
+ * SetPageError(); otherwise it is marked up to date. The page is
+ * unlocked on every path.
+ *
+ * Returns 0 on success or the negative result of pvfs2_inode_read().
+ */
+static int read_one_page(struct page *page)
+{
+	void *page_data;
+	int ret;
+	pgoff_t max_block;
+	ssize_t bytes_read = 0;
+	struct inode *inode = page->mapping->host;
+	const uint32_t blocksize = PAGE_CACHE_SIZE;	/* inode->i_blksize */
+	const uint32_t blockbits = PAGE_CACHE_SHIFT;	/* inode->i_blkbits */
+
+	gossip_debug(GOSSIP_INODE_DEBUG,
+		    "pvfs2_readpage called with page %p\n",
+		     page);
+	page_data = pvfs2_kmap(page);
+
+	/*
+	 * Number of blocks covering i_size, plus one. Computed with a
+	 * shift into a pgoff_t: an int would truncate for very large
+	 * files, and a 64-bit division needs a helper on 32-bit builds.
+	 */
+	max_block = (pgoff_t)(inode->i_size >> blockbits) + 1;
+
+	if (page->index < max_block) {
+		loff_t blockptr_offset = (((loff_t) page->index) << blockbits);
+		bytes_read = pvfs2_inode_read(inode,
+					      page_data,
+					      blocksize,
+					      &blockptr_offset,
+					      inode->i_size);
+	}
+	/* only zero remaining unread portions of the page data */
+	if (bytes_read > 0)
+		memset(page_data + bytes_read, 0, blocksize - bytes_read);
+	else
+		memset(page_data, 0, blocksize);
+	/* takes care of potential aliasing */
+	flush_dcache_page(page);
+	if (bytes_read < 0) {
+		ret = bytes_read;
+		SetPageError(page);
+	} else {
+		SetPageUptodate(page);
+		if (PageError(page))
+			ClearPageError(page);
+		ret = 0;
+	}
+	pvfs2_kunmap(page);
+	/* unlock the page after the ->readpage() routine completes */
+	unlock_page(page);
+	return ret;
+}
+
+/* ->readpage: the file argument is unused, the page is read directly. */
+static int pvfs2_readpage(struct file *file, struct page *page)
+{
+	int status = read_one_page(page);
+
+	return status;
+}
+
+/*
+ * ->readpages: take nr_pages pages off the tail of the list, insert
+ * each into the page cache and read it with read_one_page(). Pages that
+ * cannot be inserted are simply dropped. Read errors are only logged;
+ * the function always returns 0.
+ */
+static int pvfs2_readpages(struct file *file,
+			   struct address_space *mapping,
+			   struct list_head *pages,
+			   unsigned nr_pages)
+{
+	unsigned page_idx;
+	int ret;
+
+	gossip_debug(GOSSIP_INODE_DEBUG, "pvfs2_readpages called\n");
+
+	for (page_idx = 0; page_idx < nr_pages; page_idx++) {
+		struct page *page;
+
+		page = list_entry(pages->prev, struct page, lru);
+		list_del(&page->lru);
+		/* add_to_page_cache() returns 0 when the page was inserted */
+		if (!add_to_page_cache(page,
+				       mapping,
+				       page->index,
+				       GFP_KERNEL)) {
+			ret = read_one_page(page);
+			gossip_debug(GOSSIP_INODE_DEBUG,
+				"added page to cache, read_one_page returned: %d\n",
+				ret);
+		}
+		/*
+		 * Drop the reference that came with the list entry in
+		 * both cases: the page cache holds its own reference
+		 * after a successful insert, so keeping this one would
+		 * leak the page.
+		 * NOTE(review): the page is not put on the LRU here;
+		 * add_to_page_cache_lru() may be the better call.
+		 */
+		page_cache_release(page);
+	}
+	BUG_ON(!list_empty(pages));
+	return 0;
+}
+
+/*
+ * ->invalidatepage: clear the up-to-date and mapped-to-disk flags of
+ * the page. The offset is only logged and the length is ignored.
+ */
+static void pvfs2_invalidatepage(struct page *page,
+				 unsigned int offset,
+				 unsigned int length)
+{
+	gossip_debug(GOSSIP_INODE_DEBUG,
+		     "pvfs2_invalidatepage called on page %p "
+		     "(offset is %u)\n",
+		     page, offset);
+
+	ClearPageUptodate(page);
+	ClearPageMappedToDisk(page);
+}
+
+/* ->releasepage: logs the request and always returns 0. */
+static int pvfs2_releasepage(struct page *page, gfp_t foo)
+{
+	gossip_debug(GOSSIP_INODE_DEBUG,
+		     "pvfs2_releasepage called on page %p\n", page);
+
+	return 0;
+}
+
+/*
+ * Having a direct_IO entry point in the address_space_operations
+ * struct allows O_DIRECT to be used on open. Nothing will ever call
+ * this function, but in the future we will need to be able to use
+ * O_DIRECT on open in order to support AIO. This is modeled after
+ * NFS, which does the same.
+ */
+/*
+static ssize_t pvfs2_direct_IO(int rw,
+			struct kiocb *iocb,
+			struct iov_iter *iter,
+                        loff_t offset)
+{
+        gossip_debug(GOSSIP_INODE_DEBUG,
+		     "pvfs2_direct_IO: %s\n",
+                      iocb->ki_filp->f_path.dentry->d_name.name);
+
+        return -EINVAL;
+}
+*/
+
+/* Backing device info attached to every pvfs2 mapping by pvfs2_init_iops(). */
+struct backing_dev_info pvfs2_backing_dev_info = {
+	/* no dirty accounting and no writeback for this device */
+	.capabilities = BDI_CAP_NO_ACCT_DIRTY | BDI_CAP_NO_WRITEBACK,
+	.ra_pages = 0,
+	.name = "pvfs2",
+};
+
+/** PVFS2 implementation of address space operations */
+const struct address_space_operations pvfs2_address_operations = {
+	/* page teardown hooks */
+	.releasepage = pvfs2_releasepage,
+	.invalidatepage = pvfs2_invalidatepage,
+	/* page fill hooks, both built on read_one_page() */
+	.readpages = pvfs2_readpages,
+	.readpage = pvfs2_readpage,
+/*	.direct_IO = pvfs2_direct_IO */
+};
+
+/*
+ * Change the size of an inode to iattr->ia_size.
+ *
+ * The in-core size is updated with truncate_setsize() first, then a
+ * truncate operation is sent. When the size actually changed and the
+ * caller did not ask for explicit ctime/mtime values, both timestamps
+ * are requested in iattr so that a later attribute update carries them.
+ *
+ * Returns 0 on success, -ENOMEM if no operation could be allocated, or
+ * the result of the truncate operation.
+ */
+static int pvfs2_setattr_size(struct inode *inode, struct iattr *iattr)
+{
+	struct pvfs2_inode_s *pvfs2_inode = PVFS2_I(inode);
+	const loff_t size_before = i_size_read(inode);
+	struct pvfs2_kernel_op *trunc_op;
+	int rc;
+
+	gossip_debug(GOSSIP_UTILS_DEBUG,
+		     "%s: %pU: Handle is %pU | fs_id %d | size is %llu\n",
+		     __func__,
+		     get_khandle_from_ino(inode),
+		     &pvfs2_inode->refn.khandle,
+		     pvfs2_inode->refn.fs_id,
+		     iattr->ia_size);
+
+	truncate_setsize(inode, iattr->ia_size);
+
+	trunc_op = op_alloc(PVFS2_VFS_OP_TRUNCATE);
+	if (trunc_op == NULL)
+		return -ENOMEM;
+
+	trunc_op->upcall.req.truncate.refn = pvfs2_inode->refn;
+	trunc_op->upcall.req.truncate.size = (int64_t) iattr->ia_size;
+
+	rc = service_operation(trunc_op, __func__,
+			       get_interruptible_flag(inode));
+
+	/* truncate returns no payload; the status is all there is */
+	gossip_debug(GOSSIP_UTILS_DEBUG,
+		     "pvfs2: pvfs2_truncate got return value of %d\n",
+		     rc);
+
+	op_release(trunc_op);
+
+	if (rc != 0)
+		return rc;
+
+	/*
+	 * The VFS sets ATTR_CTIME/ATTR_MTIME itself whenever it wants the
+	 * timestamps updated, except for a plain truncate(): there the
+	 * times must change if, and only if, the size did. This covers
+	 * the semantic difference between truncate() and ftruncate().
+	 */
+	if (size_before == i_size_read(inode))
+		return 0;
+	if (iattr->ia_valid & (ATTR_CTIME | ATTR_MTIME))
+		return 0;
+
+	iattr->ia_mtime = current_fs_time(inode->i_sb);
+	iattr->ia_ctime = iattr->ia_mtime;
+	iattr->ia_valid |= ATTR_CTIME | ATTR_MTIME;
+
+	return 0;
+}
+
+/*
+ * Change attributes of an object referenced by dentry.
+ */
+int pvfs2_setattr(struct dentry *dentry, struct iattr *iattr)
+{
+	struct inode *inode = dentry->d_inode;
+	int rc;
+
+	gossip_debug(GOSSIP_INODE_DEBUG,
+		     "pvfs2_setattr: called on %s\n",
+		     dentry->d_name.name);
+
+	/* permission and sanity checks come first */
+	rc = inode_change_ok(inode, iattr);
+	if (rc)
+		goto done;
+
+	/* a size change is handled separately, and only if it is a change */
+	if (iattr->ia_valid & ATTR_SIZE) {
+		if (iattr->ia_size != i_size_read(inode)) {
+			rc = pvfs2_setattr_size(inode, iattr);
+			if (rc)
+				goto done;
+		}
+	}
+
+	/* update the in-core inode, then send the attributes out */
+	setattr_copy(inode, iattr);
+	mark_inode_dirty(inode);
+
+	rc = pvfs2_inode_setattr(inode, iattr);
+	gossip_debug(GOSSIP_INODE_DEBUG,
+		     "pvfs2_setattr: inode_setattr returned %d\n",
+		     rc);
+
+	/* a mode change on a file that has ACLs must adjust them too */
+	if (rc == 0 && (iattr->ia_valid & ATTR_MODE))
+		rc = posix_acl_chmod(inode, inode->i_mode);
+
+done:
+	gossip_debug(GOSSIP_INODE_DEBUG, "pvfs2_setattr: returning %d\n", rc);
+	return rc;
+}
+
+/*
+ * Obtain attributes of an object given a dentry
+ */
+int pvfs2_getattr(struct vfsmount *mnt,
+		  struct dentry *dentry,
+		  struct kstat *kstat)
+{
+	struct inode *inode = dentry->d_inode;
+	int rc;
+
+	gossip_debug(GOSSIP_INODE_DEBUG,
+		     "pvfs2_getattr: called on %s\n",
+		     dentry->d_name.name);
+
+	/*
+	 * A getattr is expected to see every attribute of the inode
+	 * refreshed, so all of them are fetched.
+	 */
+	rc = pvfs2_inode_getattr(inode, PVFS_ATTR_SYS_ALL_NOHINT);
+	if (rc != 0) {
+		/* treat any failure as an I/O error and mark the inode bad */
+		gossip_debug(GOSSIP_INODE_DEBUG,
+			     "%s:%s:%d calling make bad inode\n",
+			     __FILE__,
+			     __func__,
+			     __LINE__);
+		pvfs2_make_bad_inode(inode);
+		return rc;
+	}
+
+	generic_fillattr(inode, kstat);
+	/* report the block size kept in the pvfs2 inode to stat */
+	kstat->blksize = PVFS2_I(inode)->blksize;
+	return rc;
+}
+
+/* PVFS2 implementation of VFS inode operations for files */
+struct inode_operations pvfs2_file_inode_operations = {
+	/* attributes */
+	.getattr = pvfs2_getattr,
+	.setattr = pvfs2_setattr,
+	/* POSIX ACLs */
+	.get_acl = pvfs2_get_acl,
+	.set_acl = pvfs2_set_acl,
+	/* extended attributes: generic helpers except for listing */
+	.getxattr = generic_getxattr,
+	.setxattr = generic_setxattr,
+	.removexattr = generic_removexattr,
+	.listxattr = pvfs2_listxattr,
+};
+
+/*
+ * Install the address space, backing device and inode/file operation
+ * tables that match the file type in inode->i_mode.
+ *
+ * Returns 0 for regular files, symlinks and directories, -EINVAL for
+ * any other type (the mapping fields are set even then).
+ */
+static int pvfs2_init_iops(struct inode *inode)
+{
+	const umode_t fmt = inode->i_mode & S_IFMT;
+
+	inode->i_mapping->a_ops = &pvfs2_address_operations;
+	inode->i_mapping->backing_dev_info = &pvfs2_backing_dev_info;
+
+	if (fmt == S_IFREG) {
+		inode->i_op = &pvfs2_file_inode_operations;
+		inode->i_fop = &pvfs2_file_operations;
+		inode->i_blkbits = PAGE_CACHE_SHIFT;
+		return 0;
+	}
+
+	if (fmt == S_IFLNK) {
+		inode->i_op = &pvfs2_symlink_inode_operations;
+		return 0;
+	}
+
+	if (fmt == S_IFDIR) {
+		inode->i_op = &pvfs2_dir_inode_operations;
+		inode->i_fop = &pvfs2_dir_operations;
+		return 0;
+	}
+
+	gossip_debug(GOSSIP_INODE_DEBUG,
+		     "%s: unsupported mode\n",
+		     __func__);
+	return -EINVAL;
+}
+
+/*
+ * Given a PVFS2 object identifier (fsid, handle), convert it into a ino_t type
+ * that will be used as a hash-index from where the handle will
+ * be searched for in the VFS hash table of inodes.
+ */
+static inline ino_t pvfs2_handle_hash(PVFS_object_kref *ref)
+{
+	/* a missing reference hashes to 0 */
+	return ref ? pvfs2_khandle_to_ino(&(ref->khandle)) : 0;
+}
+
+/*
+ * Called to set up an inode from iget5_locked.
+ */
+static int pvfs2_set_inode(struct inode *inode, void *data)
+{
+	PVFS_object_kref *ref = (PVFS_object_kref *) data;
+
+	/* missing arguments are tolerated and leave the inode untouched */
+	if (data && inode) {
+		struct pvfs2_inode_s *pvfs2_inode = PVFS2_I(inode);
+
+		if (pvfs2_inode) {
+			/* record the object reference this inode stands for */
+			pvfs2_inode->refn.fs_id = ref->fs_id;
+			pvfs2_inode->refn.khandle = ref->khandle;
+		}
+	}
+	return 0;
+}
+
+/*
+ * Called to determine if handles match.
+ */
+static int pvfs2_test_inode(struct inode *inode, void *data)
+{
+	PVFS_object_kref *ref = (PVFS_object_kref *) data;
+	struct pvfs2_inode_s *pvfs2_inode = PVFS2_I(inode);
+
+	/* the handles must compare equal ... */
+	if (PVFS_khandle_cmp(&(pvfs2_inode->refn.khandle), &(ref->khandle)))
+		return 0;
+
+	/* ... and both must belong to the same file system */
+	return pvfs2_inode->refn.fs_id == ref->fs_id;
+}
+
+/*
+ * Front-end to lookup the inode-cache maintained by the VFS using the PVFS2
+ * file handle.
+ *
+ * @sb: the file system super block instance.
+ * @ref: The PVFS2 object for which we are trying to locate an inode structure.
+ */
+struct inode *pvfs2_iget(struct super_block *sb, PVFS_object_kref *ref)
+{
+	struct inode *inode;
+	unsigned long hash = pvfs2_handle_hash(ref);
+	int rc;
+
+	inode = iget5_locked(sb, hash, pvfs2_test_inode, pvfs2_set_inode, ref);
+	if (inode == NULL)
+		return inode;
+	/* an inode that is not new needs no further setup */
+	if ((inode->i_state & I_NEW) == 0)
+		return inode;
+
+	/* new inode: fetch its attributes before exposing it */
+	rc = pvfs2_inode_getattr(inode, PVFS_ATTR_SYS_ALL_NOHINT);
+	if (rc != 0) {
+		iget_failed(inode);
+		return ERR_PTR(rc);
+	}
+
+	inode->i_ino = hash;	/* needed for stat etc */
+	pvfs2_init_iops(inode);
+	unlock_new_inode(inode);
+
+	gossip_debug(GOSSIP_INODE_DEBUG,
+		     "iget handle %pU, fsid %d hash %ld i_ino %lu\n",
+		     &ref->khandle, ref->fs_id, hash, inode->i_ino);
+
+	return inode;
+}
+
+/*
+ * Allocate an inode for a newly created file and insert it into the inode hash.
+ */
+/*
+ * Build the in-core inode for an object that was just created.
+ *
+ * @sb:   super block the inode belongs to
+ * @dir:  parent directory, used to initialize the ACLs
+ * @mode: file type and permission bits for the new inode
+ * @dev:  device number stored in i_rdev
+ * @ref:  object reference of the new object
+ *
+ * The inode is allocated, tied to @ref, refreshed with
+ * pvfs2_inode_getattr(), given its operation tables, filled with the
+ * caller's credentials and current time, inserted (locked) into the
+ * inode hash and finally has its ACLs initialized.
+ *
+ * Returns the new, still locked inode, or an ERR_PTR() value on
+ * failure; it never returns NULL, so callers can rely on IS_ERR().
+ */
+struct inode *pvfs2_new_inode(struct super_block *sb, struct inode *dir,
+		int mode, dev_t dev, PVFS_object_kref *ref)
+{
+	unsigned long hash = pvfs2_handle_hash(ref);
+	struct inode *inode;
+	int error;
+
+	gossip_debug(GOSSIP_INODE_DEBUG,
+		     "pvfs2_get_custom_inode_common: called\n"
+		     "(sb is %p | MAJOR(dev)=%u | MINOR(dev)=%u mode=%o)\n",
+		     sb,
+		     MAJOR(dev),
+		     MINOR(dev),
+		     mode);
+
+	inode = new_inode(sb);
+	/*
+	 * Report allocation failure as an error pointer like every other
+	 * failure below: callers test the result with IS_ERR(), which a
+	 * NULL return would slip past.
+	 */
+	if (!inode)
+		return ERR_PTR(-ENOMEM);
+
+	pvfs2_set_inode(inode, ref);
+	inode->i_ino = hash;	/* needed for stat etc */
+
+	error = pvfs2_inode_getattr(inode, PVFS_ATTR_SYS_ALL_NOHINT);
+	if (error)
+		goto out_iput;
+
+	pvfs2_init_iops(inode);
+
+	/* the caller's values override what getattr filled in */
+	inode->i_mode = mode;
+	inode->i_uid = current_fsuid();
+	inode->i_gid = current_fsgid();
+	inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
+	inode->i_size = PAGE_CACHE_SIZE;
+	inode->i_rdev = dev;
+
+	error = insert_inode_locked4(inode, hash, pvfs2_test_inode, ref);
+	if (error < 0)
+		goto out_iput;
+
+	gossip_debug(GOSSIP_ACL_DEBUG,
+		     "Initializing ACL's for inode %pU\n",
+		     get_khandle_from_ino(inode));
+	/* NOTE(review): the result of pvfs2_init_acl() is not checked */
+	pvfs2_init_acl(inode, dir);
+	return inode;
+
+out_iput:
+	iput(inode);
+	return ERR_PTR(error);
+}
diff --git a/fs/orangefs/namei.c b/fs/orangefs/namei.c
new file mode 100644
index 0000000..8f69dd2
--- /dev/null
+++ b/fs/orangefs/namei.c
@@ -0,0 +1,473 @@
+/*
+ * (C) 2001 Clemson University and The University of Chicago
+ *
+ * See COPYING in top-level directory.
+ */
+
+/*
+ *  Linux VFS namei operations.
+ */
+
+#include "protocol.h"
+#include "pvfs2-kernel.h"
+
+/*
+ * Create a regular file: have the server create the object named by
+ * dentry inside dir, then attach a newly allocated inode for the returned
+ * handle to the (negative) dentry.
+ *
+ * Returns 0 on success, -ENOMEM when no op can be allocated, otherwise the
+ * negative error reported by service_operation() or pvfs2_new_inode().
+ */
+static int pvfs2_create(struct inode *dir,
+			struct dentry *dentry,
+			umode_t mode,
+			bool exclusive)
+{
+	struct pvfs2_inode_s *parent = PVFS2_I(dir);
+	struct pvfs2_kernel_op *op;
+	PVFS_object_kref *created;
+	struct inode *new;
+	int rc;
+
+	gossip_debug(GOSSIP_NAME_DEBUG, "%s: called\n", __func__);
+
+	op = op_alloc(PVFS2_VFS_OP_CREATE);
+	if (op == NULL)
+		return -ENOMEM;
+
+	/* request: parent handle, default attributes, entry name */
+	op->upcall.req.create.parent_refn = parent->refn;
+	fill_default_sys_attrs(op->upcall.req.create.attributes,
+			       PVFS_TYPE_METAFILE, mode);
+	strncpy(op->upcall.req.create.d_name,
+		dentry->d_name.name, PVFS2_NAME_LEN);
+
+	rc = service_operation(op, __func__, get_interruptible_flag(dir));
+
+	/* reference of the object the server handed back */
+	created = &op->downcall.resp.create.refn;
+	gossip_debug(GOSSIP_UTILS_DEBUG,
+		     "Create Got PVFS2 handle %pU on fsid %d (ret=%d)\n",
+		     &created->khandle,
+		     created->fs_id, rc);
+
+	if (rc < 0) {
+		gossip_debug(GOSSIP_UTILS_DEBUG,
+			     "%s: failed with error code %d\n",
+			     __func__, rc);
+		goto release;
+	}
+
+	new = pvfs2_new_inode(dir->i_sb, dir, S_IFREG | mode, 0, created);
+	if (IS_ERR(new)) {
+		gossip_err("*** Failed to allocate pvfs2 file inode\n");
+		rc = PTR_ERR(new);
+		goto release;
+	}
+
+	gossip_debug(GOSSIP_UTILS_DEBUG,
+		     "Assigned file inode new number of %pU\n",
+		     get_khandle_from_ino(new));
+
+	/* bind the inode to the dentry, then let other users at it */
+	d_instantiate(dentry, new);
+	unlock_new_inode(new);
+
+	gossip_debug(GOSSIP_UTILS_DEBUG,
+		     "Inode (Regular File) %pU -> %s\n",
+		     get_khandle_from_ino(new),
+		     dentry->d_name.name);
+
+	/* the parent directory changed: stamp it and mark it dirty */
+	SetMtimeFlag(parent);
+	dir->i_mtime = dir->i_ctime = current_fs_time(dir->i_sb);
+	mark_inode_dirty_sync(dir);
+	rc = 0;
+release:
+	op_release(op);
+	gossip_debug(GOSSIP_NAME_DEBUG, "%s: returning %d\n", __func__, rc);
+	return rc;
+}
+
+/*
+ * Attempt to resolve an object name (dentry->d_name), parent handle, and
+ * fsid into a handle for the object.
+ *
+ * Returns NULL after hashing dentry as a negative dentry when the server
+ * answers -ENOENT, the result of d_splice_alias() when an inode was found,
+ * or an ERR_PTR(): -ENAMETOOLONG for a name longer than PVFS2_NAME_LEN - 1,
+ * -ENOMEM when no op can be allocated, or the error from
+ * service_operation()/pvfs2_iget().
+ */
+static struct dentry *pvfs2_lookup(struct inode *dir, struct dentry *dentry,
+				   unsigned int flags)
+{
+	struct pvfs2_inode_s *parent = PVFS2_I(dir);
+	struct pvfs2_kernel_op *new_op;
+	struct inode *inode;
+	struct dentry *res;
+	int ret = -EINVAL;
+
+	/*
+	 * In theory we could skip the lookup here when the intent is to
+	 * create, in order to avoid a lookup that is likely to fail.
+	 * Skipping it, however, could bypass a valid lookup and attempt to
+	 * create a file that already exists (e.g. the vfs already handles
+	 * checking for -EEXIST on O_EXCL opens, which is broken if we skip
+	 * this lookup in the create path).
+	 */
+	gossip_debug(GOSSIP_NAME_DEBUG, "%s called on %s\n",
+		     __func__, dentry->d_name.name);
+
+	/* reject names that cannot fit, with a terminator, in the upcall */
+	if (dentry->d_name.len > (PVFS2_NAME_LEN - 1))
+		return ERR_PTR(-ENAMETOOLONG);
+
+	new_op = op_alloc(PVFS2_VFS_OP_LOOKUP);
+	if (!new_op)
+		return ERR_PTR(-ENOMEM);
+
+	new_op->upcall.req.lookup.sym_follow = flags & LOOKUP_FOLLOW;
+
+	gossip_debug(GOSSIP_NAME_DEBUG, "%s:%s:%d using parent %pU\n",
+		     __FILE__,
+		     __func__,
+		     __LINE__,
+		     &parent->refn.khandle);
+	new_op->upcall.req.lookup.parent_refn = parent->refn;
+
+	strncpy(new_op->upcall.req.lookup.d_name, dentry->d_name.name,
+		PVFS2_NAME_LEN);
+
+	gossip_debug(GOSSIP_NAME_DEBUG,
+		     "%s: doing lookup on %s under %pU,%d (follow=%s)\n",
+		     __func__,
+		     new_op->upcall.req.lookup.d_name,
+		     &new_op->upcall.req.lookup.parent_refn.khandle,
+		     new_op->upcall.req.lookup.parent_refn.fs_id,
+		     ((new_op->upcall.req.lookup.sym_follow ==
+		       PVFS2_LOOKUP_LINK_FOLLOW) ? "yes" : "no"));
+
+	ret = service_operation(new_op, __func__, get_interruptible_flag(dir));
+
+	gossip_debug(GOSSIP_NAME_DEBUG,
+		     "Lookup Got %pU, fsid %d (ret=%d)\n",
+		     &new_op->downcall.resp.lookup.refn.khandle,
+		     new_op->downcall.resp.lookup.refn.fs_id,
+		     ret);
+
+	if (ret < 0) {
+		if (ret == -ENOENT) {
+			/*
+			 * If no inode was found, add a negative dentry to
+			 * the dcache anyway; if we don't, we do not provide
+			 * the expected lookup semantics and we most
+			 * noticeably break during directory renames.
+			 *
+			 * However, if the operation failed or exited, do not
+			 * add the dentry (e.g. in the case that a touch is
+			 * issued on a file that already exists that was
+			 * interrupted during this lookup -- no need to add
+			 * another negative dentry for an existing file).
+			 */
+
+			gossip_debug(GOSSIP_NAME_DEBUG,
+				     "pvfs2_lookup: Adding *negative* dentry "
+				     "%p for %s\n",
+				     dentry,
+				     dentry->d_name.name);
+
+			d_add(dentry, NULL);
+			res = NULL;
+			goto out;
+		}
+
+		/* any other failure is reported to the caller unchanged */
+		res = ERR_PTR(ret);
+		goto out;
+	}
+
+	/* find or build the in-core inode for the handle the server returned */
+	inode = pvfs2_iget(dir->i_sb, &new_op->downcall.resp.lookup.refn);
+	if (IS_ERR(inode)) {
+		gossip_debug(GOSSIP_NAME_DEBUG,
+			"error %ld from iget\n", PTR_ERR(inode));
+		res = ERR_CAST(inode);
+		goto out;
+	}
+
+	gossip_debug(GOSSIP_NAME_DEBUG,
+		     "%s:%s:%d "
+		     "Found good inode [%lu] with count [%d]\n",
+		     __FILE__,
+		     __func__,
+		     __LINE__,
+		     inode->i_ino,
+		     (int)atomic_read(&inode->i_count));
+
+	/* update dentry/inode pair into dcache */
+	res = d_splice_alias(inode, dentry);
+
+	/*
+	 * NOTE(review): inode is dereferenced again after d_splice_alias()
+	 * and the message is printed whatever res turned out to be --
+	 * confirm the inode reference is still guaranteed valid here.
+	 */
+	gossip_debug(GOSSIP_NAME_DEBUG,
+		     "Lookup success (inode ct = %d)\n",
+		     (int)atomic_read(&inode->i_count));
+out:
+	op_release(new_op);
+	return res;
+}
+
+/*
+ * Ask the server to remove the entry named by dentry from dir.  The same
+ * handler is installed for ->rmdir in pvfs2_dir_inode_operations.
+ *
+ * Returns 0 on success; non-zero otherwise.
+ */
+static int pvfs2_unlink(struct inode *dir, struct dentry *dentry)
+{
+	struct pvfs2_inode_s *parent = PVFS2_I(dir);
+	struct inode *victim = dentry->d_inode;
+	struct pvfs2_kernel_op *op;
+	int rc;
+
+	gossip_debug(GOSSIP_NAME_DEBUG,
+		     "%s: called on %s\n"
+		     "  (inode %pU): Parent is %pU | fs_id %d\n",
+		     __func__,
+		     dentry->d_name.name,
+		     get_khandle_from_ino(victim),
+		     &parent->refn.khandle,
+		     parent->refn.fs_id);
+
+	op = op_alloc(PVFS2_VFS_OP_REMOVE);
+	if (op == NULL)
+		return -ENOMEM;
+
+	strncpy(op->upcall.req.remove.d_name, dentry->d_name.name,
+		PVFS2_NAME_LEN);
+	op->upcall.req.remove.parent_refn = parent->refn;
+
+	rc = service_operation(op, "pvfs2_unlink",
+			       get_interruptible_flag(victim));
+
+	/* the op is no longer needed, whatever the outcome */
+	op_release(op);
+
+	if (rc)
+		return rc;
+
+	/* removed on the server: mirror that in the local inodes */
+	drop_nlink(victim);
+
+	SetMtimeFlag(parent);
+	dir->i_mtime = dir->i_ctime = current_fs_time(dir->i_sb);
+	mark_inode_dirty_sync(dir);
+
+	return rc;
+}
+
+/*
+ * PVFS2 does not support hard links.  This stub exists only so that the
+ * caller gets -EOPNOTSUPP; without a ->link method the kernel would report
+ * a misleading EPERM instead.
+ */
+static int pvfs2_link(struct dentry *old_dentry, struct inode *dir,
+		      struct dentry *dentry)
+{
+	/* arguments are intentionally unused */
+	return -EOPNOTSUPP;
+}
+
+/*
+ * PVFS2 does not support special files such as fifos or devices.  This
+ * stub exists only so that the caller gets -EOPNOTSUPP; without a ->mknod
+ * method the kernel would report a misleading EPERM instead.
+ */
+static int pvfs2_mknod(struct inode *dir, struct dentry *dentry,
+		       umode_t mode, dev_t rdev)
+{
+	/* arguments are intentionally unused */
+	return -EOPNOTSUPP;
+}
+
+/*
+ * Create a symbolic link named by dentry in dir that points at symname.
+ *
+ * The server creates the link object; a new inode for the returned handle
+ * is then attached to the dentry and the parent's times are updated.
+ *
+ * Returns 0 on success, -EINVAL for a NULL target, -ENAMETOOLONG for a
+ * target that does not fit (with its terminator) in PVFS2_NAME_LEN bytes,
+ * -ENOMEM when no op can be allocated, otherwise the negative error from
+ * service_operation() or pvfs2_new_inode().
+ */
+static int pvfs2_symlink(struct inode *dir,
+			 struct dentry *dentry,
+			 const char *symname)
+{
+	struct pvfs2_inode_s *parent = PVFS2_I(dir);
+	struct pvfs2_kernel_op *new_op;
+	struct inode *inode;
+	/* octal rwxr-xr-x; a decimal 755 would be the bit pattern 01363 */
+	int mode = 0755;
+	int ret;
+
+	gossip_debug(GOSSIP_NAME_DEBUG, "%s: called\n", __func__);
+
+	if (!symname)
+		return -EINVAL;
+
+	/*
+	 * The target is copied with a PVFS2_NAME_LEN bound below; refuse
+	 * anything that would be silently truncated and left unterminated.
+	 */
+	if (strlen(symname) >= PVFS2_NAME_LEN)
+		return -ENAMETOOLONG;
+
+	new_op = op_alloc(PVFS2_VFS_OP_SYMLINK);
+	if (!new_op)
+		return -ENOMEM;
+
+	new_op->upcall.req.sym.parent_refn = parent->refn;
+
+	fill_default_sys_attrs(new_op->upcall.req.sym.attributes,
+			       PVFS_TYPE_SYMLINK,
+			       mode);
+
+	strncpy(new_op->upcall.req.sym.entry_name,
+		dentry->d_name.name,
+		PVFS2_NAME_LEN);
+	strncpy(new_op->upcall.req.sym.target, symname, PVFS2_NAME_LEN);
+
+	ret = service_operation(new_op, __func__, get_interruptible_flag(dir));
+
+	gossip_debug(GOSSIP_UTILS_DEBUG,
+		     "Symlink Got PVFS2 handle %pU on fsid %d (ret=%d)\n",
+		     &new_op->downcall.resp.sym.refn.khandle,
+		     new_op->downcall.resp.sym.refn.fs_id, ret);
+
+	if (ret < 0) {
+		gossip_debug(GOSSIP_UTILS_DEBUG,
+			    "%s: failed with error code %d\n",
+			    __func__, ret);
+		goto out;
+	}
+
+	inode = pvfs2_new_inode(dir->i_sb, dir, S_IFLNK | mode, 0,
+				&new_op->downcall.resp.sym.refn);
+	if (IS_ERR(inode)) {
+		gossip_err
+		    ("*** Failed to allocate pvfs2 symlink inode\n");
+		ret = PTR_ERR(inode);
+		goto out;
+	}
+
+	gossip_debug(GOSSIP_UTILS_DEBUG,
+		     "Assigned symlink inode new number of %pU\n",
+		     get_khandle_from_ino(inode));
+
+	/* bind the inode to the dentry before unlocking it for other users */
+	d_instantiate(dentry, inode);
+	unlock_new_inode(inode);
+
+	gossip_debug(GOSSIP_UTILS_DEBUG,
+		     "Inode (Symlink) %pU -> %s\n",
+		     get_khandle_from_ino(inode),
+		     dentry->d_name.name);
+
+	/* the parent directory changed: stamp it and mark it dirty */
+	SetMtimeFlag(parent);
+	dir->i_mtime = dir->i_ctime = current_fs_time(dir->i_sb);
+	mark_inode_dirty_sync(dir);
+	ret = 0;
+out:
+	op_release(new_op);
+	return ret;
+}
+
+/*
+ * Create a directory: have the server create the object named by dentry
+ * inside dir, then attach a newly allocated inode for the returned handle
+ * to the dentry.
+ *
+ * Returns the value of service_operation() on success, -ENOMEM when no op
+ * can be allocated, otherwise the negative error from service_operation()
+ * or pvfs2_new_inode().
+ */
+static int pvfs2_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
+{
+	struct pvfs2_inode_s *parent = PVFS2_I(dir);
+	struct pvfs2_kernel_op *op;
+	PVFS_object_kref *created;
+	struct inode *new;
+	int rc;
+
+	op = op_alloc(PVFS2_VFS_OP_MKDIR);
+	if (op == NULL)
+		return -ENOMEM;
+
+	/* request: parent handle, default attributes, entry name */
+	op->upcall.req.mkdir.parent_refn = parent->refn;
+	fill_default_sys_attrs(op->upcall.req.mkdir.attributes,
+			       PVFS_TYPE_DIRECTORY, mode);
+	strncpy(op->upcall.req.mkdir.d_name,
+		dentry->d_name.name, PVFS2_NAME_LEN);
+
+	rc = service_operation(op, __func__, get_interruptible_flag(dir));
+
+	/* reference of the object the server handed back */
+	created = &op->downcall.resp.mkdir.refn;
+	gossip_debug(GOSSIP_UTILS_DEBUG,
+		     "Mkdir Got PVFS2 handle %pU on fsid %d\n",
+		     &created->khandle,
+		     created->fs_id);
+
+	if (rc < 0) {
+		gossip_debug(GOSSIP_UTILS_DEBUG,
+			     "%s: failed with error code %d\n",
+			     __func__, rc);
+		goto release;
+	}
+
+	new = pvfs2_new_inode(dir->i_sb, dir, S_IFDIR | mode, 0, created);
+	if (IS_ERR(new)) {
+		gossip_err("*** Failed to allocate pvfs2 dir inode\n");
+		rc = PTR_ERR(new);
+		goto release;
+	}
+
+	gossip_debug(GOSSIP_UTILS_DEBUG,
+		     "Assigned dir inode new number of %pU\n",
+		     get_khandle_from_ino(new));
+
+	/* bind the inode to the dentry, then let other users at it */
+	d_instantiate(dentry, new);
+	unlock_new_inode(new);
+
+	gossip_debug(GOSSIP_UTILS_DEBUG,
+		     "Inode (Directory) %pU -> %s\n",
+		     get_khandle_from_ino(new),
+		     dentry->d_name.name);
+
+	/*
+	 * NOTE: there is no good way to keep nlink consistent for
+	 * directories across clients, so it stays constant at 1 and the
+	 * parent's link count is deliberately not touched here.
+	 */
+	SetMtimeFlag(parent);
+	dir->i_mtime = dir->i_ctime = current_fs_time(dir->i_sb);
+	mark_inode_dirty_sync(dir);
+release:
+	op_release(op);
+	return rc;
+}
+
+/*
+ * Rename old_dentry (in old_dir) to new_dentry (in new_dir) on the server.
+ *
+ * Returns the status of service_operation(), or -ENOMEM when no op can be
+ * allocated (matching every other operation in this file).
+ */
+static int pvfs2_rename(struct inode *old_dir,
+			struct dentry *old_dentry,
+			struct inode *new_dir,
+			struct dentry *new_dentry)
+{
+	struct pvfs2_kernel_op *new_op;
+	int ret;
+
+	gossip_debug(GOSSIP_NAME_DEBUG,
+		     "pvfs2_rename: called (%s/%s => %s/%s) ct=%d\n",
+		     old_dentry->d_parent->d_name.name,
+		     old_dentry->d_name.name,
+		     new_dentry->d_parent->d_name.name,
+		     new_dentry->d_name.name,
+		     d_count(new_dentry));
+
+	new_op = op_alloc(PVFS2_VFS_OP_RENAME);
+	if (!new_op)
+		return -ENOMEM;
+
+	new_op->upcall.req.rename.old_parent_refn = PVFS2_I(old_dir)->refn;
+	new_op->upcall.req.rename.new_parent_refn = PVFS2_I(new_dir)->refn;
+
+	strncpy(new_op->upcall.req.rename.d_old_name,
+		old_dentry->d_name.name,
+		PVFS2_NAME_LEN);
+	strncpy(new_op->upcall.req.rename.d_new_name,
+		new_dentry->d_name.name,
+		PVFS2_NAME_LEN);
+
+	ret = service_operation(new_op,
+				"pvfs2_rename",
+				get_interruptible_flag(old_dentry->d_inode));
+
+	gossip_debug(GOSSIP_NAME_DEBUG,
+		     "pvfs2_rename: got downcall status %d\n",
+		     ret);
+
+	/* an inode already sitting at the target name gets a fresh ctime */
+	if (new_dentry->d_inode)
+		new_dentry->d_inode->i_ctime = CURRENT_TIME;
+
+	op_release(new_op);
+	return ret;
+}
+
+/*
+ * PVFS2 implementation of VFS inode operations for directories.
+ * Entries are grouped by purpose; designated initializers make the order
+ * irrelevant to the resulting table.
+ */
+struct inode_operations pvfs2_dir_inode_operations = {
+	/* name resolution and namespace changes */
+	.lookup = pvfs2_lookup,
+	.create = pvfs2_create,
+	.mkdir = pvfs2_mkdir,
+	.symlink = pvfs2_symlink,
+	.rename = pvfs2_rename,
+	.unlink = pvfs2_unlink,
+	.rmdir = pvfs2_unlink,	/* shares the ->unlink handler */
+
+	/* stubs that only return -EOPNOTSUPP */
+	.link = pvfs2_link,
+	.mknod = pvfs2_mknod,
+
+	/* attributes */
+	.getattr = pvfs2_getattr,
+	.setattr = pvfs2_setattr,
+
+	/* ACLs and extended attributes */
+	.get_acl = pvfs2_get_acl,
+	.set_acl = pvfs2_set_acl,
+	.getxattr = generic_getxattr,
+	.setxattr = generic_setxattr,
+	.listxattr = pvfs2_listxattr,
+	.removexattr = generic_removexattr,
+};
diff --git a/fs/orangefs/pvfs2-utils.c b/fs/orangefs/pvfs2-utils.c
new file mode 100644
index 0000000..42c5f3f
--- /dev/null
+++ b/fs/orangefs/pvfs2-utils.c
@@ -0,0 +1,914 @@
+/*
+ * (C) 2001 Clemson University and The University of Chicago
+ *
+ * See COPYING in top-level directory.
+ */
+#include "protocol.h"
+#include "pvfs2-kernel.h"
+#include "pvfs2-dev-proto.h"
+#include "pvfs2-bufmap.h"
+
+/*
+ * Return the file system id carried in the upcall request of op.
+ *
+ * Which request member holds the id depends on op->upcall.type.
+ * PVFS_FS_ID_NULL is returned for a NULL op and for any operation type
+ * not listed below.
+ */
+int32_t fsid_of_op(struct pvfs2_kernel_op *op)
+{
+	if (!op)
+		return PVFS_FS_ID_NULL;
+
+	switch (op->upcall.type) {
+	/* operations addressed by the object's own reference */
+	case PVFS2_VFS_OP_FILE_IO:
+		return op->upcall.req.io.refn.fs_id;
+	case PVFS2_VFS_OP_GETATTR:
+		return op->upcall.req.getattr.refn.fs_id;
+	case PVFS2_VFS_OP_SETATTR:
+		return op->upcall.req.setattr.refn.fs_id;
+	case PVFS2_VFS_OP_READDIR:
+		return op->upcall.req.readdir.refn.fs_id;
+	case PVFS2_VFS_OP_TRUNCATE:
+		return op->upcall.req.truncate.refn.fs_id;
+	case PVFS2_VFS_OP_MMAP_RA_FLUSH:
+		return op->upcall.req.ra_cache_flush.refn.fs_id;
+	case PVFS2_VFS_OP_FSYNC:
+		return op->upcall.req.fsync.refn.fs_id;
+	case PVFS2_VFS_OP_GETXATTR:
+		return op->upcall.req.getxattr.refn.fs_id;
+	case PVFS2_VFS_OP_SETXATTR:
+		return op->upcall.req.setxattr.refn.fs_id;
+	case PVFS2_VFS_OP_LISTXATTR:
+		return op->upcall.req.listxattr.refn.fs_id;
+	case PVFS2_VFS_OP_REMOVEXATTR:
+		return op->upcall.req.removexattr.refn.fs_id;
+
+	/* operations addressed by the parent directory's reference */
+	case PVFS2_VFS_OP_LOOKUP:
+		return op->upcall.req.lookup.parent_refn.fs_id;
+	case PVFS2_VFS_OP_CREATE:
+		return op->upcall.req.create.parent_refn.fs_id;
+	case PVFS2_VFS_OP_REMOVE:
+		return op->upcall.req.remove.parent_refn.fs_id;
+	case PVFS2_VFS_OP_MKDIR:
+		return op->upcall.req.mkdir.parent_refn.fs_id;
+	case PVFS2_VFS_OP_SYMLINK:
+		return op->upcall.req.sym.parent_refn.fs_id;
+	case PVFS2_VFS_OP_RENAME:
+		return op->upcall.req.rename.old_parent_refn.fs_id;
+
+	/* operations that carry the fs id directly */
+	case PVFS2_VFS_OP_STATFS:
+		return op->upcall.req.statfs.fs_id;
+	case PVFS2_VFS_OP_FS_UMOUNT:
+		return op->upcall.req.fs_umount.fs_id;
+
+	default:
+		return PVFS_FS_ID_NULL;
+	}
+}
+
+/*
+ * Mirror the immutable, append-only and no-atime bits of attrs->flags into
+ * inode->i_flags: each S_* bit is set when the matching PVFS_*_FL bit is
+ * present and cleared when it is not.
+ */
+static void pvfs2_set_inode_flags(struct inode *inode,
+				  struct PVFS_sys_attr_s *attrs)
+{
+	/* PVFS attribute flag -> VFS inode flag, handled in this order */
+	static const struct {
+		unsigned long long pvfs_flag;
+		unsigned int vfs_flag;
+	} flag_map[] = {
+		{ PVFS_IMMUTABLE_FL, S_IMMUTABLE },
+		{ PVFS_APPEND_FL, S_APPEND },
+		{ PVFS_NOATIME_FL, S_NOATIME },
+	};
+	unsigned int idx;
+
+	for (idx = 0; idx < sizeof(flag_map) / sizeof(flag_map[0]); idx++) {
+		if (attrs->flags & flag_map[idx].pvfs_flag)
+			inode->i_flags |= flag_map[idx].vfs_flag;
+		else
+			inode->i_flags &= ~flag_map[idx].vfs_flag;
+	}
+}
+
+/*
+ * Fill in the VFS inode from a PVFS attribute set returned by the server.
+ *
+ * @inode:   inode to update
+ * @attrs:   attributes to copy from
+ * @symname: link target; ignored unless the object is a symlink
+ *
+ * Sets size/blocks, owner, group, times (second resolution only), the
+ * permission and file-type bits of i_mode and, for symlinks, the cached
+ * link target.  Returns 0 when attrs->objtype is a metafile, directory or
+ * symlink, and -1 for any other object type (the other fields have been
+ * written by then regardless).
+ */
+static int copy_attributes_to_inode(struct inode *inode,
+				    struct PVFS_sys_attr_s *attrs,
+				    char *symname)
+{
+	int ret = -1;
+	int perm_mode = 0;
+	struct pvfs2_inode_s *pvfs2_inode = PVFS2_I(inode);
+	loff_t inode_size = 0;
+	loff_t rounded_up_size = 0;
+
+
+	/*
+	 * Arbitrarily set the inode block size; FIXME: we need to
+	 * resolve the difference between the reported inode blocksize
+	 * and the PAGE_CACHE_SIZE, since our block count will always
+	 * be wrong.
+	 *
+	 * For now, we're setting the block count to be the proper
+	 * number assuming the block size is 512 bytes, and the size is
+	 * rounded up to the nearest 4K.  This is apparently required
+	 * to get proper size reports from the 'du' shell utility.
+	 *
+	 * Changing the inode->i_blkbits to something other than
+	 * PAGE_CACHE_SHIFT breaks mmap/execution as we depend on that.
+	 */
+	gossip_debug(GOSSIP_UTILS_DEBUG,
+		     "attrs->mask = %x (objtype = %s)\n",
+		     attrs->mask,
+		     attrs->objtype == PVFS_TYPE_METAFILE ? "file" :
+		     attrs->objtype == PVFS_TYPE_DIRECTORY ? "directory" :
+		     attrs->objtype == PVFS_TYPE_SYMLINK ? "symlink" :
+			"invalid/unknown");
+
+	/* first pass over the object type: size and block accounting */
+	switch (attrs->objtype) {
+	case PVFS_TYPE_METAFILE:
+		pvfs2_set_inode_flags(inode, attrs);
+		/* the size is only trusted when the server said it is valid */
+		if (attrs->mask & PVFS_ATTR_SYS_SIZE) {
+			inode_size = (loff_t) attrs->size;
+			/*
+			 * NOTE(review): when inode_size is already a multiple
+			 * of 4096 (including 0) this adds a further 4096 --
+			 * confirm that is the intended rounding.
+			 */
+			rounded_up_size =
+			    (inode_size + (4096 - (inode_size % 4096)));
+
+			pvfs2_lock_inode(inode);
+			inode->i_bytes = inode_size;
+			inode->i_blocks =
+			    (unsigned long)(rounded_up_size / 512);
+			pvfs2_unlock_inode(inode);
+
+			/*
+			 * NOTE: make sure all the places we're called
+			 * from have the inode->i_sem lock. We're fine
+			 * in 99% of the cases since we're mostly
+			 * called from a lookup.
+			 */
+			inode->i_size = inode_size;
+		}
+		break;
+	case PVFS_TYPE_SYMLINK:
+		/* a symlink's size is the length of its target, if known */
+		if (symname != NULL) {
+			inode->i_size = (loff_t) strlen(symname);
+			break;
+		}
+		/*FALLTHRU*/
+	default:
+		/* everything else is reported as one page in size */
+		pvfs2_lock_inode(inode);
+		inode->i_bytes = PAGE_CACHE_SIZE;
+		inode->i_blocks = (unsigned long)(PAGE_CACHE_SIZE / 512);
+		pvfs2_unlock_inode(inode);
+
+		inode->i_size = PAGE_CACHE_SIZE;
+		break;
+	}
+
+	/* ownership and timestamps; nanoseconds are always zeroed */
+	inode->i_uid = make_kuid(&init_user_ns, attrs->owner);
+	inode->i_gid = make_kgid(&init_user_ns, attrs->group);
+	inode->i_atime.tv_sec = (time_t) attrs->atime;
+	inode->i_mtime.tv_sec = (time_t) attrs->mtime;
+	inode->i_ctime.tv_sec = (time_t) attrs->ctime;
+	inode->i_atime.tv_nsec = 0;
+	inode->i_mtime.tv_nsec = 0;
+	inode->i_ctime.tv_nsec = 0;
+
+	/* translate PVFS permission bits into VFS mode bits: other ... */
+	if (attrs->perms & PVFS_O_EXECUTE)
+		perm_mode |= S_IXOTH;
+	if (attrs->perms & PVFS_O_WRITE)
+		perm_mode |= S_IWOTH;
+	if (attrs->perms & PVFS_O_READ)
+		perm_mode |= S_IROTH;
+
+	/* ... group ... */
+	if (attrs->perms & PVFS_G_EXECUTE)
+		perm_mode |= S_IXGRP;
+	if (attrs->perms & PVFS_G_WRITE)
+		perm_mode |= S_IWGRP;
+	if (attrs->perms & PVFS_G_READ)
+		perm_mode |= S_IRGRP;
+
+	/* ... user ... */
+	if (attrs->perms & PVFS_U_EXECUTE)
+		perm_mode |= S_IXUSR;
+	if (attrs->perms & PVFS_U_WRITE)
+		perm_mode |= S_IWUSR;
+	if (attrs->perms & PVFS_U_READ)
+		perm_mode |= S_IRUSR;
+
+	/* ... and the setgid/setuid bits */
+	if (attrs->perms & PVFS_G_SGID)
+		perm_mode |= S_ISGID;
+	if (attrs->perms & PVFS_U_SUID)
+		perm_mode |= S_ISUID;
+
+	/* replaces the whole mode; the file-type bits are OR-ed in below */
+	inode->i_mode = perm_mode;
+
+	if (is_root_handle(inode)) {
+		/* special case: mark the root inode as sticky */
+		inode->i_mode |= S_ISVTX;
+		gossip_debug(GOSSIP_UTILS_DEBUG,
+			     "Marking inode %pU as sticky\n",
+			     get_khandle_from_ino(inode));
+	}
+
+	/* second pass over the object type: file-type bits and return value */
+	switch (attrs->objtype) {
+	case PVFS_TYPE_METAFILE:
+		inode->i_mode |= S_IFREG;
+		ret = 0;
+		break;
+	case PVFS_TYPE_DIRECTORY:
+		inode->i_mode |= S_IFDIR;
+		/* NOTE: we have no good way to keep nlink consistent
+		 * for directories across clients; keep constant at 1.
+		 * Why 1?  If we go with 2, then find(1) gets confused
+		 * and won't work properly without the -noleaf option
+		 */
+		set_nlink(inode, 1);
+		ret = 0;
+		break;
+	case PVFS_TYPE_SYMLINK:
+		inode->i_mode |= S_IFLNK;
+
+		/*
+		 * copy link target to inode private data
+		 *
+		 * NOTE(review): strncpy() leaves link_target unterminated
+		 * when symname is PVFS_NAME_MAX bytes or longer -- confirm
+		 * the buffer is sized or zeroed to cope with that.
+		 */
+		if (pvfs2_inode && symname) {
+			strncpy(pvfs2_inode->link_target,
+				symname,
+				PVFS_NAME_MAX);
+			gossip_debug(GOSSIP_UTILS_DEBUG,
+				     "Copied attr link target %s\n",
+				     pvfs2_inode->link_target);
+		}
+		gossip_debug(GOSSIP_UTILS_DEBUG,
+			     "symlink mode %o\n",
+			     inode->i_mode);
+		ret = 0;
+		break;
+	default:
+		/* unknown object type: ret stays at -1 */
+		gossip_err("pvfs2: copy_attributes_to_inode: got invalid attribute type %x\n",
+			attrs->objtype);
+	}
+
+	gossip_debug(GOSSIP_UTILS_DEBUG,
+		     "pvfs2: copy_attributes_to_inode: setting i_mode to %o, i_size to %lu\n",
+		     inode->i_mode,
+		     (unsigned long)i_size_read(inode));
+
+	return ret;
+}
+
+/*
+ * Translate the valid fields of a VFS iattr into a PVFS attribute set for
+ * a setattr request.
+ *
+ * @inode: inode being changed (used only to recognise the root handle)
+ * @attrs: attribute set to fill in; attrs->mask is rebuilt from scratch
+ * @iattr: requested changes; only fields flagged in ia_valid are copied
+ *
+ * Returns 0 on success, or -EINVAL for a NULL argument, for a sticky bit
+ * on anything but the root handle, or for a setuid bit.
+ *
+ * NOTE: in kernel land, we never use the sys_attr->link_target for
+ * anything, so don't bother copying it into the sys_attr object here.
+ */
+static inline int copy_attributes_from_inode(struct inode *inode,
+					     struct PVFS_sys_attr_s *attrs,
+					     struct iattr *iattr)
+{
+	umode_t tmp_mode;
+
+	if (!iattr || !inode || !attrs) {
+		gossip_err("NULL iattr (%p), inode (%p), attrs (%p) "
+			   "in copy_attributes_from_inode!\n",
+			   iattr,
+			   inode,
+			   attrs);
+		return -EINVAL;
+	}
+	/*
+	 * We need to be careful to only copy the attributes out of the
+	 * iattr object that we know are valid.
+	 */
+	attrs->mask = 0;
+	/*
+	 * Ids are mapped through init_user_ns, the same namespace
+	 * copy_attributes_to_inode() uses when reading them back, so that
+	 * a value sent to the server round-trips to the same kuid/kgid.
+	 */
+	if (iattr->ia_valid & ATTR_UID) {
+		attrs->owner = from_kuid(&init_user_ns, iattr->ia_uid);
+		attrs->mask |= PVFS_ATTR_SYS_UID;
+		gossip_debug(GOSSIP_UTILS_DEBUG, "(UID) %d\n", attrs->owner);
+	}
+	if (iattr->ia_valid & ATTR_GID) {
+		attrs->group = from_kgid(&init_user_ns, iattr->ia_gid);
+		attrs->mask |= PVFS_ATTR_SYS_GID;
+		gossip_debug(GOSSIP_UTILS_DEBUG, "(GID) %d\n", attrs->group);
+	}
+
+	/*
+	 * For atime/mtime the bare flag only requests an update; an explicit
+	 * value is sent only when the matching *_SET flag is present.
+	 */
+	if (iattr->ia_valid & ATTR_ATIME) {
+		attrs->mask |= PVFS_ATTR_SYS_ATIME;
+		if (iattr->ia_valid & ATTR_ATIME_SET) {
+			attrs->atime =
+			    pvfs2_convert_time_field((void *)&iattr->ia_atime);
+			attrs->mask |= PVFS_ATTR_SYS_ATIME_SET;
+		}
+	}
+	if (iattr->ia_valid & ATTR_MTIME) {
+		attrs->mask |= PVFS_ATTR_SYS_MTIME;
+		if (iattr->ia_valid & ATTR_MTIME_SET) {
+			attrs->mtime =
+			    pvfs2_convert_time_field((void *)&iattr->ia_mtime);
+			attrs->mask |= PVFS_ATTR_SYS_MTIME_SET;
+		}
+	}
+	if (iattr->ia_valid & ATTR_CTIME)
+		attrs->mask |= PVFS_ATTR_SYS_CTIME;
+
+	/*
+	 * PVFS2 cannot set size with a setattr operation.  Probably not likely
+	 * to be requested through the VFS, but just in case, don't worry about
+	 * ATTR_SIZE
+	 */
+
+	if (iattr->ia_valid & ATTR_MODE) {
+		tmp_mode = iattr->ia_mode;
+		if (tmp_mode & (S_ISVTX)) {
+			if (is_root_handle(inode)) {
+				/*
+				 * allow sticky bit to be set on root (since
+				 * it shows up that way by default anyhow),
+				 * but don't show it to the server
+				 */
+				tmp_mode &= ~S_ISVTX;
+			} else {
+				gossip_debug(GOSSIP_UTILS_DEBUG,
+					     "User attempted to set sticky bit on non-root directory; returning EINVAL.\n");
+				return -EINVAL;
+			}
+		}
+
+		if (tmp_mode & (S_ISUID)) {
+			gossip_debug(GOSSIP_UTILS_DEBUG,
+				     "Attempting to set setuid bit (not supported); returning EINVAL.\n");
+			return -EINVAL;
+		}
+
+		attrs->perms = PVFS_util_translate_mode(tmp_mode);
+		attrs->mask |= PVFS_ATTR_SYS_PERM;
+	}
+
+	return 0;
+}
+
+/*
+ * Fetch the attributes selected by getattr_mask from the server and copy
+ * them into inode.
+ *
+ * Returns 0 on success, -ENOMEM when no op can be allocated, -ENOENT when
+ * the returned attributes cannot be copied, otherwise the error from
+ * service_operation().
+ */
+int pvfs2_inode_getattr(struct inode *inode, uint32_t getattr_mask)
+{
+	struct pvfs2_inode_s *pvfs2_inode = PVFS2_I(inode);
+	struct PVFS_sys_attr_s *fetched;
+	struct pvfs2_kernel_op *op;
+	int rc = -EINVAL;
+
+	gossip_debug(GOSSIP_UTILS_DEBUG,
+		     "%s: called on inode %pU\n",
+		     __func__,
+		     get_khandle_from_ino(inode));
+
+	op = op_alloc(PVFS2_VFS_OP_GETATTR);
+	if (op == NULL)
+		return -ENOMEM;
+
+	op->upcall.req.getattr.mask = getattr_mask;
+	op->upcall.req.getattr.refn = pvfs2_inode->refn;
+
+	rc = service_operation(op, __func__,
+			       get_interruptible_flag(inode));
+	if (rc == 0) {
+		fetched = &op->downcall.resp.getattr.attributes;
+
+		if (copy_attributes_to_inode(inode, fetched,
+				op->downcall.resp.getattr.link_target)) {
+			gossip_err("%s: failed to copy attributes\n",
+				   __func__);
+			rc = -ENOENT;
+		} else if (fetched->objtype == PVFS_TYPE_METAFILE) {
+			/*
+			 * Keep the server's blksize in the pvfs2 specific
+			 * part of the inode; it is only meant for reporting
+			 * through stat, so that no other inode code path is
+			 * perturbed by it.
+			 */
+			pvfs2_inode->blksize = fetched->blksize;
+		} else {
+			/* other types mimic generic_fillattr() */
+			pvfs2_inode->blksize = (1 << inode->i_blkbits);
+		}
+	}
+
+	gossip_debug(GOSSIP_UTILS_DEBUG,
+		     "Getattr on handle %pU, "
+		     "fsid %d\n  (inode ct = %d) returned %d\n",
+		     &pvfs2_inode->refn.khandle,
+		     pvfs2_inode->refn.fs_id,
+		     (int)atomic_read(&inode->i_count),
+		     rc);
+
+	op_release(op);
+	return rc;
+}
+
+/*
+ * Send the changes described by iattr to the server as a setattr request.
+ *
+ * Returns 0 on success, -ENOMEM when no op can be allocated, the error
+ * from copy_attributes_from_inode() when iattr is rejected, otherwise the
+ * status of service_operation().
+ */
+int pvfs2_inode_setattr(struct inode *inode, struct iattr *iattr)
+{
+	struct pvfs2_inode_s *pvfs2_inode = PVFS2_I(inode);
+	struct pvfs2_kernel_op *op;
+	int rc;
+
+	op = op_alloc(PVFS2_VFS_OP_SETATTR);
+	if (op == NULL)
+		return -ENOMEM;
+
+	op->upcall.req.setattr.refn = pvfs2_inode->refn;
+	rc = copy_attributes_from_inode(inode,
+					&op->upcall.req.setattr.attributes,
+					iattr);
+	if (rc >= 0) {
+		/* only a request that translated cleanly goes to the server */
+		rc = service_operation(op, __func__,
+				       get_interruptible_flag(inode));
+
+		gossip_debug(GOSSIP_UTILS_DEBUG,
+			     "pvfs2_inode_setattr: returning %d\n",
+			     rc);
+	}
+
+	/* the op is released on every path */
+	op_release(op);
+
+	/* nothing further to do unless the server accepted the change */
+	if (rc != 0)
+		return rc;
+
+	/* a successful setattr leaves no time or mode update pending */
+	ClearAtimeFlag(pvfs2_inode);
+	ClearMtimeFlag(pvfs2_inode);
+	ClearCtimeFlag(pvfs2_inode);
+	ClearModeFlag(pvfs2_inode);
+
+	return rc;
+}
+
+/*
+ * Write back whatever the pending-update flags of a dirty inode ask for.
+ * Right now that is limited to mode, atime, mtime and/or ctime.
+ *
+ * Returns 0 when nothing was pending, otherwise the result of
+ * pvfs2_inode_setattr().
+ */
+int pvfs2_flush_inode(struct inode *inode)
+{
+	struct pvfs2_inode_s *pvfs2_inode = PVFS2_I(inode);
+	struct iattr pending;
+	int want_mtime;
+	int want_ctime;
+	int want_atime;
+	int want_mode;
+
+	memset(&pending, 0, sizeof(pending));
+
+	/*
+	 * Sample every flag and clear it straight away.  Doing this up
+	 * front prevents multiple processes from all trying to flush the
+	 * same inode if they call close() simultaneously.
+	 */
+	want_mtime = MtimeFlag(pvfs2_inode);
+	ClearMtimeFlag(pvfs2_inode);
+	want_ctime = CtimeFlag(pvfs2_inode);
+	ClearCtimeFlag(pvfs2_inode);
+	want_atime = AtimeFlag(pvfs2_inode);
+	ClearAtimeFlag(pvfs2_inode);
+	want_mode = ModeFlag(pvfs2_inode);
+	ClearModeFlag(pvfs2_inode);
+
+	/*
+	 * Lazy atime, mtime and ctime update: only the request bits are
+	 * set, no time values, because all times are dictated by the server
+	 * and not by the clients.  Mode updates carry the inode's mode.
+	 */
+	pending.ia_valid |= want_mtime ? ATTR_MTIME : 0;
+	pending.ia_valid |= want_ctime ? ATTR_CTIME : 0;
+	pending.ia_valid |= want_atime ? ATTR_ATIME : 0;
+
+	if (want_mode) {
+		pending.ia_mode = inode->i_mode;
+		pending.ia_valid |= ATTR_MODE;
+	}
+
+	gossip_debug(GOSSIP_UTILS_DEBUG,
+		     "*********** pvfs2_flush_inode: %pU "
+		     "(ia_valid %d)\n",
+		     get_khandle_from_ino(inode),
+		     pending.ia_valid);
+	if (!pending.ia_valid) {
+		gossip_debug(GOSSIP_UTILS_DEBUG,
+			     "pvfs2_flush_inode skipping setattr()\n");
+		return 0;
+	}
+
+	gossip_debug(GOSSIP_UTILS_DEBUG,
+		     "pvfs2_flush_inode (%pU) writing mode %o\n",
+		     get_khandle_from_ino(inode),
+		     inode->i_mode);
+
+	return pvfs2_inode_setattr(inode, &pending);
+}
+
+/*
+ * Send an unmount request for the file system behind sb.
+ *
+ * On success the superblock's mount_pending flag is set to 1.  Returns 0
+ * on success, -ENOMEM when no op can be allocated, otherwise the status of
+ * service_operation().
+ */
+int pvfs2_unmount_sb(struct super_block *sb)
+{
+	int ret = -EINVAL;
+	struct pvfs2_kernel_op *new_op = NULL;
+
+	gossip_debug(GOSSIP_UTILS_DEBUG,
+		     "pvfs2_unmount_sb called on sb %p\n",
+		     sb);
+
+	new_op = op_alloc(PVFS2_VFS_OP_FS_UMOUNT);
+	if (!new_op)
+		return -ENOMEM;
+	new_op->upcall.req.fs_umount.id = PVFS2_SB(sb)->id;
+	new_op->upcall.req.fs_umount.fs_id = PVFS2_SB(sb)->fs_id;
+	strncpy(new_op->upcall.req.fs_umount.pvfs2_config_server,
+		PVFS2_SB(sb)->devname,
+		PVFS_MAX_SERVER_ADDR_LEN);
+
+	gossip_debug(GOSSIP_UTILS_DEBUG,
+		     "Attempting PVFS2 Unmount via host %s\n",
+		     new_op->upcall.req.fs_umount.pvfs2_config_server);
+
+	/* third argument 0: no flags are passed for this operation */
+	ret = service_operation(new_op, "pvfs2_fs_umount", 0);
+
+	gossip_debug(GOSSIP_UTILS_DEBUG,
+		     "pvfs2_unmount: got return value of %d\n", ret);
+	/*
+	 * The error is reported through the return value alone; sb is a
+	 * by-value parameter, so storing an ERR_PTR() in it had no effect.
+	 */
+	if (!ret)
+		PVFS2_SB(sb)->mount_pending = 1;
+
+	op_release(new_op);
+	return ret;
+}
+
+/*
+ * Ask for the in-flight operation identified by tag to be cancelled.
+ *
+ * Returns -ENOMEM when no op can be allocated, otherwise whatever
+ * service_operation() reports for the cancel request.
+ *
+ * NOTE: on successful cancellation, be sure to return -EINTR, as
+ * that's the return value the caller expects
+ */
+int pvfs2_cancel_op_in_progress(uint64_t tag)
+{
+	struct pvfs2_kernel_op *cancel_op = NULL;
+	int status = -EINVAL;
+
+	gossip_debug(GOSSIP_UTILS_DEBUG,
+		     "pvfs2_cancel_op_in_progress called on tag %llu\n",
+		     llu(tag));
+
+	cancel_op = op_alloc(PVFS2_VFS_OP_CANCEL);
+	if (cancel_op == NULL)
+		return -ENOMEM;
+
+	/* name the victim operation by its tag */
+	cancel_op->upcall.req.cancel.op_tag = tag;
+
+	gossip_debug(GOSSIP_UTILS_DEBUG,
+		     "Attempting PVFS2 operation cancellation of tag %llu\n",
+		     llu(cancel_op->upcall.req.cancel.op_tag));
+
+	status = service_operation(cancel_op, "pvfs2_cancel",
+				   PVFS2_OP_CANCELLATION);
+
+	gossip_debug(GOSSIP_UTILS_DEBUG,
+		     "pvfs2_cancel_op_in_progress: got return value of %d\n",
+		     status);
+
+	op_release(cancel_op);
+	return status;
+}
+
+/*
+ * Reset an op to its pristine state under op->lock: no completed I/O,
+ * invalid upcall/downcall types, downcall status -1, unknown state and
+ * tag 0.  A NULL op is ignored.
+ */
+void pvfs2_op_initialize(struct pvfs2_kernel_op *op)
+{
+	if (!op)
+		return;
+
+	spin_lock(&op->lock);
+
+	op->tag = 0;
+	op->op_state = OP_VFS_STATE_UNKNOWN;
+	op->io_completed = 0;
+
+	op->upcall.type = PVFS2_VFS_OP_INVALID;
+
+	op->downcall.type = PVFS2_VFS_OP_INVALID;
+	op->downcall.status = -1;
+
+	spin_unlock(&op->lock);
+}
+
+/*
+ * Mark inode as bad, unless it is the root handle, which is only logged.
+ */
+void pvfs2_make_bad_inode(struct inode *inode)
+{
+	if (!is_root_handle(inode)) {
+		gossip_debug(GOSSIP_UTILS_DEBUG,
+			     "*** making bad inode %pU\n",
+			     get_khandle_from_ino(inode));
+		make_bad_inode(inode);
+		return;
+	}
+
+	/*
+	 * Reaching this point means the pvfs2-client-core was killed, but
+	 * we can't afford to lose the inode operations and such associated
+	 * with the root handle in any case, so the root is left alone.
+	 */
+	gossip_debug(GOSSIP_UTILS_DEBUG,
+		     "*** NOT making bad root inode %pU\n",
+		     get_khandle_from_ino(inode));
+}
+
+/*
+ * Save the current task's blocked-signal set in *orig_sigset and install a
+ * new one built from an allow-list: SIGKILL always, SIGINT and SIGQUIT
+ * only while they still have their default handler, and in every case
+ * minus whatever the first word of the saved set already had blocked.
+ *
+ * This code is based on linux/net/sunrpc/clnt.c:rpc_clnt_sigmask
+ */
+void mask_blocked_signals(sigset_t *orig_sigset)
+{
+	struct k_sigaction *handlers = pvfs2_current_sigaction;
+	unsigned long allowed = sigmask(SIGKILL);
+	unsigned long flags = 0;
+
+	if (handlers[SIGINT - 1].sa.sa_handler == SIG_DFL)
+		allowed |= sigmask(SIGINT);
+	if (handlers[SIGQUIT - 1].sa.sa_handler == SIG_DFL)
+		allowed |= sigmask(SIGQUIT);
+
+	spin_lock_irqsave(&pvfs2_current_signal_lock, flags);
+	*orig_sigset = current->blocked;
+	siginitsetinv(&current->blocked, allowed & ~orig_sigset->sig[0]);
+	recalc_sigpending();
+	spin_unlock_irqrestore(&pvfs2_current_signal_lock, flags);
+}
+
+/*
+ * Put back the blocked-signal set that mask_blocked_signals() saved in
+ * *orig_sigset and recompute the pending state.
+ *
+ * This code is based on linux/net/sunrpc/clnt.c:rpc_clnt_sigunmask
+ */
+void unmask_blocked_signals(sigset_t *orig_sigset)
+{
+	unsigned long flags = 0;
+
+	spin_lock_irqsave(&pvfs2_current_signal_lock, flags);
+
+	current->blocked = *orig_sigset;
+	recalc_sigpending();
+
+	spin_unlock_irqrestore(&pvfs2_current_signal_lock, flags);
+}
+
+/*
+ * Return the whole-seconds part of the struct timespec that time_ptr
+ * points to, widened to 64 bits; the nanoseconds are dropped.
+ */
+uint64_t pvfs2_convert_time_field(void *time_ptr)
+{
+	const struct timespec *ts = time_ptr;
+	time_t seconds = (time_t) ts->tv_sec;
+
+	return (uint64_t) seconds;
+}
+
+/*
+ * Macro defined in include/pvfs2-types.h.
+ * NOTE(review): presumably this expands to the error-code mapping used by
+ * PVFS_ERROR_TO_ERRNO() in pvfs2_normalize_to_errno() below -- confirm
+ * against the header.
+ */
+DECLARE_ERRNO_MAPPING_AND_FN();
+
+/*
+ * Turn an error code coming from PVFS into a negative errno value.
+ *
+ * A positive input is logged and negated first.  PVFS codes that have no
+ * errno equivalent become -ETIMEDOUT (cancellation) or -EINVAL (anything
+ * else); other PVFS error codes are mapped with PVFS_ERROR_TO_ERRNO();
+ * every remaining value is returned as it is.
+ */
+int pvfs2_normalize_to_errno(int32_t error_code)
+{
+	if (error_code > 0) {
+		gossip_err("pvfs2: error status receieved.\n");
+		gossip_err("pvfs2: assuming error code is inverted.\n");
+		error_code = -error_code;
+	}
+
+	/* codes in pvfs2 format that errno cannot express */
+	if (IS_PVFS_NON_ERRNO_ERROR(-error_code)) {
+		/*
+		 * cancellation error codes generally correspond to
+		 * a timeout from the client's perspective
+		 */
+		if (PVFS_NON_ERRNO_ERROR_CODE(-error_code) == PVFS_ECANCEL)
+			return -ETIMEDOUT;
+
+		/* assume a default error code */
+		gossip_err("pvfs2: warning: got error code without errno equivalent: %d.\n",
+			   error_code);
+		return -EINVAL;
+	}
+
+	/* codes in pvfs2 format that do have an errno equivalent */
+	if (IS_PVFS_ERROR(-error_code))
+		return -PVFS_ERROR_TO_ERRNO(-error_code);
+
+	return error_code;
+}
+
+#define NUM_MODES 11
+/*
+ * Translate the permission, setgid and setuid bits of a Linux mode into
+ * the corresponding PVFS_* permission flags.  Bits of mode that are not
+ * listed in the table below are ignored.
+ *
+ * Returns the OR of the PVFS flags for every listed bit set in mode.
+ */
+int32_t PVFS_util_translate_mode(int mode)
+{
+	int ret = 0;
+	int i = 0;
+	/* modes[i] translates to pvfs2_modes[i]; the tables are read-only */
+	static const int modes[NUM_MODES] = {
+		S_IXOTH, S_IWOTH, S_IROTH,
+		S_IXGRP, S_IWGRP, S_IRGRP,
+		S_IXUSR, S_IWUSR, S_IRUSR,
+		S_ISGID, S_ISUID
+	};
+	static const int pvfs2_modes[NUM_MODES] = {
+		PVFS_O_EXECUTE, PVFS_O_WRITE, PVFS_O_READ,
+		PVFS_G_EXECUTE, PVFS_G_WRITE, PVFS_G_READ,
+		PVFS_U_EXECUTE, PVFS_U_WRITE, PVFS_U_READ,
+		PVFS_G_SGID, PVFS_U_SUID
+	};
+
+	for (i = 0; i < NUM_MODES; i++)
+		if (mode & modes[i])
+			ret |= pvfs2_modes[i];
+
+	return ret;
+}
+#undef NUM_MODES
+
+/*
+ * Minimal strtok()-style tokenizer.
+ *
+ * @s:    string to start tokenizing, or NULL to continue with the string
+ *        passed on an earlier call.  The string is modified in place:
+ *        each delimiter found is overwritten with a NUL byte.
+ * @toks: NUL-terminated set of delimiter characters.
+ *
+ * Returns the next token, or NULL when nothing is left.  Consecutive
+ * delimiters yield empty tokens rather than being merged.
+ *
+ * The scan position lives in a static variable, so the function is not
+ * reentrant and callers must serialize its use.
+ */
+static char *pvfs2_strtok(char *s, const char *toks)
+{
+	/* scan position carried over from one call to the next */
+	static char *in_string_p;
+	/* start of the token returned by this call */
+	char *this_string_p;
+	/* number of delimiter characters */
+	uint32_t toks_len = strlen(toks);
+	/* index */
+	uint32_t i;
+
+	/* when s has a value, we are using a new input string */
+	if (s)
+		in_string_p = s;
+
+	/*
+	 * Continuation call with no string in progress: either no string
+	 * was ever supplied or the previous one is exhausted.
+	 */
+	if (!in_string_p)
+		return NULL;
+
+	/* set new starting position */
+	this_string_p = in_string_p;
+
+	/*
+	 * loop through the string until a delimiter or end-of-string (NUL)
+	 * is found.
+	 */
+	for (; *in_string_p; in_string_p++)
+		for (i = 0; i < toks_len; i++)
+			if (*in_string_p == toks[i]) {
+				/* delimiter found => end of token */
+				*in_string_p = 0;
+				in_string_p++;
+				return this_string_p;
+			}
+
+	/*
+	 * End of input reached.  Forget the caller's buffer so that a later
+	 * continuation call cannot touch memory the caller may have freed.
+	 */
+	in_string_p = NULL;
+
+	if (*this_string_p == 0)
+		return NULL;
+
+	return this_string_p;
+}
+
+/*
+ * Convert a 64-bit debug mask into a readable, comma-separated string of
+ * keywords.
+ *
+ * @mask_map:     keyword/mask table to translate with
+ * @num_mask_map: number of entries in @mask_map
+ * @mask:         debug mask to describe
+ * @debug_string: output buffer of PVFS2_MAX_DEBUG_STRING_LEN bytes; it is
+ *                cleared first and is always left NUL-terminated
+ *
+ * If @mask equals GOSSIP_NO_DEBUG or GOSSIP_MAX_DEBUG, the keyword for
+ * that value alone is written once its table entry is reached.  Otherwise
+ * every keyword whose mask bits are all set in @mask is appended.
+ * Translation stops silently when the next keyword would not fit.
+ *
+ * Always returns 0.
+ */
+static int proc_mask_to_debug(struct __keyword_mask_t *mask_map,
+			      int num_mask_map,
+			      uint64_t mask,
+			      char *debug_string)
+{
+	unsigned int index = 0;
+	unsigned int len;
+	unsigned int sep;
+	int i;
+
+	memset(debug_string, 0, PVFS2_MAX_DEBUG_STRING_LEN);
+
+	for (i = 0; i < num_mask_map; i++) {
+		len = strlen(mask_map[i].keyword);
+		/* a comma precedes every keyword but the first */
+		sep = index ? 1 : 0;
+
+		/*
+		 * Stop unless separator + keyword fit with one byte to
+		 * spare for the terminating NUL left by the memset above.
+		 */
+		if (index + sep + len >= PVFS2_MAX_DEBUG_STRING_LEN)
+			return 0;
+
+		switch (mask_map[i].mask_val) {
+		case GOSSIP_NO_DEBUG:
+			if (mask == GOSSIP_NO_DEBUG) {
+				strcpy(debug_string, mask_map[i].keyword);
+				return 0;
+			}
+			break;
+		case GOSSIP_MAX_DEBUG:
+			if (mask == GOSSIP_MAX_DEBUG) {
+				strcpy(debug_string, mask_map[i].keyword);
+				return 0;
+			}
+			break;
+		default:
+			if ((mask & mask_map[i].mask_val) !=
+			    mask_map[i].mask_val)
+				/* mask does NOT contain the mask value */
+				break;
+
+			if (sep) {
+				debug_string[index] = ',';
+				index++;
+			}
+
+			/* add keyword and slide index */
+			memcpy(&debug_string[index],
+			       mask_map[i].keyword,
+			       len);
+			index += len;
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * Convert a keyword list into a debug mask.
+ *
+ * @mask_map:      keyword/mask table to translate with
+ * @num_mask_map:  number of entries in @mask_map
+ * @event_logging: keywords separated by ',' or ' '; may be NULL
+ *
+ * Keywords are applied left to right.  A keyword ORs its mask value into
+ * the result; a keyword prefixed with '-' clears its mask value instead.
+ * The '-' applies to that one keyword only.  Unknown keywords are
+ * ignored.
+ *
+ * Returns the computed mask.  The return type cannot carry an error, so
+ * 0 is returned when @event_logging is NULL and also when the working
+ * copy of the string cannot be allocated.
+ *
+ * Tokenizing goes through pvfs2_strtok(), which keeps static state, so
+ * concurrent callers must be serialized.
+ */
+static uint64_t proc_debug_to_mask(struct __keyword_mask_t *mask_map,
+				   int num_mask_map,
+				   const char *event_logging)
+{
+	uint64_t mask = 0;
+	char *s;
+	char *t;
+	const char *toks = ", ";
+	int i;
+	int negate;
+	int slen;
+
+	if (!event_logging)
+		return 0;
+
+	/* work on a private copy: the tokenizer writes into its input */
+	slen = strlen(event_logging);
+	s = kmalloc(slen + 1, GFP_KERNEL);
+	if (!s)
+		return 0;
+	memcpy(s, event_logging, slen);
+	s[slen] = '\0';
+
+	for (t = pvfs2_strtok(s, toks); t; t = pvfs2_strtok(NULL, toks)) {
+		/* decide per keyword whether it switches bits on or off */
+		negate = (*t == '-');
+		if (negate)
+			++t;
+
+		for (i = 0; i < num_mask_map; i++) {
+			if (strcmp(t, mask_map[i].keyword))
+				continue;
+
+			if (negate)
+				mask &= ~mask_map[i].mask_val;
+			else
+				mask |= mask_map[i].mask_val;
+			break;
+		}
+	}
+
+	kfree(s);
+	return mask;
+}
+
+/*
+ * Based on human readable keywords, translate them into
+ * a mask value appropriate for the debugging level desired.
+ * The 'computed' mask is returned; 0 if no keywords are
+ * present or recognized.  Unrecognized keywords are ignored when
+ * mixed with recognized keywords.
+ *
+ * Prefix a keyword with "-" to turn it off.  All keywords
+ * processed in specified order.
+ *
+ * This variant looks keywords up in s_keyword_mask_map.
+ */
+uint64_t PVFS_proc_debug_eventlog_to_mask(const char *event_logging)
+{
+	return proc_debug_to_mask(s_keyword_mask_map,
+				  num_keyword_mask_map,
+				  event_logging);
+}
+
+/*
+ * Translate a keyword list into a debug mask, looking keywords up in
+ * s_kmod_keyword_mask_map.
+ */
+uint64_t PVFS_proc_kmod_eventlog_to_mask(const char *event_logging)
+{
+	return proc_debug_to_mask(s_kmod_keyword_mask_map,
+				  num_kmod_keyword_mask_map,
+				  event_logging);
+}
+
+/*
+ * Write the keywords of s_kmod_keyword_mask_map that describe mask into
+ * debug_string.  proc_mask_to_debug() clears
+ * PVFS2_MAX_DEBUG_STRING_LEN bytes of debug_string, so the buffer must be
+ * at least that large.
+ */
+int PVFS_proc_kmod_mask_to_eventlog(uint64_t mask, char *debug_string)
+{
+	return proc_mask_to_debug(s_kmod_keyword_mask_map,
+				  num_kmod_keyword_mask_map,
+				  mask,
+				  debug_string);
+}
+
+/*
+ * Write the keywords of s_keyword_mask_map that describe mask into
+ * debug_string.  proc_mask_to_debug() clears
+ * PVFS2_MAX_DEBUG_STRING_LEN bytes of debug_string, so the buffer must be
+ * at least that large.
+ */
+int PVFS_proc_mask_to_eventlog(uint64_t mask, char *debug_string)
+{
+
+	return proc_mask_to_debug(s_keyword_mask_map,
+				  num_keyword_mask_map,
+				  mask,
+				  debug_string);
+}
diff --git a/fs/orangefs/super.c b/fs/orangefs/super.c
new file mode 100644
index 0000000..298a85e
--- /dev/null
+++ b/fs/orangefs/super.c
@@ -0,0 +1,548 @@
+/*
+ * (C) 2001 Clemson University and The University of Chicago
+ *
+ * See COPYING in top-level directory.
+ */
+
+#include "protocol.h"
+#include "pvfs2-kernel.h"
+#include "pvfs2-bufmap.h"
+#include <linux/parser.h>
+
+/*
+ * a cache for pvfs2-inode objects (i.e. pvfs2 inode private data);
+ * created in pvfs2_inode_cache_initialize() and destroyed in
+ * pvfs2_inode_cache_finalize()
+ */
+static struct kmem_cache *pvfs2_inode_cache;
+
+/* list for storing pvfs2 specific superblocks in use */
+LIST_HEAD(pvfs2_superblocks);
+
+/*
+ * NOTE(review): presumably guards pvfs2_superblocks; the list helpers
+ * (add_pvfs2_sb/remove_pvfs2_sb) are not defined in this file - confirm.
+ */
+DEFINE_SPINLOCK(pvfs2_superblocks_lock);
+
+/* identifiers for the mount options understood by parse_mount_options() */
+enum {
+	Opt_intr,
+	Opt_acl,
+
+	/* returned by match_token() for anything not listed in tokens[] */
+	Opt_err
+};
+
+/* option strings accepted at mount time; the NULL pattern ends the table */
+static const match_table_t tokens = {
+	{ Opt_acl,	"acl" },
+	{ Opt_intr,	"intr" },
+	{ Opt_err,	NULL }
+};
+
+
+/*
+ * Parse a comma-separated mount option string and apply it to sb.
+ *
+ * @sb:      superblock whose flags are updated
+ * @options: option string; it is consumed by strsep() and therefore
+ *           modified.  Empty fields are skipped.
+ * @silent:  when non-zero, do not log an unsupported option
+ *
+ * Recognized options are "acl" (sets MS_POSIXACL in sb->s_flags) and
+ * "intr" (sets PVFS2_OPT_INTR in the private sb flags).  An option that
+ * is absent from the string has its flag cleared.
+ *
+ * Returns 0 on success, or -EINVAL for an unsupported option.  Nothing is
+ * applied unless the whole string parses, so a rejected remount leaves
+ * the superblock's current settings intact.
+ */
+static int parse_mount_options(struct super_block *sb, char *options,
+		int silent)
+{
+	struct pvfs2_sb_info_s *pvfs2_sb = PVFS2_SB(sb);
+	substring_t args[MAX_OPT_ARGS];
+	int want_acl = 0;
+	int want_intr = 0;
+	char *p;
+
+	while ((p = strsep(&options, ",")) != NULL) {
+		if (!*p)
+			continue;
+
+		switch (match_token(p, tokens, args)) {
+		case Opt_acl:
+			want_acl = 1;
+			break;
+		case Opt_intr:
+			want_intr = 1;
+			break;
+		default:
+			if (!silent)
+				gossip_err("Error: mount option [%s] is not supported.\n", p);
+			return -EINVAL;
+		}
+	}
+
+	/* the whole string was valid: commit the result */
+	if (want_acl)
+		sb->s_flags |= MS_POSIXACL;
+	else
+		sb->s_flags &= ~MS_POSIXACL;
+
+	if (want_intr)
+		pvfs2_sb->flags |= PVFS2_OPT_INTR;
+	else
+		pvfs2_sb->flags &= ~PVFS2_OPT_INTR;
+
+	return 0;
+}
+
+/*
+ * Constructor handed to kmem_cache_create() for pvfs2_inode_cache: sets
+ * up the xattr semaphore and the embedded VFS inode, and starts
+ * i_version at 1.
+ */
+static void pvfs2_inode_cache_ctor(void *req)
+{
+	struct pvfs2_inode_s *pi = req;
+	struct inode *vfs = &pi->vfs_inode;
+
+	init_rwsem(&pi->xattr_sem);
+
+	/* i_version is assigned only after the inode has been initialized */
+	inode_init_once(vfs);
+	vfs->i_version = 1;
+}
+
+/*
+ * super_operations.alloc_inode: take a pvfs2 inode from the slab cache,
+ * reset its pvfs2-private fields and return the embedded VFS inode.
+ * Returns NULL if the allocation fails.
+ */
+static struct inode *pvfs2_alloc_inode(struct super_block *sb)
+{
+	struct pvfs2_inode_s *pi = kmem_cache_alloc(pvfs2_inode_cache,
+						    PVFS2_CACHE_ALLOC_FLAGS);
+
+	if (!pi) {
+		gossip_err("Failed to allocate pvfs2_inode\n");
+		return NULL;
+	}
+
+	/*
+	 * Reset everything except the rw_semaphore and the vfs_inode,
+	 * which are left as they are.
+	 */
+	pi->pinode_flags = 0;
+	pi->last_failed_block_index_read = 0;
+	pi->refn.fs_id = PVFS_FS_ID_NULL;
+	memset(&pi->refn.khandle, 0, 16);
+	memset(pi->link_target, 0, sizeof(pi->link_target));
+
+	gossip_debug(GOSSIP_SUPER_DEBUG,
+		     "pvfs2_alloc_inode: allocated %p\n",
+		     &pi->vfs_inode);
+	return &pi->vfs_inode;
+}
+
+/*
+ * super_operations.destroy_inode: log the inode being destroyed and hand
+ * its pvfs2 container back to the slab cache.
+ */
+static void pvfs2_destroy_inode(struct inode *inode)
+{
+	struct pvfs2_inode_s *pi = PVFS2_I(inode);
+
+	gossip_debug(GOSSIP_SUPER_DEBUG,
+			"%s: deallocated %p destroying inode %pU\n",
+			__func__, pi, get_khandle_from_ino(inode));
+
+	kmem_cache_free(pvfs2_inode_cache, pi);
+}
+
+/*
+ * super_operations.statfs: ask pvfs2-client-core for file system
+ * statistics and copy them into buf.
+ *
+ * NOTE: information filled in here is typically reflected in the
+ * output of the system command 'df'
+ *
+ * Returns -ENOMEM if no operation could be allocated, otherwise the
+ * return value of service_operation().  buf is only filled in when the
+ * downcall status is not negative.
+ */
+static int pvfs2_statfs(struct dentry *dentry, struct kstatfs *buf)
+{
+	int ret = -ENOMEM;
+	struct pvfs2_kernel_op *new_op = NULL;
+	int flags = 0;
+	struct super_block *sb = NULL;
+
+	sb = dentry->d_sb;
+
+	gossip_debug(GOSSIP_SUPER_DEBUG,
+		     "pvfs2_statfs: called on sb %p (fs_id is %d)\n",
+		     sb,
+		     (int)(PVFS2_SB(sb)->fs_id));
+
+	new_op = op_alloc(PVFS2_VFS_OP_STATFS);
+	if (!new_op)
+		return ret;
+	new_op->upcall.req.statfs.fs_id = PVFS2_SB(sb)->fs_id;
+
+	/* let the wait be interrupted if mounted with "intr" */
+	if (PVFS2_SB(sb)->flags & PVFS2_OPT_INTR)
+		flags = PVFS2_OP_INTERRUPTIBLE;
+
+	ret = service_operation(new_op, "pvfs2_statfs", flags);
+
+	if (new_op->downcall.status < 0)
+		goto out_op_release;
+
+	gossip_debug(GOSSIP_SUPER_DEBUG,
+		     "pvfs2_statfs: got %ld blocks available | "
+		     "%ld blocks total | %ld block size\n",
+		     (long)new_op->downcall.resp.statfs.blocks_avail,
+		     (long)new_op->downcall.resp.statfs.blocks_total,
+		     (long)new_op->downcall.resp.statfs.block_size);
+
+	buf->f_type = sb->s_magic;
+	/*
+	 * f_fsid holds two ints while fs_id is handled as a single 32-bit
+	 * value throughout this file, so copying sizeof(buf->f_fsid) bytes
+	 * from &fs_id would read past that field.  Fill both words
+	 * explicitly instead.
+	 */
+	buf->f_fsid.val[0] = PVFS2_SB(sb)->fs_id;
+	buf->f_fsid.val[1] = PVFS2_SB(sb)->id;
+	buf->f_bsize = new_op->downcall.resp.statfs.block_size;
+	buf->f_namelen = PVFS2_NAME_LEN;
+
+	buf->f_blocks = (sector_t) new_op->downcall.resp.statfs.blocks_total;
+	buf->f_bfree = (sector_t) new_op->downcall.resp.statfs.blocks_avail;
+	buf->f_bavail = (sector_t) new_op->downcall.resp.statfs.blocks_avail;
+	buf->f_files = (sector_t) new_op->downcall.resp.statfs.files_total;
+	buf->f_ffree = (sector_t) new_op->downcall.resp.statfs.files_avail;
+	buf->f_frsize = sb->s_blocksize;
+
+out_op_release:
+	op_release(new_op);
+	gossip_debug(GOSSIP_SUPER_DEBUG, "pvfs2_statfs: returning %d\n", ret);
+	return ret;
+}
+
+/*
+ * Remount as initiated by VFS layer.  We just need to reparse the mount
+ * options, no need to signal pvfs2-client-core about it.
+ *
+ * The flags argument is not used.  Options are parsed with silent set,
+ * so an unsupported option is rejected without a log message.
+ * Returns the result of parse_mount_options().
+ */
+static int pvfs2_remount_fs(struct super_block *sb, int *flags, char *data)
+{
+	gossip_debug(GOSSIP_SUPER_DEBUG, "pvfs2_remount_fs: called\n");
+	return parse_mount_options(sb, data, 1);
+}
+
+/*
+ * Remount as initiated by pvfs2-client-core on restart.  This is used to
+ * repopulate mount information left from previous pvfs2-client-core.
+ *
+ * the idea here is that given a valid superblock, we're
+ * re-initializing the user space client with the initial mount
+ * information specified when the super block was first initialized.
+ * this is very different than the first initialization/creation of a
+ * superblock.  the operation is serviced with PVFS2_OP_PRIORITY to make
+ * sure that the mount gets ahead of any other pending operation that
+ * is waiting for servicing.  this means that the pvfs2-client won't
+ * fail to start several times for all other pending operations before
+ * the client regains all of the mount information from us.
+ * NOTE: this function assumes that the request_mutex is already acquired!
+ *
+ * Returns -ENOMEM if no operation could be allocated, otherwise the
+ * return value of service_operation().
+ */
+int pvfs2_remount(struct super_block *sb)
+{
+	struct pvfs2_kernel_op *new_op;
+	int ret;
+
+	gossip_debug(GOSSIP_SUPER_DEBUG, "pvfs2_remount: called\n");
+
+	new_op = op_alloc(PVFS2_VFS_OP_FS_MOUNT);
+	if (!new_op)
+		return -ENOMEM;
+
+	/*
+	 * strncpy() does not terminate the destination when the source
+	 * fills it, and the name is printed with %s right below, so
+	 * reserve the last byte for an explicit terminator.
+	 */
+	strncpy(new_op->upcall.req.fs_mount.pvfs2_config_server,
+		PVFS2_SB(sb)->devname,
+		PVFS_MAX_SERVER_ADDR_LEN - 1);
+	new_op->upcall.req.fs_mount.pvfs2_config_server[
+		PVFS_MAX_SERVER_ADDR_LEN - 1] = '\0';
+
+	gossip_debug(GOSSIP_SUPER_DEBUG,
+		     "Attempting PVFS2 Remount via host %s\n",
+		     new_op->upcall.req.fs_mount.pvfs2_config_server);
+
+	/*
+	 * we assume that the calling function has already acquired the
+	 * request_mutex to prevent other operations from bypassing
+	 * this one
+	 */
+	ret = service_operation(new_op, "pvfs2_remount",
+		PVFS2_OP_PRIORITY | PVFS2_OP_NO_SEMAPHORE);
+	gossip_debug(GOSSIP_SUPER_DEBUG,
+		     "pvfs2_remount: mount got return value of %d\n",
+		     ret);
+	if (ret == 0) {
+		/*
+		 * store the id assigned to this sb -- it's just a
+		 * short-lived mapping that the system interface uses
+		 * to map this superblock to a particular mount entry
+		 */
+		PVFS2_SB(sb)->id = new_op->downcall.resp.fs_mount.id;
+		PVFS2_SB(sb)->mount_pending = 0;
+	}
+
+	op_release(new_op);
+	return ret;
+}
+
+/* Stub: performs no work and always reports success (0). */
+int fsid_key_table_initialize(void)
+{
+	return 0;
+}
+
+/* Stub counterpart of fsid_key_table_initialize(): nothing to tear down. */
+void fsid_key_table_finalize(void)
+{
+	return;
+}
+
+/*
+ * super_operations.dirty_inode, called whenever the VFS dirties the inode
+ * in response to atime updates: log the handle and set the atime flag on
+ * the pvfs2 inode.  The flags argument is not looked at.
+ */
+static void pvfs2_dirty_inode(struct inode *inode, int flags)
+{
+	gossip_debug(GOSSIP_SUPER_DEBUG,
+		     "pvfs2_dirty_inode: %pU\n",
+		     get_khandle_from_ino(inode));
+
+	SetAtimeFlag(PVFS2_I(inode));
+}
+
+/*
+ * Superblock operations; installed as sb->s_op by pvfs2_fill_sb().
+ * Inode allocation, destruction, dirtying, statfs and remount are
+ * handled by the pvfs2 functions above; drop_inode and show_options use
+ * the generic VFS helpers.
+ */
+struct super_operations pvfs2_s_ops = {
+	.alloc_inode = pvfs2_alloc_inode,
+	.destroy_inode = pvfs2_destroy_inode,
+	.dirty_inode = pvfs2_dirty_inode,
+	.drop_inode = generic_delete_inode,
+	.statfs = pvfs2_statfs,
+	.remount_fs = pvfs2_remount_fs,
+	.show_options = generic_show_options,
+};
+
+/*
+ * export_operations.fh_to_dentry: rebuild an object reference from a file
+ * handle and return a dentry for it.
+ *
+ * The handle layout matches pvfs2_encode_fh(): a 16-byte khandle at the
+ * start of fid->raw, followed by the fs_id in raw[4].  Handles shorter
+ * than 5 words, or of a type above 2, yield NULL.
+ */
+struct dentry *pvfs2_fh_to_dentry(struct super_block *sb,
+				  struct fid *fid,
+				  int fh_len,
+				  int fh_type)
+{
+	PVFS_object_kref ref;
+
+	if (fh_type > 2 || fh_len < 5)
+		return NULL;
+
+	PVFS_khandle_from(&(ref.khandle), fid->raw, 16);
+	ref.fs_id = (u32) fid->raw[4];
+
+	gossip_debug(GOSSIP_SUPER_DEBUG,
+		     "fh_to_dentry: handle %pU, fs_id %d\n",
+		     &ref.khandle,
+		     ref.fs_id);
+
+	return d_obtain_alias(pvfs2_iget(sb, &ref));
+}
+
+/*
+ * export_operations.encode_fh: write a file handle for inode, and for
+ * parent when one is given, into fh.
+ *
+ * Layout in 32-bit words: the inode's 16-byte khandle in fh[0..3] and its
+ * fs_id in fh[4]; with a parent, the parent's khandle starts at byte
+ * offset 20 and its fs_id goes in fh[9].
+ *
+ * *max_len holds the buffer size in words on entry and is set to the
+ * number of words needed (5, or 10 with a parent) on return.
+ *
+ * Returns 1 for an inode-only handle, 2 when the parent is included, and
+ * 255 when the buffer is too small.
+ */
+int pvfs2_encode_fh(struct inode *inode,
+		    __u32 *fh,
+		    int *max_len,
+		    struct inode *parent)
+{
+	PVFS_object_kref refn;
+	int needed = parent ? 10 : 5;
+
+	if (*max_len < needed) {
+		gossip_lerr("fh buffer is too small for encoding\n");
+		*max_len = needed;
+		return 255;
+	}
+
+	refn = PVFS2_I(inode)->refn;
+	PVFS_khandle_to(&refn.khandle, fh, 16);
+	fh[4] = refn.fs_id;
+
+	gossip_debug(GOSSIP_SUPER_DEBUG,
+		     "Encoding fh: handle %pU, fsid %u\n",
+		     &refn.khandle,
+		     refn.fs_id);
+
+	if (!parent) {
+		*max_len = needed;
+		return 1;
+	}
+
+	refn = PVFS2_I(parent)->refn;
+	PVFS_khandle_to(&refn.khandle, (char *) fh + 20, 16);
+	fh[9] = refn.fs_id;
+
+	gossip_debug(GOSSIP_SUPER_DEBUG,
+		     "Encoding parent: handle %pU, fsid %u\n",
+		     &refn.khandle,
+		     refn.fs_id);
+
+	*max_len = needed;
+	return 2;
+}
+
+/*
+ * File handle export operations; installed as sb->s_export_op by
+ * pvfs2_fill_sb().  The table is never modified after build time, so it
+ * is const.
+ */
+static const struct export_operations pvfs2_export_ops = {
+	.encode_fh = pvfs2_encode_fh,
+	.fh_to_dentry = pvfs2_fh_to_dentry,
+};
+
+/*
+ * fill_super callback handed to mount_nodev() by pvfs2_mount().
+ *
+ * @sb:     superblock being set up
+ * @data:   struct pvfs2_mount_sb_info_t carrying the root handle, fs_id,
+ *          mount id and the raw option string
+ * @silent: passed through to parse_mount_options()
+ *
+ * Allocates the private sb info, applies the mount options, wires up the
+ * operation tables and creates the root inode and dentry.
+ *
+ * Returns 0 on success or a negative errno.  On failure sb->s_fs_info is
+ * left allocated; pvfs2_kill_sb() frees it.
+ */
+int pvfs2_fill_sb(struct super_block *sb, void *data, int silent)
+{
+	int ret = -EINVAL;
+	struct inode *root = NULL;
+	struct dentry *root_dentry = NULL;
+	struct pvfs2_mount_sb_info_t *mount_sb_info =
+		(struct pvfs2_mount_sb_info_t *) data;
+	PVFS_object_kref root_object;
+
+	/* alloc and init our private pvfs2 sb info */
+	sb->s_fs_info =
+		kmalloc(sizeof(struct pvfs2_sb_info_s), PVFS2_GFP_FLAGS);
+	if (!PVFS2_SB(sb))
+		return -ENOMEM;
+	memset(sb->s_fs_info, 0, sizeof(struct pvfs2_sb_info_s));
+	PVFS2_SB(sb)->sb = sb;
+
+	PVFS2_SB(sb)->root_khandle = mount_sb_info->root_khandle;
+	PVFS2_SB(sb)->fs_id = mount_sb_info->fs_id;
+	PVFS2_SB(sb)->id = mount_sb_info->id;
+
+	if (mount_sb_info->data) {
+		ret = parse_mount_options(sb, mount_sb_info->data,
+					  silent);
+		if (ret)
+			return ret;
+	}
+
+	/* Hang the xattr handlers off the superblock */
+	sb->s_xattr = pvfs2_xattr_handlers;
+	sb->s_magic = PVFS2_SUPER_MAGIC;
+	sb->s_op = &pvfs2_s_ops;
+	sb->s_d_op = &pvfs2_dentry_operations;
+
+	sb->s_blocksize = pvfs_bufmap_size_query();
+	sb->s_blocksize_bits = pvfs_bufmap_shift_query();
+	sb->s_maxbytes = MAX_LFS_FILESIZE;
+
+	root_object.khandle = PVFS2_SB(sb)->root_khandle;
+	root_object.fs_id = PVFS2_SB(sb)->fs_id;
+	gossip_debug(GOSSIP_SUPER_DEBUG,
+		     "get inode %pU, fsid %d\n",
+		     &root_object.khandle,
+		     root_object.fs_id);
+
+	root = pvfs2_iget(sb, &root_object);
+	if (IS_ERR(root))
+		return PTR_ERR(root);
+
+	gossip_debug(GOSSIP_SUPER_DEBUG,
+		     "Allocated root inode [%p] with mode %x\n",
+		     root,
+		     root->i_mode);
+
+	/*
+	 * allocates and places root dentry in dcache.  d_make_root()
+	 * consumes the inode reference even when it fails, so there must
+	 * be no iput() on the error path.
+	 */
+	root_dentry = d_make_root(root);
+	if (!root_dentry)
+		return -ENOMEM;
+
+	sb->s_export_op = &pvfs2_export_ops;
+	sb->s_root = root_dentry;
+	return 0;
+}
+
+/*
+ * file_system_type.mount: mount a pvfs2 file system.
+ *
+ * @fst:     file system type
+ * @flags:   mount flags, passed on to mount_nodev()
+ * @devname: address of the config server; required
+ * @data:    raw mount option string, parsed later by pvfs2_fill_sb()
+ *
+ * Sends a mount request to pvfs2-client-core, builds the superblock via
+ * mount_nodev()/pvfs2_fill_sb() from the reply, records devname in the
+ * private sb info and adds the superblock to the list of known pvfs2
+ * superblocks.
+ *
+ * Returns the root dentry on success, or an ERR_PTR() carrying the errno
+ * of the step that failed.
+ */
+struct dentry *pvfs2_mount(struct file_system_type *fst,
+			   int flags,
+			   const char *devname,
+			   void *data)
+{
+	int ret = -EINVAL;
+	struct super_block *sb;
+	struct pvfs2_kernel_op *new_op;
+	struct pvfs2_mount_sb_info_t mount_sb_info;
+	struct dentry *mnt_sb_d;
+
+	gossip_debug(GOSSIP_SUPER_DEBUG,
+		     "pvfs2_mount: called with devname %s\n",
+		     devname);
+
+	if (!devname) {
+		gossip_err("ERROR: device name not specified.\n");
+		return ERR_PTR(-EINVAL);
+	}
+
+	new_op = op_alloc(PVFS2_VFS_OP_FS_MOUNT);
+	if (!new_op)
+		return ERR_PTR(-ENOMEM);
+
+	/*
+	 * strncpy() leaves the destination unterminated when devname
+	 * fills it; keep the last byte for an explicit terminator since
+	 * the name is printed with %s below.
+	 */
+	strncpy(new_op->upcall.req.fs_mount.pvfs2_config_server,
+		devname,
+		PVFS_MAX_SERVER_ADDR_LEN - 1);
+	new_op->upcall.req.fs_mount.pvfs2_config_server[
+		PVFS_MAX_SERVER_ADDR_LEN - 1] = '\0';
+
+	gossip_debug(GOSSIP_SUPER_DEBUG,
+		     "Attempting PVFS2 Mount via host %s\n",
+		     new_op->upcall.req.fs_mount.pvfs2_config_server);
+
+	ret = service_operation(new_op, "pvfs2_mount", 0);
+	gossip_debug(GOSSIP_SUPER_DEBUG,
+		     "pvfs2_mount: mount got return value of %d\n", ret);
+	if (ret)
+		goto free_op;
+
+	if (new_op->downcall.resp.fs_mount.fs_id == PVFS_FS_ID_NULL) {
+		gossip_err("ERROR: Retrieved null fs_id\n");
+		ret = -EINVAL;
+		goto free_op;
+	}
+
+	/* fill in temporary structure passed to fill_sb method */
+	mount_sb_info.data = data;
+	mount_sb_info.root_khandle =
+		new_op->downcall.resp.fs_mount.root_khandle;
+	mount_sb_info.fs_id = new_op->downcall.resp.fs_mount.fs_id;
+	mount_sb_info.id = new_op->downcall.resp.fs_mount.id;
+
+	/*
+	 * the mount_sb_info structure looks odd, but it's used because
+	 * the private sb info isn't allocated until we call
+	 * pvfs2_fill_sb, yet we have the info we need to fill it with
+	 * here.  so we store it temporarily and pass all of the info
+	 * to fill_sb where it's properly copied out
+	 */
+	mnt_sb_d = mount_nodev(fst,
+			       flags,
+			       (void *)&mount_sb_info,
+			       pvfs2_fill_sb);
+	if (IS_ERR(mnt_sb_d)) {
+		/* report and return the real reason, not a stale 0 */
+		ret = PTR_ERR(mnt_sb_d);
+		goto free_op;
+	}
+
+	sb = mnt_sb_d->d_sb;
+
+	/*
+	 * on successful mount, store the devname and data
+	 * used
+	 */
+	strncpy(PVFS2_SB(sb)->devname,
+		devname,
+		PVFS_MAX_SERVER_ADDR_LEN - 1);
+	PVFS2_SB(sb)->devname[PVFS_MAX_SERVER_ADDR_LEN - 1] = '\0';
+
+	/* mount_pending must be cleared */
+	PVFS2_SB(sb)->mount_pending = 0;
+
+	/*
+	 * finally, add this sb to our list of known pvfs2
+	 * sb's
+	 */
+	add_pvfs2_sb(sb);
+	op_release(new_op);
+	return mnt_sb_d;
+
+free_op:
+	/*
+	 * ERR_PTR() needs a negative errno.  service_operation()'s
+	 * contract is not visible here, so guard against a positive
+	 * status rather than return it as a pointer.
+	 */
+	if (ret > 0)
+		ret = -EINVAL;
+
+	gossip_err("pvfs2_mount: mount request failed with %d\n", ret);
+	if (ret == -EINVAL) {
+		gossip_err("Ensure that all pvfs2-servers have the same FS configuration files\n");
+		gossip_err("Look at pvfs2-client-core log file (typically /tmp/pvfs2-client.log) for more details\n");
+	}
+
+	op_release(new_op);
+
+	mnt_sb_d = ERR_PTR(ret);
+	gossip_debug(GOSSIP_SUPER_DEBUG,
+		     "pvfs2_mount: returning dentry %p\n",
+		     mnt_sb_d);
+	return mnt_sb_d;
+}
+
+/*
+ * Tear down a pvfs2 superblock: tell userspace to forget the mount, drop
+ * the sb from the list of pvfs2 superblocks, run the generic anonymous
+ * superblock teardown and finally free the private sb info.
+ */
+void pvfs2_kill_sb(struct super_block *sb)
+{
+	gossip_debug(GOSSIP_SUPER_DEBUG, "pvfs2_kill_sb: called\n");
+
+	/*
+	 * issue the unmount to userspace to tell it to remove the
+	 * dynamic mount info it has for this superblock
+	 */
+	pvfs2_unmount_sb(sb);
+
+	/* remove the sb from our list of pvfs2 specific sb's */
+	remove_pvfs2_sb(sb);
+
+	/* provided sb cleanup */
+	kill_anon_super(sb);
+
+	/*
+	 * free the pvfs2 superblock private data
+	 * NOTE(review): sb is still dereferenced here, after
+	 * kill_anon_super(); this relies on sb outliving that call -
+	 * confirm.
+	 */
+	kfree(PVFS2_SB(sb));
+}
+
+/*
+ * Create the slab cache that pvfs2 inodes are allocated from, with
+ * pvfs2_inode_cache_ctor() as its constructor.
+ *
+ * Returns 0 on success, -ENOMEM if the cache could not be created.
+ */
+int pvfs2_inode_cache_initialize(void)
+{
+	pvfs2_inode_cache = kmem_cache_create("pvfs2_inode_cache",
+					      sizeof(struct pvfs2_inode_s),
+					      0,
+					      PVFS2_CACHE_CREATE_FLAGS,
+					      pvfs2_inode_cache_ctor);
+	if (pvfs2_inode_cache)
+		return 0;
+
+	gossip_err("Cannot create pvfs2_inode_cache\n");
+	return -ENOMEM;
+}
+
+/*
+ * Destroy the slab cache created by pvfs2_inode_cache_initialize().
+ * Always returns 0.
+ */
+int pvfs2_inode_cache_finalize(void)
+{
+	kmem_cache_destroy(pvfs2_inode_cache);
+	return 0;
+}
diff --git a/fs/orangefs/symlink.c b/fs/orangefs/symlink.c
new file mode 100644
index 0000000..7fed227
--- /dev/null
+++ b/fs/orangefs/symlink.c
@@ -0,0 +1,30 @@
+/*
+ * (C) 2001 Clemson University and The University of Chicago
+ *
+ * See COPYING in top-level directory.
+ */
+
+#include "protocol.h"
+#include "pvfs2-kernel.h"
+#include "pvfs2-bufmap.h"
+
+/*
+ * inode_operations.follow_link: hand the link target stored in the pvfs2
+ * inode to the VFS through nd_set_link().  Always returns NULL.
+ */
+static void *pvfs2_follow_link(struct dentry *dentry, struct nameidata *nd)
+{
+	struct pvfs2_inode_s *pi = PVFS2_I(dentry->d_inode);
+	char *link = pi->link_target;
+
+	gossip_debug(GOSSIP_INODE_DEBUG,
+		     "pvfs2: %s called on %s (target is %p)\n",
+		     __func__, (char *)dentry->d_name.name, link);
+
+	nd_set_link(nd, link);
+	return NULL;
+}
+
+/*
+ * Inode operations for pvfs2 symbolic links: link resolution goes through
+ * pvfs2_follow_link(), attribute handling through the pvfs2 setattr,
+ * getattr and listxattr functions, and readlink/setxattr through the
+ * generic VFS helpers.
+ */
+struct inode_operations pvfs2_symlink_inode_operations = {
+	.readlink = generic_readlink,
+	.follow_link = pvfs2_follow_link,
+	.setattr = pvfs2_setattr,
+	.getattr = pvfs2_getattr,
+	.listxattr = pvfs2_listxattr,
+	.setxattr = generic_setxattr,
+};
-- 
1.8.3.1

--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html