[PATCH 19/39] reiserfs: journaled xattrs

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



 Deadlocks are possible in the xattr code between the journal lock and the
 xattr sems.

 This patch implements journalling for xattr operations. The benefit is
 twofold:
 * It gets rid of the deadlock possibility by always ensuring that xattr
   write operations are initiated inside a transaction.
 * It corrects the problem where xattr backing files aren't considered any
   differently than normal files, despite the fact they are metadata.

 I discussed the added journal load with Chris Mason, and we decided that
 since xattr operations (versus other journal activity) are fairly rare, the
 introduction of larger transactions to support journaled xattrs wouldn't be too big a deal.

Signed-off-by: Jeff Mahoney <jeffm@xxxxxxxx>

--
 fs/reiserfs/inode.c            |    3 -
 fs/reiserfs/namei.c            |   14 +----
 fs/reiserfs/xattr.c            |   52 +++++++++++++++++++-
 fs/reiserfs/xattr_acl.c        |  104 +++++++++++++++++++++++++++++++----------
 include/linux/reiserfs_acl.h   |    3 -
 include/linux/reiserfs_fs.h    |    4 +
 include/linux/reiserfs_xattr.h |   28 +++++++++++
 7 files changed, 168 insertions(+), 40 deletions(-)

--- a/fs/reiserfs/inode.c	2007-05-30 15:44:34.000000000 -0400
+++ b/fs/reiserfs/inode.c	2007-05-30 17:55:07.000000000 -0400
@@ -1920,9 +1920,8 @@ int reiserfs_new_inode(struct reiserfs_t
 		goto out_inserted_sd;
 	}
 
-	/* XXX CHECK THIS */
 	if (reiserfs_posixacl(inode->i_sb)) {
-		retval = reiserfs_inherit_default_acl(dir, dentry, inode);
+		retval = reiserfs_inherit_default_acl(th, dir, dentry, inode);
 		if (retval) {
 			err = retval;
 			reiserfs_check_path(&path_to_key);
--- a/fs/reiserfs/namei.c	2007-05-30 15:44:34.000000000 -0400
+++ b/fs/reiserfs/namei.c	2007-05-30 17:55:07.000000000 -0400
@@ -606,15 +606,13 @@ static int reiserfs_create(struct inode 
 	    2 * (REISERFS_QUOTA_INIT_BLOCKS(dir->i_sb) +
 		 REISERFS_QUOTA_TRANS_BLOCKS(dir->i_sb));
 	struct reiserfs_transaction_handle th;
-	int locked;
 
 	if (!(inode = new_inode(dir->i_sb))) {
 		return -ENOMEM;
 	}
 	new_inode_init(inode, dir, mode);
 
-	locked = reiserfs_cache_default_acl(dir);
-
+	jbegin_count += reiserfs_cache_default_acl(dir);
 	reiserfs_write_lock(dir->i_sb);
 
 	retval = journal_begin(&th, dir->i_sb, jbegin_count);
@@ -668,7 +666,6 @@ static int reiserfs_mknod(struct inode *
 	    JOURNAL_PER_BALANCE_CNT * 3 +
 	    2 * (REISERFS_QUOTA_INIT_BLOCKS(dir->i_sb) +
 		 REISERFS_QUOTA_TRANS_BLOCKS(dir->i_sb));
-	int locked;
 
 	if (!new_valid_dev(rdev))
 		return -EINVAL;
@@ -678,8 +675,7 @@ static int reiserfs_mknod(struct inode *
 	}
 	new_inode_init(inode, dir, mode);
 
-	locked = reiserfs_cache_default_acl(dir);
-
+	jbegin_count += reiserfs_cache_default_acl(dir);
 	reiserfs_write_lock(dir->i_sb);
 
 	retval = journal_begin(&th, dir->i_sb, jbegin_count);
@@ -736,7 +732,6 @@ static int reiserfs_mkdir(struct inode *
 	    JOURNAL_PER_BALANCE_CNT * 3 +
 	    2 * (REISERFS_QUOTA_INIT_BLOCKS(dir->i_sb) +
 		 REISERFS_QUOTA_TRANS_BLOCKS(dir->i_sb));
-	int locked;
 
 #ifdef DISPLACE_NEW_PACKING_LOCALITIES
 	/* set flag that new packing locality created and new blocks for the content     * of that directory are not displaced yet */
@@ -748,8 +743,7 @@ static int reiserfs_mkdir(struct inode *
 	}
 	new_inode_init(inode, dir, mode);
 
-	locked = reiserfs_cache_default_acl(dir);
-
+	jbegin_count += reiserfs_cache_default_acl(dir);
 	reiserfs_write_lock(dir->i_sb);
 
 	retval = journal_begin(&th, dir->i_sb, jbegin_count);
@@ -1036,8 +1030,6 @@ static int reiserfs_symlink(struct inode
 	memcpy(name, symname, strlen(symname));
 	padd_item(name, item_len, strlen(symname));
 
-	/* We would inherit the default ACL here, but symlinks don't get ACLs */
-
 	retval = journal_begin(&th, parent_dir->i_sb, jbegin_count);
 	if (retval) {
 		drop_new_inode(inode);
--- a/fs/reiserfs/xattr.c	2007-05-30 15:44:34.000000000 -0400
+++ b/fs/reiserfs/xattr.c	2007-05-30 17:55:07.000000000 -0400
@@ -405,8 +405,9 @@ static inline __u32 xattr_hash(const cha
  * inode->i_mutex: down
  */
 int
-reiserfs_xattr_set(struct inode *inode, const char *name, const void *buffer,
-		   size_t buffer_size, int flags)
+reiserfs_xattr_set_handle(struct reiserfs_transaction_handle *th,
+                          struct inode *inode, const char *name,
+			  const void *buffer, size_t buffer_size, int flags)
 {
 	int err = 0;
 	struct file *fp;
@@ -532,6 +533,36 @@ reiserfs_xattr_set(struct inode *inode, 
 	return err;
 }
 
+/* Transactional front end for reiserfs_xattr_set_handle(): the write has to
+ * happen inside a transaction so that lock ordering is maintained. */
+int reiserfs_xattr_set(struct inode *inode, const char *name,
+                       const void *buffer, size_t buffer_size, int flags)
+{
+	struct reiserfs_transaction_handle th;
+	size_t nblocks = reiserfs_xattr_nblocks(inode, buffer_size);
+	int ret, end_ret;
+
+	/* Anything but a pure replace may have to create objects. */
+	if ((flags & XATTR_REPLACE) == 0)
+		nblocks += reiserfs_xattr_jcreate_nblocks(inode);
+
+	reiserfs_write_lock(inode->i_sb);
+	ret = journal_begin(&th, inode->i_sb, nblocks);
+	if (ret)
+		goto unlock;
+
+	ret = reiserfs_xattr_set_handle(&th, inode, name,
+	                                buffer, buffer_size, flags);
+
+	/* Report the set's error first, otherwise journal_end's. */
+	end_ret = journal_end(&th, inode->i_sb, nblocks);
+	if (!ret)
+		ret = end_ret;
+unlock:
+	reiserfs_write_unlock(inode->i_sb);
+	return ret;
+}
+
 /*
  * inode->i_mutex: down
  */
@@ -713,11 +744,28 @@ int reiserfs_delete_xattrs(struct inode 
 
 	/* Leftovers besides . and .. -- that's not good. */
 	if (dir->d_inode->i_nlink <= 2) {
+		struct reiserfs_transaction_handle th;
+		int jbegin_count;
+		int jerr;
+
+		jbegin_count = JOURNAL_PER_BALANCE_CNT * 2 + 2 + 4 *
+		               REISERFS_QUOTA_TRANS_BLOCKS(inode->i_sb);
+		reiserfs_write_lock(inode->i_sb);
+		err = journal_begin (&th, inode->i_sb, jbegin_count);
+		if (err) {
+			reiserfs_write_unlock(inode->i_sb);
+			unlock_kernel();
+			goto out_dir;
+		}
 		root = open_xa_root(inode->i_sb, XATTR_REPLACE);
 		mutex_lock_nested(&root->d_inode->i_mutex, I_MUTEX_XATTR);
 		err = vfs_rmdir(root->d_inode, dir);
 		mutex_unlock(&root->d_inode->i_mutex);
+		jerr = journal_end (&th, inode->i_sb, jbegin_count);
+		reiserfs_write_unlock(inode->i_sb);
 		dput(root);
+		if (!err && jerr)
+			err = jerr;
 	} else {
 		reiserfs_warning(inode->i_sb, "jdm-20004",
 				 "Couldn't remove all entries in directory");
--- a/fs/reiserfs/xattr_acl.c	2007-05-30 15:44:34.000000000 -0400
+++ b/fs/reiserfs/xattr_acl.c	2007-05-30 15:44:48.000000000 -0400
@@ -10,15 +10,17 @@
 #include <linux/reiserfs_acl.h>
 #include <asm/uaccess.h>
 
-static int reiserfs_set_acl(struct inode *inode, int type,
+static int reiserfs_set_acl(struct reiserfs_transaction_handle *th,
+                            struct inode *inode, int type,
 			    struct posix_acl *acl);
 
 static int
 xattr_set_acl(struct inode *inode, int type, const void *value, size_t size)
 {
 	struct posix_acl *acl;
-	int error;
-
+	int error, error2;
+	struct reiserfs_transaction_handle th;
+	size_t jcreate_blocks;
 	if (!reiserfs_posixacl(inode->i_sb))
 		return -EOPNOTSUPP;
 	if ((current->fsuid != inode->i_uid) && !capable(CAP_FOWNER))
@@ -36,7 +38,21 @@ xattr_set_acl(struct inode *inode, int t
 	} else
 		acl = NULL;
 
-	error = reiserfs_set_acl(inode, type, acl);
+	/* Pessimism: We can't assume that anything from the xattr root up
+	 * has been created. */
+
+	jcreate_blocks = reiserfs_xattr_jcreate_nblocks(inode) +
+	                 reiserfs_xattr_nblocks(inode, size) * 2;
+
+	reiserfs_write_lock(inode->i_sb);
+	error = journal_begin(&th, inode->i_sb, jcreate_blocks);
+	if (error == 0) {
+		error = reiserfs_set_acl(&th, inode, type, acl);
+		error2 = journal_end(&th, inode->i_sb, jcreate_blocks);
+		if (error2)
+			error = error2;
+	}
+	reiserfs_write_unlock(inode->i_sb);
 
       release_and_out:
 	posix_acl_release(acl);
@@ -266,7 +282,8 @@ struct posix_acl *reiserfs_get_acl(struc
  * BKL held [before 2.5.x]
  */
 static int
-reiserfs_set_acl(struct inode *inode, int type, struct posix_acl *acl)
+reiserfs_set_acl(struct reiserfs_transaction_handle *th, struct inode *inode,
+                 int type, struct posix_acl *acl)
 {
 	char *name;
 	void *value = NULL;
@@ -313,7 +330,7 @@ reiserfs_set_acl(struct inode *inode, in
 			return (int)PTR_ERR(value);
 	}
 
-	error = reiserfs_xattr_set(inode, name, value, size, 0);
+	error = reiserfs_xattr_set_handle(th, inode, name, value, size, 0);
 
 	kfree(value);
 
@@ -326,7 +343,8 @@ reiserfs_set_acl(struct inode *inode, in
 /* dir->i_mutex: locked,
  * inode is new and not released into the wild yet */
 int
-reiserfs_inherit_default_acl(struct inode *dir, struct dentry *dentry,
+reiserfs_inherit_default_acl(struct reiserfs_transaction_handle *th,
+                             struct inode *dir, struct dentry *dentry,
 			     struct inode *inode)
 {
 	struct posix_acl *acl;
@@ -363,7 +381,8 @@ reiserfs_inherit_default_acl(struct inod
 
 		/* Copy the default ACL to the default ACL of a new directory */
 		if (S_ISDIR(inode->i_mode)) {
-			err = reiserfs_set_acl(inode, ACL_TYPE_DEFAULT, acl);
+			err = reiserfs_set_acl(th, inode, ACL_TYPE_DEFAULT,
+			                       acl);
 			if (err)
 				goto cleanup;
 		}
@@ -384,9 +403,9 @@ reiserfs_inherit_default_acl(struct inod
 
 			/* If we need an ACL.. */
 			if (need_acl > 0) {
-				err =
-				    reiserfs_set_acl(inode, ACL_TYPE_ACCESS,
-						     acl_copy);
+				err = reiserfs_set_acl(th, inode,
+				                              ACL_TYPE_ACCESS,
+                                                              acl_copy);
 				if (err)
 					goto cleanup_copy;
 			}
@@ -404,21 +423,45 @@ reiserfs_inherit_default_acl(struct inod
 	return err;
 }
 
-/* Looks up and caches the result of the default ACL.
- * We do this so that we don't need to carry the xattr_sem into
- * reiserfs_new_inode if we don't need to */
+/* This is used to cache the default acl before a new object is created.
+ * The biggest reason for this is to get an idea of how many blocks will
+ * actually be required for the create operation if we must inherit an ACL.
+ * An ACL write can add up to 3 object creations and an additional file write
+ * so we'd prefer not to reserve that many blocks in the journal if we can.
+ * It also has the advantage of not loading the ACL with a transaction open,
+ * this may seem silly, but if the owner of the directory is doing the
+ * creation, the ACL may not be loaded since the permissions wouldn't require
+ * it.
+ * We return the number of blocks required for the transaction.
+ */
 int reiserfs_cache_default_acl(struct inode *inode)
 {
-	int ret = 0;
-	if (reiserfs_posixacl(inode->i_sb) && !is_reiserfs_priv_object(inode)) {
-		struct posix_acl *acl;
-		acl = reiserfs_get_acl(inode, ACL_TYPE_DEFAULT);
-		ret = (acl && !IS_ERR(acl));
-		if (ret)
-			posix_acl_release(acl);
+	struct posix_acl *acl;
+	int nblocks = 0;
+
+	if (is_reiserfs_priv_object(inode))
+		return 0;
+
+	acl = reiserfs_get_acl(inode, ACL_TYPE_DEFAULT);
+
+	if (acl && !IS_ERR(acl)) {
+		int size = reiserfs_acl_size(acl->a_count);
+
+		/* Other xattrs can be created during inode creation. We don't
+		 * want to claim too many blocks, so we check to see if we
+		 * need to create the tree to the xattrs, and then we
+		 * just want two files. */
+		nblocks = reiserfs_xattr_jcreate_nblocks(inode);
+		nblocks += JOURNAL_BLOCKS_PER_OBJECT(inode->i_sb);
+
+		REISERFS_I(inode)->i_flags |= i_has_xattr_dir;
+
+		/* We need to account for writes + bitmaps for two files */
+		nblocks += reiserfs_xattr_nblocks(inode, size) * 4;
+		posix_acl_release(acl);
 	}
 
-	return ret;
+	return nblocks;
 }
 
 int reiserfs_acl_chmod(struct inode *inode)
@@ -444,8 +487,21 @@ int reiserfs_acl_chmod(struct inode *ino
 	if (!clone)
 		return -ENOMEM;
 	error = posix_acl_chmod_masq(clone, inode->i_mode);
-	if (!error)
-		error = reiserfs_set_acl(inode, ACL_TYPE_ACCESS, clone);
+	if (!error) {
+		struct reiserfs_transaction_handle th;
+		size_t size = reiserfs_xattr_nblocks(inode,
+		                                     reiserfs_acl_size(clone->a_count));
+		reiserfs_write_lock(inode->i_sb);
+		error = journal_begin (&th, inode->i_sb, size * 2);
+		if (!error) {
+			int error2;
+			error = reiserfs_set_acl(&th, inode, ACL_TYPE_ACCESS, clone);
+			error2 = journal_end(&th, inode->i_sb, size * 2);
+			if (error2)
+				error = error2;
+		}
+		reiserfs_write_unlock(inode->i_sb);
+	}
 	posix_acl_release(clone);
 	return error;
 }
--- a/include/linux/reiserfs_acl.h	2007-05-30 15:44:34.000000000 -0400
+++ b/include/linux/reiserfs_acl.h	2007-05-30 15:44:48.000000000 -0400
@@ -49,7 +49,8 @@ static inline int reiserfs_acl_count(siz
 #ifdef CONFIG_REISERFS_FS_POSIX_ACL
 struct posix_acl *reiserfs_get_acl(struct inode *inode, int type);
 int reiserfs_acl_chmod(struct inode *inode);
-int reiserfs_inherit_default_acl(struct inode *dir, struct dentry *dentry,
+int reiserfs_inherit_default_acl(struct reiserfs_transaction_handle *th,
+                                 struct inode *dir, struct dentry *dentry,
 				 struct inode *inode);
 int reiserfs_cache_default_acl(struct inode *dir);
 extern struct xattr_handler reiserfs_posix_acl_default_handler;
--- a/include/linux/reiserfs_fs.h	2007-05-30 15:44:34.000000000 -0400
+++ b/include/linux/reiserfs_fs.h	2007-05-30 17:55:07.000000000 -0400
@@ -1595,6 +1595,10 @@ struct reiserfs_journal_header {
 #define JOURNAL_MAX_COMMIT_AGE 30
 #define JOURNAL_MAX_TRANS_AGE 30
 #define JOURNAL_PER_BALANCE_CNT (3 * (MAX_HEIGHT-2) + 9)
+#define JOURNAL_BLOCKS_PER_OBJECT(sb)  (JOURNAL_PER_BALANCE_CNT * 3 + \
+                                        2 * (REISERFS_QUOTA_INIT_BLOCKS(sb) + \
+                                             REISERFS_QUOTA_TRANS_BLOCKS(sb)))
+
 #ifdef CONFIG_QUOTA
 /* We need to update data and inode (atime) */
 #define REISERFS_QUOTA_TRANS_BLOCKS(s) (REISERFS_SB(s)->s_mount_opt & (1<<REISERFS_QUOTA) ? 2 : 0)
--- a/include/linux/reiserfs_xattr.h	2007-05-30 15:44:34.000000000 -0400
+++ b/include/linux/reiserfs_xattr.h	2007-05-30 17:55:07.000000000 -0400
@@ -45,6 +45,7 @@ int reiserfs_permission(struct inode *in
 
 int reiserfs_xattr_get(struct inode *, const char *, void *, size_t);
 int reiserfs_xattr_set(struct inode *, const char *, const void *, size_t, int);
+int reiserfs_xattr_set_handle(struct reiserfs_transaction_handle *,struct inode *, const char *, const void *, size_t, int);
 
 extern struct xattr_handler reiserfs_xattr_user_handler;
 extern struct xattr_handler reiserfs_xattr_trusted_handler;
@@ -56,6 +57,33 @@ static inline void reiserfs_mark_inode_p
 	inode->i_flags |= S_PRIVATE;
 }
 
+/* xattr_size: value length plus sizeof the xattr header. reiserfs_xattr_nblocks: that size rounded up to whole blocks, or 0 when reiserfs_file_data_log(inode) is false. */
+#define xattr_size(size) ((size) + sizeof (struct reiserfs_xattr_header))
+#define reiserfs_xattr_nblocks(inode, size) (reiserfs_file_data_log(inode) ? (_ROUND_UP(xattr_size(size), (inode)->i_sb->s_blocksize) >> (inode)->i_sb->s_blocksize_bits) : 0)
+/* We may have to create up to 3 objects: xattr root, xattr dir, xattr file.
+ * Let's try to be smart about it.
+ * xattr root: We cache it. If it's not cached, we may need to create it.
+ * xattr dir: If anything has been loaded for this inode, we can set a flag
+ *            saying so.
+ * xattr file: Since we don't cache xattrs, we can't tell. We always include
+ *             blocks for it.
+ *
+ * However, since root and dir can be created between calls - YOU MUST SAVE
+ * THIS VALUE.
+ */
+static inline size_t reiserfs_xattr_jcreate_nblocks(struct inode *inode)
+{
+	const size_t per_object = JOURNAL_BLOCKS_PER_OBJECT(inode->i_sb);
+	size_t total = per_object;	/* always budget for the xattr file */
+
+	if (REISERFS_I(inode)->i_flags & i_has_xattr_dir)
+		return total;	/* flag set: no dir/root budget added */
+	total += per_object;	/* flag clear: budget for the xattr dir */
+	if (!REISERFS_SB(inode->i_sb)->xattr_root)
+		total += per_object;	/* no cached root: budget for it too */
+	return total;
+}
+
 #else
 
 #define is_reiserfs_priv_object(inode) 0

-- 

-
To unsubscribe from this list: send the line "unsubscribe reiserfs-devel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[Index of Archives]     [Linux File System Development]     [Linux BTRFS]     [Linux NFS]     [Linux Filesystems]     [Ext4 Filesystem]     [Kernel Newbies]     [Share Photos]     [Security]     [Netfilter]     [Bugtraq]     [Yosemite Forum]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Samba]     [Device Mapper]     [Linux Resources]

  Powered by Linux