On Mon, Jun 20, 2011 at 06:12:10PM +0800, Miao Xie wrote: > >From 457f39393b2e3d475fbba029b90b6a4e17b94d43 Mon Sep 17 00:00:00 2001 > From: Miao Xie <miaox@xxxxxxxxxxxxxx> > Date: Mon, 20 Jun 2011 17:21:51 +0800 > Subject: [PATCH] btrfs: fix inconsonant inode information > > When iputting the inode, We may leave the delayed nodes if they have some > delayed items that have not been dealt with. So when the inode is read again, > we must look up the relative delayed node, and use the information in it to > initialize the inode. Or we will get inconsonant inode information. > > Signed-off-by: Miao Xie <miaox@xxxxxxxxxxxxxx> > --- > fs/btrfs/delayed-inode.c | 104 +++++++++++++++++++++++++++++++++++----------- > fs/btrfs/delayed-inode.h | 1 + > fs/btrfs/inode.c | 12 ++++- > 3 files changed, 91 insertions(+), 26 deletions(-) > > diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c > index f1cbd02..280755e 100644 > --- a/fs/btrfs/delayed-inode.c > +++ b/fs/btrfs/delayed-inode.c > @@ -82,19 +82,16 @@ static inline struct btrfs_delayed_root *btrfs_get_delayed_root( > return root->fs_info->delayed_root; > } > > -static struct btrfs_delayed_node *btrfs_get_or_create_delayed_node( > - struct inode *inode) > +static struct btrfs_delayed_node *btrfs_get_delayed_node(struct inode *inode) > { > - struct btrfs_delayed_node *node; > struct btrfs_inode *btrfs_inode = BTRFS_I(inode); > struct btrfs_root *root = btrfs_inode->root; > u64 ino = btrfs_ino(inode); > - int ret; > + struct btrfs_delayed_node *node; > > -again: > node = ACCESS_ONCE(btrfs_inode->delayed_node); do you still need the volatile access here, after the again: label has been removed? it does not break things if it's there, but it raises questions ... > if (node) { > - atomic_inc(&node->refs); /* can be accessed */ > + atomic_inc(&node->refs); > return node; > } > > @@ -103,7 +100,9 @@ again: > if (node) { > if (btrfs_inode->delayed_node) { > spin_unlock(&root->inode_lock); > - goto again; > + BUG_ON(btrfs_inode->delayed_node != node); > + atomic_inc(&node->refs); /* can be accessed */ > + return node; > } > btrfs_inode->delayed_node = node; > atomic_inc(&node->refs); /* can be accessed */ > @@ -113,6 +112,23 @@ again: > } > spin_unlock(&root->inode_lock); > > + return NULL; > +} > + > +static struct btrfs_delayed_node *btrfs_get_or_create_delayed_node( > + struct inode *inode) > +{ > + struct btrfs_delayed_node *node; > + struct btrfs_inode *btrfs_inode = BTRFS_I(inode); > + struct btrfs_root *root = btrfs_inode->root; > + u64 ino = btrfs_ino(inode); > + int ret; > + > +again: > + node = btrfs_get_delayed_node(inode); ... aha, it's been somehow moved here, which copies the original logic. Now reading inode->delayed_node is inside a function and I do not think that compiler could optimize reading value of btrfs_inode->delayed_node that it would require ACCESS_ONCE. And there is another ACCESS_ONCE in btrfs_remove_delayed_node. I wonder what's the reason for that. Sorry to abuse this thread, but I'd like to be sure about protection of the ->delayed_node members inside btrfs_inode. Can you please comment on that? If you recall one message from the original report: [ 5447.554187] err add delayed dir index item(name: pglog_0.965_0) into the insertion tree of the delayed node(root id: 262, inode id: 258, errno: -17) (-17 == -EEXIST) a printk after return from __btrfs_add_delayed_item (which is able to return -EEXIST) in btrfs_insert_delayed_dir_index. I haven't looked farther, but it seems that the item is being inserted (at least) twice and I suspect missing locking or other type of protection. thanks, david > + if (node) > + return node; > + > node = kmem_cache_alloc(delayed_node_cache, GFP_NOFS); > if (!node) > return ERR_PTR(-ENOMEM); > @@ -548,19 +564,6 @@ struct btrfs_delayed_item *__btrfs_next_delayed_item( > return next; > } > > -static inline struct btrfs_delayed_node *btrfs_get_delayed_node( > - struct inode *inode) > -{ > - struct btrfs_inode *btrfs_inode = BTRFS_I(inode); > - struct btrfs_delayed_node *delayed_node; > - > - delayed_node = btrfs_inode->delayed_node; > - if (delayed_node) > - atomic_inc(&delayed_node->refs); > - > - return delayed_node; > -} > - > static inline struct btrfs_root *btrfs_get_fs_root(struct btrfs_root *root, > u64 root_id) > { > @@ -1404,8 +1407,7 @@ end: > > int btrfs_inode_delayed_dir_index_count(struct inode *inode) > { > - struct btrfs_delayed_node *delayed_node = BTRFS_I(inode)->delayed_node; > - int ret = 0; > + struct btrfs_delayed_node *delayed_node = btrfs_get_delayed_node(inode); > > if (!delayed_node) > return -ENOENT; > @@ -1415,11 +1417,14 @@ int btrfs_inode_delayed_dir_index_count(struct inode *inode) > * a new directory index is added into the delayed node and index_cnt > * is updated now. So we needn't lock the delayed node. > */ > - if (!delayed_node->index_cnt) > + if (!delayed_node->index_cnt) { > + btrfs_release_delayed_node(delayed_node); > return -EINVAL; > + } > > BTRFS_I(inode)->index_cnt = delayed_node->index_cnt; > - return ret; > + btrfs_release_delayed_node(delayed_node); > + return 0; > } > > void btrfs_get_delayed_items(struct inode *inode, struct list_head *ins_list, > @@ -1613,6 +1618,57 @@ static void fill_stack_inode_item(struct btrfs_trans_handle *trans, > inode->i_ctime.tv_nsec); > } > > +int btrfs_fill_inode(struct inode *inode, u32 *rdev) > +{ > + struct btrfs_delayed_node *delayed_node; > + struct btrfs_inode_item *inode_item; > + struct btrfs_timespec *tspec; > + > + delayed_node = btrfs_get_delayed_node(inode); > + if (!delayed_node) > + return -ENOENT; > + > + mutex_lock(&delayed_node->mutex); > + if (!delayed_node->inode_dirty) { > + mutex_unlock(&delayed_node->mutex); > + btrfs_release_delayed_node(delayed_node); > + return -ENOENT; > + } > + > + inode_item = &delayed_node->inode_item; > + > + inode->i_uid = btrfs_stack_inode_uid(inode_item); > + inode->i_gid = btrfs_stack_inode_gid(inode_item); > + btrfs_i_size_write(inode, btrfs_stack_inode_size(inode_item)); > + inode->i_mode = btrfs_stack_inode_mode(inode_item); > + inode->i_nlink = btrfs_stack_inode_nlink(inode_item); > + inode_set_bytes(inode, btrfs_stack_inode_nbytes(inode_item)); > + BTRFS_I(inode)->generation = btrfs_stack_inode_generation(inode_item); > + BTRFS_I(inode)->sequence = btrfs_stack_inode_sequence(inode_item); > + inode->i_rdev = 0; > + *rdev = btrfs_stack_inode_rdev(inode_item); > + BTRFS_I(inode)->flags = btrfs_stack_inode_flags(inode_item); > + > + tspec = btrfs_inode_atime(inode_item); > + inode->i_atime.tv_sec = btrfs_stack_timespec_sec(tspec); > + inode->i_atime.tv_nsec = btrfs_stack_timespec_nsec(tspec); > + > + tspec = btrfs_inode_mtime(inode_item); > + inode->i_mtime.tv_sec = btrfs_stack_timespec_sec(tspec); > + inode->i_mtime.tv_nsec = btrfs_stack_timespec_nsec(tspec); > + > + tspec = btrfs_inode_ctime(inode_item); > + inode->i_ctime.tv_sec = btrfs_stack_timespec_sec(tspec); > + inode->i_ctime.tv_nsec = btrfs_stack_timespec_nsec(tspec); > + > + inode->i_generation = BTRFS_I(inode)->generation; > + BTRFS_I(inode)->index_cnt = (u64)-1; > + > + mutex_unlock(&delayed_node->mutex); > + btrfs_release_delayed_node(delayed_node); > + return 0; > +} > + > int btrfs_delayed_update_inode(struct btrfs_trans_handle *trans, > struct btrfs_root *root, struct inode *inode) > { > diff --git a/fs/btrfs/delayed-inode.h b/fs/btrfs/delayed-inode.h > index d1a6a29..8d27af4 100644 > --- a/fs/btrfs/delayed-inode.h > +++ b/fs/btrfs/delayed-inode.h > @@ -119,6 +119,7 @@ void btrfs_kill_delayed_inode_items(struct inode *inode); > > int btrfs_delayed_update_inode(struct btrfs_trans_handle *trans, > struct btrfs_root *root, struct inode *inode); > +int btrfs_fill_inode(struct inode *inode, u32 *rdev); > > /* Used for drop dead root */ > void btrfs_kill_all_delayed_nodes(struct btrfs_root *root); > diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c > index 5813dec..1d25a04 100644 > --- a/fs/btrfs/inode.c > +++ b/fs/btrfs/inode.c > @@ -2509,6 +2509,11 @@ static void btrfs_read_locked_inode(struct inode *inode) > int maybe_acls; > u32 rdev; > int ret; > + bool filled = false; > + > + ret = btrfs_fill_inode(inode, &rdev); > + if (!ret) > + filled = true; > > path = btrfs_alloc_path(); > BUG_ON(!path); > @@ -2520,6 +2525,10 @@ static void btrfs_read_locked_inode(struct inode *inode) > goto make_bad; > > leaf = path->nodes[0]; > + > + if (filled) > + goto cache_acl; > + > inode_item = btrfs_item_ptr(leaf, path->slots[0], > struct btrfs_inode_item); > if (!leaf->map_token) > @@ -2556,7 +2565,7 @@ static void btrfs_read_locked_inode(struct inode *inode) > > BTRFS_I(inode)->index_cnt = (u64)-1; > BTRFS_I(inode)->flags = btrfs_inode_flags(leaf, inode_item); > - > +cache_acl: > /* > * try to precache a NULL acl entry for files that don't have > * any xattrs or acls > @@ -2572,7 +2581,6 @@ static void btrfs_read_locked_inode(struct inode *inode) > } > > btrfs_free_path(path); > - inode_item = NULL; > > switch (inode->i_mode & S_IFMT) { > case S_IFREG: > -- > 1.7.4 > -- To unsubscribe from this list: send the line "unsubscribe ceph-devel" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html