On Wed, Apr 03, 2013 at 04:11:25PM +1100, Dave Chinner wrote: > From: Dave Chinner <dchinner at redhat.com> > > Signed-off-by: Dave Chinner <dchinner at redhat.com> A few nits below. > --- > fs/xfs/xfs_attr.c | 33 +- > fs/xfs/xfs_attr_leaf.c | 29 +- > fs/xfs/xfs_bmap.c | 1 + > fs/xfs/xfs_da_btree.c | 1395 +++++++++++++++++++++++++++++------------------- > fs/xfs/xfs_da_btree.h | 106 +++- > fs/xfs/xfs_dir2_node.c | 26 +- > fs/xfs/xfs_trace.c | 2 +- > 7 files changed, 972 insertions(+), 620 deletions(-) > > diff --git a/fs/xfs/xfs_attr.c b/fs/xfs/xfs_attr.c > index 8886838..e03128c 100644 > --- a/fs/xfs/xfs_attr.c > +++ b/fs/xfs/xfs_attr.c > @@ -15,7 +15,6 @@ > * along with this program; if not, write the Free Software Foundation, > * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA > */ > - > #include "xfs.h" > #include "xfs_fs.h" > #include "xfs_types.h" > @@ -25,6 +24,7 @@ > #include "xfs_sb.h" > #include "xfs_ag.h" > #include "xfs_mount.h" > +#include "xfs_error.h" > #include "xfs_da_btree.h" > #include "xfs_bmap_btree.h" > #include "xfs_attr_sf.h" > @@ -35,7 +35,6 @@ > #include "xfs_bmap.h" > #include "xfs_attr.h" > #include "xfs_attr_leaf.h" > -#include "xfs_error.h" Why did xfs_error.h need to be moved? > @@ -935,16 +936,16 @@ xfs_attr_leaf_to_node(xfs_da_args_t *args) > /* > * Set up the new root node. > */ > - error = xfs_da_node_create(args, 0, 1, &bp1, XFS_ATTR_FORK); > + error = xfs_da3_node_create(args, 0, 1, &bp1, XFS_ATTR_FORK); > if (error) > goto out; > node = bp1->b_addr; > leaf = bp2->b_addr; > ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)); > /* both on-disk, don't endian-flip twice */ > - node->btree[0].hashval = > - leaf->entries[be16_to_cpu(leaf->hdr.count)-1 ].hashval; > - node->btree[0].before = cpu_to_be32(blkno); > + btree = xfs_da3_node_tree_p(node); > + btree[0].hashval = leaf->entries[be16_to_cpu(leaf->hdr.count)-1 ].hashval; ^ Extra space. > +static bool > +xfs_da3_node_verify( > struct xfs_buf *bp) > { > struct xfs_mount *mp = bp->b_target->bt_mount; > - struct xfs_da_node_hdr *hdr = bp->b_addr; > - int block_ok = 0; > - > - block_ok = hdr->info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC); > - block_ok = block_ok && > - be16_to_cpu(hdr->level) > 0 && > - be16_to_cpu(hdr->count) > 0 ; > - if (!block_ok) { > - XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, hdr); > - xfs_buf_ioerror(bp, EFSCORRUPTED); > + struct xfs_da_intnode *hdr = bp->b_addr; > + struct xfs_da3_icnode_hdr ichdr; > + > + xfs_da3_node_hdr_from_disk(&ichdr, hdr); > + > + if (xfs_sb_version_hascrc(&mp->m_sb)) { > + struct xfs_da3_node_hdr *hdr3 = bp->b_addr; > + > + if (ichdr.magic != XFS_DA3_NODE_MAGIC) > + return false; > + > + if (!uuid_equal(&hdr3->info.uuid, &mp->m_sb.sb_uuid)) > + return false; > + if (be64_to_cpu(hdr3->info.blkno) != bp->b_bn) > + return false; > + } else { > + if (ichdr.magic != XFS_DA_NODE_MAGIC) > + return false; > } > + if (ichdr.level == 0) > + return false; > + if (ichdr.level > XFS_DA_NODE_MAXDEPTH) > + return false; > + if (ichdr.count == 0) > + return false; > + > + /* > + * we don't know if the node is for and attribute or directory tree, > + * so only fail if the count is outside both bounds > + */ > + if (ichdr.count > mp->m_dir_node_ents && > + ichdr.count > mp->m_attr_node_ents) > + return false; > + > + /* XXX: hash order check? */ Still planning to add that in a subsequent patch? > @@ -178,33 +314,45 @@ xfs_da_node_read( > * Create the initial contents of an intermediate node. > */ > int > -xfs_da_node_create(xfs_da_args_t *args, xfs_dablk_t blkno, int level, > - struct xfs_buf **bpp, int whichfork) > +xfs_da3_node_create( > + struct xfs_da_args *args, > + xfs_dablk_t blkno, > + int level, > + struct xfs_buf **bpp, > + int whichfork) > { > - xfs_da_intnode_t *node; > - struct xfs_buf *bp; > - int error; > - xfs_trans_t *tp; > + struct xfs_da_intnode *node; > + struct xfs_trans *tp = args->trans; > + struct xfs_mount *mp = tp->t_mountp; > + struct xfs_da3_icnode_hdr ichdr = {0}; > + struct xfs_buf *bp; > + int error; > > trace_xfs_da_node_create(args); > + ASSERT(level <= XFS_DA_NODE_MAXDEPTH); > > - tp = args->trans; > error = xfs_da_get_buf(tp, args->dp, blkno, -1, &bp, whichfork); Looks like this function can return 0 with a null bp. We should probably check for that. > @@ -320,8 +474,10 @@ xfs_da_split(xfs_da_state_t *state) > * just got bumped because of the addition of a new root node. > * There might be three blocks involved if a double split occurred, > * and the original block 0 could be at any position in the list. > + * > + * Note: the info structures being modified here for both v2 and v3 da > + * headers, so we can do this linkage just using the v2 structures. I had a hard time parsing that the first few times. How about something like: Note: The xfs_da_blkinfo fields being modified here are shared by both xfs_da_node_hdr and xfs_da3_intnode, and occur before the additional fields added in xfs_da3_intnode, so this linkage can be done using just the v2 structures. > @@ -591,12 +801,12 @@ xfs_da_node_rebalance(xfs_da_state_t *state, xfs_da_state_blk_t *blk1, > * Move the req'd B-tree elements from high in node1 to > * low in node2. > */ > - be16_add_cpu(&node2->hdr.count, count); > + nodehdr2.count += count; > tmp = count * (uint)sizeof(xfs_da_node_entry_t); > - btree_s = &node1->btree[be16_to_cpu(node1->hdr.count) - count]; > - btree_d = &node2->btree[0]; > + btree_s = &btree1[nodehdr1.count- count]; ^ Add a space. > @@ -926,35 +1172,34 @@ xfs_da_node_toosmall(xfs_da_state_t *state, int *action) > * We prefer coalescing with the lower numbered sibling so as > * to shrink a directory over time. > */ > + count = state->node_ents; > + count -= state->node_ents >> 2; > + count -= nodehdr.count; > + > /* start with smaller blk num */ > - forward = (be32_to_cpu(info->forw) < be32_to_cpu(info->back)); > + forward = nodehdr.forw < nodehdr.back; > for (i = 0; i < 2; forward = !forward, i++) { > if (forward) > - blkno = be32_to_cpu(info->forw); > + blkno = nodehdr.forw; > else > - blkno = be32_to_cpu(info->back); > + blkno = nodehdr.back; > if (blkno == 0) > continue; > - error = xfs_da_node_read(state->args->trans, state->args->dp, > + error = xfs_da3_node_read(state->args->trans, state->args->dp, > blkno, -1, &bp, state->args->whichfork); > if (error) > return(error); > - ASSERT(bp != NULL); > > - node = (xfs_da_intnode_t *)info; > - count = state->node_ents; > - count -= state->node_ents >> 2; > - count -= be16_to_cpu(node->hdr.count); At first initializing count outside the loop looked like a problem to me, but upon a closer inspection now I believe it's ok. > /* > - * Unbalance the btree elements between two intermediate nodes, > + * Unbalance the elements between two intermediate nodes, > * move all Btree elements from one node into another. > */ > STATIC void > -xfs_da_node_unbalance(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk, > - xfs_da_state_blk_t *save_blk) > +xfs_da3_node_unbalance( > + struct xfs_da_state *state, > + struct xfs_da_state_blk *drop_blk, > + struct xfs_da_state_blk *save_blk) > { > - xfs_da_intnode_t *drop_node, *save_node; > - xfs_da_node_entry_t *btree; > - int tmp; > - xfs_trans_t *tp; > + struct xfs_da_intnode *drop_node; > + struct xfs_da_intnode *save_node; > + struct xfs_da_node_entry *dbtree; > + struct xfs_da_node_entry *sbtree; > + struct xfs_da3_icnode_hdr dhdr; > + struct xfs_da3_icnode_hdr shdr; Would be better as drop and save. > + struct xfs_trans *tp; > + int sindex; > + int tmp; > > trace_xfs_da_node_unbalance(state->args); > > drop_node = drop_blk->bp->b_addr; > save_node = save_blk->bp->b_addr; > - ASSERT(drop_node->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC)); > - ASSERT(save_node->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC)); > + xfs_da3_node_hdr_from_disk(&dhdr, drop_node); > + xfs_da3_node_hdr_from_disk(&shdr, save_node); > + dbtree = xfs_da3_node_tree_p(drop_node); > + sbtree = xfs_da3_node_tree_p(save_node); > tp = state->args->trans; > > /* > * If the dying block has lower hashvals, then move all the > * elements in the remaining block up to make a hole. > */ > - if ((be32_to_cpu(drop_node->btree[0].hashval) < be32_to_cpu(save_node->btree[ 0 ].hashval)) || > - (be32_to_cpu(drop_node->btree[be16_to_cpu(drop_node->hdr.count)-1].hashval) < > - be32_to_cpu(save_node->btree[be16_to_cpu(save_node->hdr.count)-1].hashval))) > - { > - btree = &save_node->btree[be16_to_cpu(drop_node->hdr.count)]; > - tmp = be16_to_cpu(save_node->hdr.count) * (uint)sizeof(xfs_da_node_entry_t); > - memmove(btree, &save_node->btree[0], tmp); > - btree = &save_node->btree[0]; > + if ((be32_to_cpu(dbtree[0].hashval) < be32_to_cpu(sbtree[ 0 ].hashval)) || ^ ^ Extra spaces. > + (be32_to_cpu(dbtree[dhdr.count - 1].hashval) < > + be32_to_cpu(sbtree[shdr.count - 1].hashval))) { > + /* XXX: check this - is memmove dst correct? */ > + tmp = shdr.count * (uint)sizeof(xfs_da_node_entry_t); > + memmove(&sbtree[dhdr.count], &sbtree[0], tmp); I believe the destination in this memmove is correct. We're moving items from the beginning of the save_node toward the end of save_node so that later we can copy from drop_node into the beginning of save_node. Making room. > + > + sindex = 0; > xfs_trans_log_buf(tp, save_blk->bp, > - XFS_DA_LOGRANGE(save_node, btree, > - (be16_to_cpu(save_node->hdr.count) + be16_to_cpu(drop_node->hdr.count)) * > - sizeof(xfs_da_node_entry_t))); > + XFS_DA_LOGRANGE(save_node, &sbtree[0], > + (shdr.count + dhdr.count) * > + sizeof(xfs_da_node_entry_t))); Seems like here we are logging more of the node than is strictly necessary. So far we have only modified save_node at shdr.count up to shdr.count + dhdr.count, there is no need to log the beginning of save_node until after we've copied from drop_node into it. Not a big deal I guess. > } else { > - btree = &save_node->btree[be16_to_cpu(save_node->hdr.count)]; > + sindex = shdr.count; > xfs_trans_log_buf(tp, save_blk->bp, > - XFS_DA_LOGRANGE(save_node, btree, > - be16_to_cpu(drop_node->hdr.count) * > - sizeof(xfs_da_node_entry_t))); > + XFS_DA_LOGRANGE(save_node, &sbtree[sindex], > + dhdr.count * sizeof(xfs_da_node_entry_t))); And here it seems that we're logging save_node before we've made any modification to it. Maybe I'm missing something. > } > > /* > * Move all the B-tree elements from drop_blk to save_blk. > */ > - tmp = be16_to_cpu(drop_node->hdr.count) * (uint)sizeof(xfs_da_node_entry_t); > - memcpy(btree, &drop_node->btree[0], tmp); > - be16_add_cpu(&save_node->hdr.count, be16_to_cpu(drop_node->hdr.count)); > + tmp = dhdr.count * (uint)sizeof(xfs_da_node_entry_t); > + memcpy(&sbtree[sindex], &dbtree[0], tmp); > + shdr.count += dhdr.count; > + xfs_da3_node_hdr_to_disk(save_node, &shdr); > xfs_trans_log_buf(tp, save_blk->bp, > XFS_DA_LOGRANGE(save_node, &save_node->hdr, > - sizeof(save_node->hdr))); > + xfs_da3_node_hdr_size(save_node))); > And then after modifying save_node by copying stuff in from drop_blk we only log the header, and not the entries. I guess I'd prefer to see the entries logged after they've been modified. It's not a bug, looks like.. just not what I'm used to seeing. > @@ -1190,66 +1478,73 @@ xfs_da_node_lookup_int(xfs_da_state_t *state, int *result) > } > curr = blk->bp->b_addr; > blk->magic = be16_to_cpu(curr->magic); > - ASSERT(blk->magic == XFS_DA_NODE_MAGIC || > - blk->magic == XFS_DIR2_LEAFN_MAGIC || > - blk->magic == XFS_ATTR_LEAF_MAGIC); > + > + if (blk->magic == XFS_ATTR_LEAF_MAGIC) { > + blk->hashval = xfs_attr_leaf_lasthash(blk->bp, NULL); > + break; > + } > + > + if (blk->magic == XFS_DIR2_LEAFN_MAGIC || > + blk->magic == XFS_DIR3_LEAFN_MAGIC) { > + blk->magic = XFS_DIR2_LEAFN_MAGIC; > + blk->hashval = xfs_dir2_leafn_lasthash(blk->bp, NULL); > + break; > + } Something like this would be better for this rearrangement: if (blk->magic == XFS_ATTR_LEAF_MAGIC) { ... break; } else if (blk->magic == XFS_DIR2_LEAFN_MAGIC || blk->magic == XFS_DIR3_LEAFN_MAGIC) { ... break; } else if (blk->magic != XFS_DA_NODE_MAGIC) { return XFS_ERROR(EFSCORRUPTED); } So that we're sure what kind of block we have. > + > + blk->magic = XFS_DA_NODE_MAGIC; > + Why was that assignment necessary? > @@ -1316,9 +1647,6 @@ xfs_da_blk_link(xfs_da_state_t *state, xfs_da_state_blk_t *old_blk, > ASSERT(old_blk->magic == XFS_DA_NODE_MAGIC || > old_blk->magic == XFS_DIR2_LEAFN_MAGIC || > old_blk->magic == XFS_ATTR_LEAF_MAGIC); Seems like these asserts need to be updated. > - ASSERT(old_blk->magic == be16_to_cpu(old_info->magic)); > - ASSERT(new_blk->magic == be16_to_cpu(new_info->magic)); > - ASSERT(old_blk->magic == new_blk->magic); These seem like good asserts. > @@ -1449,8 +1738,6 @@ xfs_da_blk_unlink(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk, > ASSERT(save_blk->magic == XFS_DA_NODE_MAGIC || > save_blk->magic == XFS_DIR2_LEAFN_MAGIC || > save_blk->magic == XFS_ATTR_LEAF_MAGIC); Seems like these asserts need to be updated. > - ASSERT(save_blk->magic == be16_to_cpu(save_info->magic)); > - ASSERT(drop_blk->magic == be16_to_cpu(drop_info->magic)); These two seem like useful asserts. > @@ -1856,17 +2176,22 @@ xfs_da_swap_lastblock( > dead_level = 0; > dead_hash = be32_to_cpu(ents[leafhdr.count - 1].hashval); > } else { > - ASSERT(dead_info->magic == cpu_to_be16(XFS_DA_NODE_MAGIC)); > + struct xfs_da3_icnode_hdr deadhdr; > + > + ASSERT(dead_info->magic == cpu_to_be16(XFS_DA_NODE_MAGIC) || > + dead_info->magic == cpu_to_be16(XFS_DA3_NODE_MAGIC)); I think these are covered by asserts in xfs_da3_node_hdr_from_disk and can be removed. > @@ -1912,31 +2237,31 @@ xfs_da_swap_lastblock( > * Walk down the tree looking for the parent of the moved block. > */ > for (;;) { > - error = xfs_da_node_read(tp, ip, par_blkno, -1, &par_buf, w); > + error = xfs_da3_node_read(tp, ip, par_blkno, -1, &par_buf, w); > if (error) > goto done; > par_node = par_buf->b_addr; > - if (unlikely(par_node->hdr.info.magic != > - cpu_to_be16(XFS_DA_NODE_MAGIC) || > - (level >= 0 && level != be16_to_cpu(par_node->hdr.level) + 1))) { > + xfs_da3_node_hdr_from_disk(&par_hdr, par_node); > + if (level >= 0 && level != par_hdr.level + 1) { Do we need to test par_node magic here like we were before? > XFS_ERROR_REPORT("xfs_da_swap_lastblock(4)", > XFS_ERRLEVEL_LOW, mp); > error = XFS_ERROR(EFSCORRUPTED); > goto done; > } _______________________________________________ xfs mailing list xfs@xxxxxxxxxxx http://oss.sgi.com/mailman/listinfo/xfs