Hi Dave, On Wed, Apr 03, 2013 at 04:11:26PM +1100, Dave Chinner wrote: > From: Dave Chinner <dchinner@xxxxxxxxxx> > > Signed-off-by: Dave Chinner <dchinner@xxxxxxxxxx> Comments below. > +static bool > +xfs_attr3_leaf_verify( > struct xfs_buf *bp) > { > struct xfs_mount *mp = bp->b_target->bt_mount; > - struct xfs_attr_leaf_hdr *hdr = bp->b_addr; > - int block_ok = 0; > + struct xfs_attr_leafblock *leaf = bp->b_addr; > + struct xfs_attr3_icleaf_hdr ichdr; > > - block_ok = hdr->info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC); > - if (!block_ok) { > - XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, hdr); > - xfs_buf_ioerror(bp, EFSCORRUPTED); > + xfs_attr3_leaf_hdr_from_disk(&ichdr, leaf); > + > + if (xfs_sb_version_hascrc(&mp->m_sb)) { > + struct xfs_da3_node_hdr *hdr3 = bp->b_addr; > + > + if (ichdr.magic != XFS_ATTR3_LEAF_MAGIC) > + return false; > + > + if (!uuid_equal(&hdr3->info.uuid, &mp->m_sb.sb_uuid)) > + return false; > + if (be64_to_cpu(hdr3->info.blkno) != bp->b_bn) > + return false; > + } else { > + if (ichdr.magic != XFS_ATTR_LEAF_MAGIC) > + return false; > } > + if (ichdr.count == 0) > + return false; > + > + /* XXX: need to range check rest of attr header values */ > + /* XXX: hash order check? */ Seems reasonable for debug code. Is that coming along in a subsequent patch? > int > -xfs_attr_leaf_to_shortform( > - struct xfs_buf *bp, > - xfs_da_args_t *args, > - int forkoff) > +xfs_attr3_leaf_to_shortform( > + struct xfs_buf *bp, > + struct xfs_da_args *args, > + int forkoff) > { > - xfs_attr_leafblock_t *leaf; > - xfs_attr_leaf_entry_t *entry; > - xfs_attr_leaf_name_local_t *name_loc; > - xfs_da_args_t nargs; > - xfs_inode_t *dp; > - char *tmpbuffer; > - int error, i; > + struct xfs_attr_leafblock *leaf; > + struct xfs_attr3_icleaf_hdr ichdr; > + struct xfs_attr_leaf_entry *entry; > + struct xfs_attr_leaf_name_local *name_loc; > + struct xfs_da_args nargs; > + struct xfs_inode *dp = args->dp; > + char *tmpbuffer; > + int error; > + int i; > > trace_xfs_attr_leaf_to_sf(args); > > - dp = args->dp; > tmpbuffer = kmem_alloc(XFS_LBSIZE(dp->i_mount), KM_SLEEP); > - ASSERT(tmpbuffer != NULL); > + if (!tmpbuffer) > + return ENOMEM; > > - ASSERT(bp != NULL); > memcpy(tmpbuffer, bp->b_addr, XFS_LBSIZE(dp->i_mount)); > + > leaf = (xfs_attr_leafblock_t *)tmpbuffer; > - ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)); > + xfs_attr3_leaf_hdr_from_disk(&ichdr, leaf); > + entry = xfs_attr3_leaf_entryp(leaf); > + > + /* XXX (dgc): buffer is about to be marked stale - why zero it? */ > memset(bp->b_addr, 0, XFS_LBSIZE(dp->i_mount)); Good point. Why do we need a temporary buffer here? It seems like we could keep the leaf around long enough to copy from it directly into shortform. > int > -xfs_attr_leaf_to_node(xfs_da_args_t *args) > +xfs_attr3_leaf_to_node( > + struct xfs_da_args *args) > { > - xfs_attr_leafblock_t *leaf; > - xfs_da_intnode_t *node; > - xfs_inode_t *dp; > - struct xfs_buf *bp1, *bp2; > - xfs_dablk_t blkno; > - int error; > + struct xfs_attr_leafblock *leaf; > + struct xfs_attr3_icleaf_hdr icleafhdr; > + struct xfs_attr_leaf_entry *entries; > struct xfs_da_node_entry *btree; > + struct xfs_da3_icnode_hdr icnodehdr; > + struct xfs_da_intnode *node; > + struct xfs_inode *dp = args->dp; > + struct xfs_mount *mp = dp->i_mount; > + struct xfs_buf *bp1 = NULL; > + struct xfs_buf *bp2 = NULL; > + xfs_dablk_t blkno; > + int error; > > trace_xfs_attr_leaf_to_node(args); > > - dp = args->dp; > - bp1 = bp2 = NULL; > error = xfs_da_grow_inode(args, &blkno); > if (error) > goto out; > - error = xfs_attr_leaf_read(args->trans, args->dp, 0, -1, &bp1); > + error = xfs_attr3_leaf_read(args->trans, dp, 0, -1, &bp1); > if (error) > goto out; > > - bp2 = NULL; > - error = xfs_da_get_buf(args->trans, args->dp, blkno, -1, &bp2, > - XFS_ATTR_FORK); > + error = xfs_da_get_buf(args->trans, dp, blkno, -1, &bp2, XFS_ATTR_FORK); > if (error) > goto out; > + > + /* copy leaf to new buffer, update identifiers */ > bp2->b_ops = bp1->b_ops; > - memcpy(bp2->b_addr, bp1->b_addr, XFS_LBSIZE(dp->i_mount)); > - bp1 = NULL; > - xfs_trans_log_buf(args->trans, bp2, 0, XFS_LBSIZE(dp->i_mount) - 1); > + memcpy(bp2->b_addr, bp1->b_addr, XFS_LBSIZE(mp)); > + if (xfs_sb_version_hascrc(&mp->m_sb)) { > + struct xfs_da3_blkinfo *hdr3 = bp2->b_addr; > + hdr3->blkno = cpu_to_be64(bp2->b_bn); Ok, so the memcpy above got uuid, ino, etc, and we only need to update blkno. > @@ -963,52 +1120,62 @@ out: > * or a leaf in a node attribute list. > */ > STATIC int > -xfs_attr_leaf_create( > - xfs_da_args_t *args, > - xfs_dablk_t blkno, > - struct xfs_buf **bpp) > +xfs_attr3_leaf_create( > + struct xfs_da_args *args, > + xfs_dablk_t blkno, > + struct xfs_buf **bpp) > { > - xfs_attr_leafblock_t *leaf; > - xfs_attr_leaf_hdr_t *hdr; > - xfs_inode_t *dp; > - struct xfs_buf *bp; > - int error; > + struct xfs_attr_leafblock *leaf; > + struct xfs_attr3_icleaf_hdr ichdr; > + struct xfs_inode *dp = args->dp; > + struct xfs_mount *mp = dp->i_mount; > + struct xfs_buf *bp; > + int error; > > trace_xfs_attr_leaf_create(args); > > - dp = args->dp; > - ASSERT(dp != NULL); > error = xfs_da_get_buf(args->trans, args->dp, blkno, -1, &bp, > XFS_ATTR_FORK); > if (error) > - return(error); > - bp->b_ops = &xfs_attr_leaf_buf_ops; > + return error; > + bp->b_ops = &xfs_attr3_leaf_buf_ops; > leaf = bp->b_addr; > - memset((char *)leaf, 0, XFS_LBSIZE(dp->i_mount)); > - hdr = &leaf->hdr; > - hdr->info.magic = cpu_to_be16(XFS_ATTR_LEAF_MAGIC); > - hdr->firstused = cpu_to_be16(XFS_LBSIZE(dp->i_mount)); > - if (!hdr->firstused) { > - hdr->firstused = cpu_to_be16( > - XFS_LBSIZE(dp->i_mount) - XFS_ATTR_LEAF_NAME_ALIGN); > - } Saw this several times in review. I don't understand why that code would be there, given that we just set firstused to be XFS_LBSIZE, the blocksize of the filesystem. It's dead code now, so it's fine to remove... There must be some history there. Any idea? > @@ -1113,82 +1279,90 @@ xfs_attr_leaf_add( > * and we don't have enough freespace, then compaction will do us > * no good and we should just give up. > */ > - if (!hdr->holes && (sum < entsize)) > - return(XFS_ERROR(ENOSPC)); > + if (!ichdr.holes && sum < entsize) > + return XFS_ERROR(ENOSPC); > > /* > * Compact the entries to coalesce free space. > * This may change the hdr->count via dropping INCOMPLETE entries. > */ > - xfs_attr_leaf_compact(args, bp); > + xfs_attr3_leaf_compact(args, &ichdr, bp); > > /* > * After compaction, the block is guaranteed to have only one > * free region, in freemap[0]. If it is not big enough, give up. > */ > - if (be16_to_cpu(hdr->freemap[0].size) > - < (entsize + sizeof(xfs_attr_leaf_entry_t))) > - return(XFS_ERROR(ENOSPC)); > + if (ichdr.freemap[0].size < (entsize + sizeof(xfs_attr_leaf_entry_t))) { > + tmp = ENOSPC; > + goto out_log_hdr; That represents a change in behavior that I'm not sure about... Before if we had ENOSPC here we would simply return and xfs_addr3_leaf_add_work would not have a chance to log the header. Now... > + } > + > + tmp = xfs_attr3_leaf_add_work(bp, &ichdr, args, 0); > > - return(xfs_attr_leaf_add_work(bp, args, 0)); > +out_log_hdr: > + xfs_attr3_leaf_hdr_to_disk(leaf, &ichdr); > + xfs_trans_log_buf(args->trans, bp, > + XFS_DA_LOGRANGE(leaf, &leaf->hdr, > + xfs_attr3_leaf_hdr_size(leaf))); We're logging the header here instead of in xfs_attr_leaf_add_work > + return tmp; > } and then returning. I think this would be better with a second goto so that we don't log the header in keeping with the old behavior. > @@ -1232,44 +1406,41 @@ xfs_attr_leaf_add_work( > args->rmtblkcnt = XFS_B_TO_FSB(mp, args->valuelen); > } > xfs_trans_log_buf(args->trans, bp, > - XFS_DA_LOGRANGE(leaf, xfs_attr_leaf_name(leaf, args->index), > + XFS_DA_LOGRANGE(leaf, xfs_attr3_leaf_name(leaf, args->index), > xfs_attr_leaf_entsize(leaf, args->index))); > > /* > * Update the control info for this leaf node > */ > - if (be16_to_cpu(entry->nameidx) < be16_to_cpu(hdr->firstused)) { > - /* both on-disk, don't endian-flip twice */ > - hdr->firstused = entry->nameidx; > - } > - ASSERT(be16_to_cpu(hdr->firstused) >= > - ((be16_to_cpu(hdr->count) * sizeof(*entry)) + sizeof(*hdr))); > - tmp = (be16_to_cpu(hdr->count)-1) * sizeof(xfs_attr_leaf_entry_t) > - + sizeof(xfs_attr_leaf_hdr_t); > - map = &hdr->freemap[0]; > + if (be16_to_cpu(entry->nameidx) < ichdr->firstused) > + ichdr->firstused = be16_to_cpu(entry->nameidx); > + > + ASSERT(ichdr->firstused >= ichdr->count * sizeof(xfs_attr_leaf_entry_t) > + + xfs_attr3_leaf_hdr_size(leaf)); > + tmp = (ichdr->count - 1) * sizeof(xfs_attr_leaf_entry_t) > + + xfs_attr3_leaf_hdr_size(leaf); > + > for (i = 0; i < XFS_ATTR_LEAF_MAPSIZE; map++, i++) { Looks like you can get rid of map. > @@ -1286,34 +1457,69 @@ xfs_attr_leaf_compact( > */ > leaf_s = (xfs_attr_leafblock_t *)tmpbuffer; > leaf_d = bp->b_addr; > - hdr_s = &leaf_s->hdr; > - hdr_d = &leaf_d->hdr; > - hdr_d->info = hdr_s->info; /* struct copy */ > - hdr_d->firstused = cpu_to_be16(XFS_LBSIZE(mp)); > - /* handle truncation gracefully */ > - if (!hdr_d->firstused) { > - hdr_d->firstused = cpu_to_be16( > - XFS_LBSIZE(mp) - XFS_ATTR_LEAF_NAME_ALIGN); > - } Another one. Weird. > - hdr_d->usedbytes = 0; > - hdr_d->count = 0; > - hdr_d->holes = 0; > - hdr_d->freemap[0].base = cpu_to_be16(sizeof(xfs_attr_leaf_hdr_t)); > - hdr_d->freemap[0].size = cpu_to_be16(be16_to_cpu(hdr_d->firstused) - > - sizeof(xfs_attr_leaf_hdr_t)); > + ichdr_s = *ichdr_d; /* struct copy */ ichdr_s should be initialized from the copy we have in the temporary buffer rather than from a struct copy of ichdr_d. I understand that what you have here probably works fine, but it is not very obvious, or clear that it was an intentional deviation from the struct copy up above. > + ichdr_d->firstused = XFS_LBSIZE(mp); > + ichdr_d->usedbytes = 0; > + ichdr_d->count = 0; > + ichdr_d->holes = 0; > + ichdr_d->freemap[0].base = xfs_attr3_leaf_hdr_size(leaf_s); > + ichdr_d->freemap[0].size = ichdr_d->firstused - ichdr_d->freemap[0].base; > > /* > * Copy all entry's in the same (sorted) order, > * but allocate name/value pairs packed and in sequence. > */ > - xfs_attr_leaf_moveents(leaf_s, 0, leaf_d, 0, > - be16_to_cpu(hdr_s->count), mp); > + xfs_attr3_leaf_moveents(leaf_s, &ichdr_s, 0, leaf_d, ichdr_d, 0, > + ichdr_s.count, mp); > + /* > + * this logs the entire buffer, but the caller must write the header > + * back to the buffer when it is finished modifying it. > + */ > xfs_trans_log_buf(trans, bp, 0, XFS_LBSIZE(mp) - 1); Maybe better to just log the entries here, someday. > @@ -2201,45 +2425,41 @@ xfs_attr_leaf_moveents(xfs_attr_leafblock_t *leaf_s, int start_s, > /* > * Set up environment. > */ > - ASSERT(leaf_s->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)); > - ASSERT(leaf_d->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)); > - hdr_s = &leaf_s->hdr; > - hdr_d = &leaf_d->hdr; > - ASSERT((be16_to_cpu(hdr_s->count) > 0) && > - (be16_to_cpu(hdr_s->count) < (XFS_LBSIZE(mp)/8))); > - ASSERT(be16_to_cpu(hdr_s->firstused) >= > - ((be16_to_cpu(hdr_s->count) > - * sizeof(*entry_s))+sizeof(*hdr_s))); > - ASSERT(be16_to_cpu(hdr_d->count) < (XFS_LBSIZE(mp)/8)); > - ASSERT(be16_to_cpu(hdr_d->firstused) >= > - ((be16_to_cpu(hdr_d->count) > - * sizeof(*entry_d))+sizeof(*hdr_d))); > - > - ASSERT(start_s < be16_to_cpu(hdr_s->count)); > - ASSERT(start_d <= be16_to_cpu(hdr_d->count)); > - ASSERT(count <= be16_to_cpu(hdr_s->count)); > + ASSERT(ichdr_s->magic == XFS_ATTR_LEAF_MAGIC || > + ichdr_s->magic == XFS_ATTR3_LEAF_MAGIC); This assert might be unnecessary due to the asserts in hdr_from_disk. > @@ -2286,71 +2504,40 @@ xfs_attr_leaf_moveents(xfs_attr_leafblock_t *leaf_s, int start_s, > /* > * Zero out the entries we just copied. > */ > - if (start_s == be16_to_cpu(hdr_s->count)) { > + if (start_s == ichdr_s->count) { > tmp = count * sizeof(xfs_attr_leaf_entry_t); > - entry_s = &leaf_s->entries[start_s]; > + entry_s = &xfs_attr3_leaf_entryp(leaf_s)[start_s]; > ASSERT(((char *)entry_s + tmp) <= > ((char *)leaf_s + XFS_LBSIZE(mp))); > - memset((char *)entry_s, 0, tmp); > + memset(entry_s, 0, tmp); > } else { > /* > * Move the remaining entries down to fill the hole, > * then zero the entries at the top. > */ > - tmp = be16_to_cpu(hdr_s->count) - count; > - tmp *= sizeof(xfs_attr_leaf_entry_t); > - entry_s = &leaf_s->entries[start_s + count]; > - entry_d = &leaf_s->entries[start_s]; > - memmove((char *)entry_d, (char *)entry_s, tmp); > + tmp = (ichdr_s->count - count) - sizeof(xfs_attr_leaf_entry_t); * Multiply. > @@ -2379,20 +2567,21 @@ xfs_attr_leaf_lasthash( > STATIC int > xfs_attr_leaf_entsize(xfs_attr_leafblock_t *leaf, int index) > { > + struct xfs_attr_leaf_entry *entries; > xfs_attr_leaf_name_local_t *name_loc; > xfs_attr_leaf_name_remote_t *name_rmt; > int size; > > - ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)); I don't see a replacement for this assert. > @@ -2786,38 +3003,44 @@ xfs_attr_root_inactive(xfs_trans_t **trans, xfs_inode_t *dp) > */ > error = xfs_da3_node_read(*trans, dp, 0, -1, &bp, XFS_ATTR_FORK); > if (error) > - return(error); > - blkno = XFS_BUF_ADDR(bp); > + return error; > + blkno = bp->b_bn; I'm a little concerned about the switch from XFS_BUF_ADDR to bp->b_bn here, but have not looked into it yet. If you have a quick explanation, that's great. Otherwise I'll come back to it. > +/* > + * incore, neutral version of the attribute leaf header > + */ > +struct xfs_attr3_icleaf_hdr { > + __uint32_t forw; > + __uint32_t back; > + __uint16_t magic; > + __uint16_t count; > + __uint16_t usedbytes; > + __uint16_t firstused; > + __u8 holes; > + struct { > + __uint16_t base; > + __uint16_t size; > + } freemap[XFS_ATTR_LEAF_MAPSIZE]; Should that be an array of xfs_attr_leaf_map_t? No, that's ondisk format. Nevermind then. > @@ -1479,7 +1480,9 @@ xfs_da3_node_lookup_int( > curr = blk->bp->b_addr; > blk->magic = be16_to_cpu(curr->magic); > > - if (blk->magic == XFS_ATTR_LEAF_MAGIC) { > + if (blk->magic == XFS_ATTR_LEAF_MAGIC || > + blk->magic == XFS_ATTR3_LEAF_MAGIC) { > + blk->magic = XFS_ATTR_LEAF_MAGIC; ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ I'm still a little confused by these assignments. I understand you're squashing them down from a comment, and that the correct magic is still in the buffer... I need to find who is looking at blk->magic. Any hints? That's about it for now. Thanks, Ben _______________________________________________ xfs mailing list xfs@xxxxxxxxxxx http://oss.sgi.com/mailman/listinfo/xfs