From: Dave Chinner <dchinner@xxxxxxxxxx> Add code to track the filetype of an inode from phase 3 when all the inodes are scanned throught to phase 6 when the directory structure is validated and corrected. Add code to phase 6 shortform and long form directory entry validation to read the ftype from the dirent, lookup the inode record and check they are the same. If they aren't and we are in no-modify mode, issue a warning such as: Phase 6 - check inode connectivity... - traversing filesystem ... would fix ftype mismatch (5/1) in directory/child inode 64/68 - traversal finished ... - moving disconnected inodes to lost+found ... If we are fixing the problem: Phase 6 - check inode connectivity... - resetting contents of realtime bitmap and summary inodes - traversing filesystem ... fixing ftype mismatch (5/1) in directory/child inode 64/68 - traversal finished ... - moving disconnected inodes to lost+found ... Note that this is from a leaf form directory entry that was intentionally corrupted with xfs_db like so: xfs_db> inode 64 xfs_db> a u3.bmx[0].startblock xfs_db> p .... du[3].inumber = 68 du[3].namelen = 11 du[3].name = "syscalltest" du[3].filetype = 1 du[3].tag = 0x70 .... xfs_db> write du[3].filetype 5 du[3].filetype = 5 xfs_db> quit Shortform directory entry repair was tested in a similar fashion. Further, track the ftype in the directory hash table that is build, so if the directory is rebuild from scratch it has the necessary ftype information to rebuild the directory correctly. Further, if we detect a ftype mismatch, update the entry in the hash so that later directory errors that lead to it being rebuilt use the corrected ftype field, not the bad one. Note that this code pulls in some kernel side code that is currently in kernel private locations (xfs_mode_to_ftype table), so there'll be some kernel/userspace sync work needed to bring these back into line. Signed-off-by: Dave Chinner <dchinner@xxxxxxxxxx> Version 4: - removed a couple more typedefs - fixed no_modify return value from shortform_dir2_junk() - cleaned up variable naming, formatting and comments in shortform_dir2_junk() to make it more obvious what it is doing. Version 3: - fixed max_size updates - fixed comment grammar Version 2: - factored out junking of entry in shortform directory code - fixed leak of ftypes array memory --- include/xfs_dir2.h | 3 + libxfs/xfs_dir2.c | 16 ++++ repair/dino_chunks.c | 11 +++ repair/incore.h | 27 +++++- repair/incore_ino.c | 30 ++++++- repair/phase6.c | 246 ++++++++++++++++++++++++++++++++++++--------------- repair/scan.c | 4 +- 7 files changed, 258 insertions(+), 79 deletions(-) diff --git a/include/xfs_dir2.h b/include/xfs_dir2.h index 9910401..3900130 100644 --- a/include/xfs_dir2.h +++ b/include/xfs_dir2.h @@ -57,6 +57,9 @@ extern int xfs_dir_replace(struct xfs_trans *tp, struct xfs_inode *dp, extern int xfs_dir_canenter(struct xfs_trans *tp, struct xfs_inode *dp, struct xfs_name *name, uint resblks); +#define S_SHIFT 12 +extern const unsigned char xfs_mode_to_ftype[]; + /* * Direct call from the bmap code, bypassing the generic directory layer. */ diff --git a/libxfs/xfs_dir2.c b/libxfs/xfs_dir2.c index 96a3c1d..4c8c836 100644 --- a/libxfs/xfs_dir2.c +++ b/libxfs/xfs_dir2.c @@ -20,6 +20,22 @@ struct xfs_name xfs_name_dotdot = { (unsigned char *)"..", 2, XFS_DIR3_FT_DIR }; +/* + * @mode, if set, indicates that the type field needs to be set up. + * This uses the transformation from file mode to DT_* as defined in linux/fs.h + * for file type specification. This will be propagated into the directory + * structure if appropriate for the given operation and filesystem config. + */ +const unsigned char xfs_mode_to_ftype[S_IFMT >> S_SHIFT] = { + [0] = XFS_DIR3_FT_UNKNOWN, + [S_IFREG >> S_SHIFT] = XFS_DIR3_FT_REG_FILE, + [S_IFDIR >> S_SHIFT] = XFS_DIR3_FT_DIR, + [S_IFCHR >> S_SHIFT] = XFS_DIR3_FT_CHRDEV, + [S_IFBLK >> S_SHIFT] = XFS_DIR3_FT_BLKDEV, + [S_IFIFO >> S_SHIFT] = XFS_DIR3_FT_FIFO, + [S_IFSOCK >> S_SHIFT] = XFS_DIR3_FT_SOCK, + [S_IFLNK >> S_SHIFT] = XFS_DIR3_FT_SYMLINK, +}; /* * ASCII case-insensitive (ie. A-Z) support for directories that was diff --git a/repair/dino_chunks.c b/repair/dino_chunks.c index d3c2236..65281e4 100644 --- a/repair/dino_chunks.c +++ b/repair/dino_chunks.c @@ -788,6 +788,8 @@ process_inode_chunk( * we do now, this is where to start. */ if (is_used) { + __uint16_t di_mode; + if (is_inode_free(ino_rec, irec_offset)) { if (verbose || no_modify) { do_warn( @@ -803,6 +805,15 @@ process_inode_chunk( set_inode_used(ino_rec, irec_offset); /* + * store the on-disk file type for comparing in + * phase 6. + */ + di_mode = be16_to_cpu(dino->di_mode); + di_mode = (di_mode & S_IFMT) >> S_SHIFT; + set_inode_ftype(ino_rec, irec_offset, + xfs_mode_to_ftype[di_mode]); + + /* * store on-disk nlink count for comparing in phase 7 */ set_inode_disk_nlinks(ino_rec, irec_offset, diff --git a/repair/incore.h b/repair/incore.h index 38caa6d..5419884 100644 --- a/repair/incore.h +++ b/repair/incore.h @@ -293,6 +293,7 @@ typedef struct ino_tree_node { ino_ex_data_t *ex_data; /* phases 6,7 */ parent_list_t *plist; /* phases 2-5 */ } ino_un; + __uint8_t *ftypes; /* phases 3,6 */ } ino_tree_node_t; #define INOS_PER_IREC (sizeof(__uint64_t) * NBBY) @@ -359,7 +360,8 @@ ino_tree_node_t *find_uncertain_inode_rec(xfs_agnumber_t agno, xfs_agino_t ino); void add_inode_uncertain(xfs_mount_t *mp, xfs_ino_t ino, int free); -void add_aginode_uncertain(xfs_agnumber_t agno, +void add_aginode_uncertain(struct xfs_mount *mp, + xfs_agnumber_t agno, xfs_agino_t agino, int free); void get_uncertain_inode_rec(struct xfs_mount *mp, xfs_agnumber_t agno, @@ -476,6 +478,29 @@ static inline void add_inode_reached(struct ino_tree_node *irec, int offset) } /* + * get/set inode filetype. Only used if the superblock feature bit is set + * which allocates irec->ftypes. + */ +static inline void +set_inode_ftype(struct ino_tree_node *irec, + int ino_offset, + __uint8_t ftype) +{ + if (irec->ftypes) + irec->ftypes[ino_offset] = ftype; +} + +static inline __uint8_t +get_inode_ftype( + struct ino_tree_node *irec, + int ino_offset) +{ + if (!irec->ftypes) + return XFS_DIR3_FT_UNKNOWN; + return irec->ftypes[ino_offset]; +} + +/* * set/get inode number of parent -- works for directory inodes only */ void set_inode_parent(ino_tree_node_t *irec, int ino_offset, diff --git a/repair/incore_ino.c b/repair/incore_ino.c index 735737a..9502648 100644 --- a/repair/incore_ino.c +++ b/repair/incore_ino.c @@ -211,6 +211,21 @@ __uint32_t get_inode_disk_nlinks(struct ino_tree_node *irec, int ino_offset) return 0; } +static __uint8_t * +alloc_ftypes_array( + struct xfs_mount *mp) +{ + __uint8_t *ptr; + + if (!xfs_sb_version_hasftype(&mp->m_sb)) + return NULL; + + ptr = calloc(XFS_INODES_PER_CHUNK, sizeof(*ptr)); + if (!ptr) + do_error(_("could not allocate ftypes array\n")); + return ptr; +} + /* * Next is the uncertain inode list -- a sorted (in ascending order) * list of inode records sorted on the starting inode number. There @@ -226,6 +241,7 @@ __uint32_t get_inode_disk_nlinks(struct ino_tree_node *irec, int ino_offset) */ static struct ino_tree_node * alloc_ino_node( + struct xfs_mount *mp, xfs_agino_t starting_ino) { struct ino_tree_node *irec; @@ -245,6 +261,7 @@ alloc_ino_node( irec->ino_un.ex_data = NULL; irec->nlink_size = sizeof(__uint8_t); irec->disk_nlinks.un8 = alloc_nlink_array(irec->nlink_size); + irec->ftypes = alloc_ftypes_array(mp); return irec; } @@ -285,6 +302,7 @@ free_ino_tree_node( } + free(irec->ftypes); free(irec); } @@ -303,7 +321,11 @@ static ino_tree_node_t **last_rec; * free is set to 1 if the inode is thought to be free, 0 if used */ void -add_aginode_uncertain(xfs_agnumber_t agno, xfs_agino_t ino, int free) +add_aginode_uncertain( + struct xfs_mount *mp, + xfs_agnumber_t agno, + xfs_agino_t ino, + int free) { ino_tree_node_t *ino_rec; xfs_agino_t s_ino; @@ -334,7 +356,7 @@ add_aginode_uncertain(xfs_agnumber_t agno, xfs_agino_t ino, int free) ino_rec = (ino_tree_node_t *) avl_findrange(inode_uncertain_tree_ptrs[agno], s_ino); if (!ino_rec) { - ino_rec = alloc_ino_node(s_ino); + ino_rec = alloc_ino_node(mp, s_ino); if (!avl_insert(inode_uncertain_tree_ptrs[agno], &ino_rec->avl_node)) @@ -360,7 +382,7 @@ add_aginode_uncertain(xfs_agnumber_t agno, xfs_agino_t ino, int free) void add_inode_uncertain(xfs_mount_t *mp, xfs_ino_t ino, int free) { - add_aginode_uncertain(XFS_INO_TO_AGNO(mp, ino), + add_aginode_uncertain(mp, XFS_INO_TO_AGNO(mp, ino), XFS_INO_TO_AGINO(mp, ino), free); } @@ -432,7 +454,7 @@ add_inode( { struct ino_tree_node *irec; - irec = alloc_ino_node(agino); + irec = alloc_ino_node(mp, agino); if (!avl_insert(inode_tree_ptrs[agno], &irec->avl_node)) do_warn(_("add_inode - duplicate inode range\n")); return irec; diff --git a/repair/phase6.c b/repair/phase6.c index d2d4a44..cdbf4db 100644 --- a/repair/phase6.c +++ b/repair/phase6.c @@ -134,7 +134,8 @@ dir_hash_add( __uint32_t addr, xfs_ino_t inum, int namelen, - unsigned char *name) + unsigned char *name, + __uint8_t ftype) { xfs_dahash_t hash = 0; int byaddr; @@ -148,6 +149,7 @@ dir_hash_add( xname.name = name; xname.len = namelen; + xname.type = ftype; junk = name[0] == '/'; byaddr = DIR_HASH_FUNC(hashtab, addr); @@ -312,6 +314,23 @@ dir_hash_see( return DIR_HASH_CK_NODATA; } +static void +dir_hash_update_ftype( + dir_hash_tab_t *hashtab, + xfs_dir2_dataptr_t addr, + __uint8_t ftype) +{ + int i; + dir_hash_ent_t *p; + + i = DIR_HASH_FUNC(hashtab, addr); + for (p = hashtab->byaddr[i]; p; p = p->nextbyaddr) { + if (p->address != addr) + continue; + p->name.type = ftype; + } +} + /* * checks to make sure leafs match a data entry, and that the stale * count is valid. @@ -1685,11 +1704,12 @@ longform_dir2_entry_check_data( if (!orphanage_ino) orphanage_ino = inum; } + /* * check for duplicate names in directory. */ if (!dir_hash_add(mp, hashtab, addr, inum, dep->namelen, - dep->name)) { + dep->name, xfs_dir3_dirent_get_ftype(mp, dep))) { nbad++; if (entry_junked( _("entry \"%s\" (ino %" PRIu64 ") in dir %" PRIu64 " is a duplicate name"), @@ -1763,6 +1783,35 @@ longform_dir2_entry_check_data( */ if (no_modify && verify_inum(mp, inum)) continue; + + /* validate ftype field if supported */ + if (xfs_sb_version_hasftype(&mp->m_sb)) { + __uint8_t dir_ftype; + __uint8_t ino_ftype; + + dir_ftype = xfs_dir3_dirent_get_ftype(mp, dep); + ino_ftype = get_inode_ftype(irec, ino_offset); + + if (dir_ftype != ino_ftype) { + if (no_modify) { + do_warn( + _("would fix ftype mismatch (%d/%d) in directory/child inode %" PRIu64 "/%" PRIu64 "\n"), + dir_ftype, ino_ftype, + ip->i_ino, inum); + } else { + do_warn( + _("fixing ftype mismatch (%d/%d) in directory/child inode %" PRIu64 "/%" PRIu64 "\n"), + dir_ftype, ino_ftype, + ip->i_ino, inum); + xfs_dir3_dirent_put_ftype(mp, dep, + ino_ftype); + libxfs_dir2_data_log_entry(tp, bp, dep); + dir_hash_update_ftype(hashtab, addr, + ino_ftype); + } + } + } + /* * check easy case first, regular inode, just bump * the link count and continue @@ -2189,6 +2238,62 @@ out_fix: * shortform directory v2 processing routines -- entry verification and * bad entry deletion (pruning). */ +static struct xfs_dir2_sf_entry * +shortform_dir2_junk( + struct xfs_mount *mp, + struct xfs_dir2_sf_hdr *sfp, + struct xfs_dir2_sf_entry *sfep, + xfs_ino_t lino, + int *max_size, + int *index, + int *bytes_deleted, + int *ino_dirty) +{ + struct xfs_dir2_sf_entry *next_sfep; + int next_len; + int next_elen; + + if (lino == orphanage_ino) + orphanage_ino = 0; + + next_elen = xfs_dir3_sf_entsize(mp, sfp, sfep->namelen); + next_sfep = (xfs_dir2_sf_entry_t *)((__psint_t)sfep + next_elen); + + /* + * if we are just checking, simply return the pointer to the next entry + * here so that the checking loop can continue. + */ + if (no_modify) { + do_warn(_("would junk entry\n")); + return next_sfep; + } + + /* + * now move all the remaining entries down over the junked entry and + * clear the newly unused bytes at the tail of the directory region. + */ + next_len = *max_size - ((__psint_t)next_sfep - (__psint_t)sfp); + *max_size -= next_elen; + *bytes_deleted += next_elen; + + memmove(sfep, next_sfep, next_len); + memset((void *)((__psint_t)sfep + next_len), 0, next_elen); + sfp->count -= 1; + *ino_dirty = 1; + + /* + * WARNING: drop the index i by one so it matches the decremented count + * for accurate comparisons in the loop test + */ + (*index)--; + + if (verbose) + do_warn(_("junking entry\n")); + else + do_warn("\n"); + return sfep; +} + static void shortform_dir2_entry_check(xfs_mount_t *mp, xfs_ino_t ino, @@ -2201,15 +2306,13 @@ shortform_dir2_entry_check(xfs_mount_t *mp, xfs_ino_t lino; xfs_ino_t parent; struct xfs_dir2_sf_hdr *sfp; - xfs_dir2_sf_entry_t *sfep, *next_sfep, *tmp_sfep; - xfs_ifork_t *ifp; - ino_tree_node_t *irec; + struct xfs_dir2_sf_entry *sfep; + struct xfs_dir2_sf_entry *next_sfep; + struct xfs_ifork *ifp; + struct ino_tree_node *irec; int max_size; int ino_offset; int i; - int junkit; - int tmp_len; - int tmp_elen; int bad_sfnamelen; int namelen; int bytes_deleted; @@ -2266,9 +2369,7 @@ shortform_dir2_entry_check(xfs_mount_t *mp, for (i = 0; i < sfp->count && max_size > (__psint_t)next_sfep - (__psint_t)sfp; sfep = next_sfep, i++) { - junkit = 0; bad_sfnamelen = 0; - tmp_sfep = NULL; lino = xfs_dir3_sfe_get_ino(mp, sfp, sfep); @@ -2340,7 +2441,10 @@ shortform_dir2_entry_check(xfs_mount_t *mp, do_warn( _("entry \"%s\" in shortform directory %" PRIu64 " references non-existent inode %" PRIu64 "\n"), fname, ino, lino); - goto do_junkit; + next_sfep = shortform_dir2_junk(mp, sfp, sfep, lino, + &max_size, &i, &bytes_deleted, + ino_dirty); + continue; } ino_offset = XFS_INO_TO_AGINO(mp, lino) - irec->ino_startnum; @@ -2354,7 +2458,10 @@ shortform_dir2_entry_check(xfs_mount_t *mp, do_warn( _("entry \"%s\" in shortform directory inode %" PRIu64 " points to free inode %" PRIu64 "\n"), fname, ino, lino); - goto do_junkit; + next_sfep = shortform_dir2_junk(mp, sfp, sfep, lino, + &max_size, &i, &bytes_deleted, + ino_dirty); + continue; } /* * check if this inode is lost+found dir in the root @@ -2367,7 +2474,10 @@ shortform_dir2_entry_check(xfs_mount_t *mp, do_warn( _("%s (ino %" PRIu64 ") in root (%" PRIu64 ") is not a directory"), ORPHANAGE, lino, ino); - goto do_junkit; + next_sfep = shortform_dir2_junk(mp, sfp, sfep, + lino, &max_size, &i, + &bytes_deleted, ino_dirty); + continue; } /* * if this is a dup, it will be picked up below, @@ -2381,11 +2491,15 @@ shortform_dir2_entry_check(xfs_mount_t *mp, */ if (!dir_hash_add(mp, hashtab, (xfs_dir2_dataptr_t) (sfep - xfs_dir2_sf_firstentry(sfp)), - lino, sfep->namelen, sfep->name)) { + lino, sfep->namelen, sfep->name, + xfs_dir3_sfe_get_ftype(mp, sfp, sfep))) { do_warn( _("entry \"%s\" (ino %" PRIu64 ") in dir %" PRIu64 " is a duplicate name"), fname, lino, ino); - goto do_junkit; + next_sfep = shortform_dir2_junk(mp, sfp, sfep, lino, + &max_size, &i, &bytes_deleted, + ino_dirty); + continue; } if (!inode_isadir(irec, ino_offset)) { @@ -2403,11 +2517,14 @@ _("entry \"%s\" (ino %" PRIu64 ") in dir %" PRIu64 " is a duplicate name"), * the .. in the child, blow out the entry */ if (is_inode_reached(irec, ino_offset)) { - junkit = 1; do_warn( _("entry \"%s\" in directory inode %" PRIu64 " references already connected inode %" PRIu64 ".\n"), fname, ino, lino); + next_sfep = shortform_dir2_junk(mp, sfp, sfep, + lino, &max_size, &i, + &bytes_deleted, ino_dirty); + continue; } else if (parent == ino) { add_inode_reached(irec, ino_offset); add_inode_ref(current_irec, current_ino_offset); @@ -2423,76 +2540,60 @@ _("entry \"%s\" (ino %" PRIu64 ") in dir %" PRIu64 " is a duplicate name"), add_dotdot_update(XFS_INO_TO_AGNO(mp, lino), irec, ino_offset); } else { - junkit = 1; do_warn( _("entry \"%s\" in directory inode %" PRIu64 " not consistent with .. value (%" PRIu64 ") in inode %" PRIu64 ",\n"), fname, ino, parent, lino); + next_sfep = shortform_dir2_junk(mp, sfp, sfep, + lino, &max_size, &i, + &bytes_deleted, ino_dirty); + continue; } } - if (junkit) { -do_junkit: - if (lino == orphanage_ino) - orphanage_ino = 0; - if (!no_modify) { - tmp_elen = xfs_dir3_sf_entsize(mp, sfp, - sfep->namelen); - tmp_sfep = (xfs_dir2_sf_entry_t *) - ((__psint_t) sfep + tmp_elen); - tmp_len = max_size - ((__psint_t) tmp_sfep - - (__psint_t) sfp); - max_size -= tmp_elen; - bytes_deleted += tmp_elen; - - memmove(sfep, tmp_sfep, tmp_len); - - sfp->count -= 1; - memset((void *)((__psint_t)sfep + tmp_len), 0, - tmp_elen); - - /* - * set the tmp value to the current - * pointer so we'll process the entry - * we just moved up - */ - tmp_sfep = sfep; - - /* - * WARNING: drop the index i by one - * so it matches the decremented count for - * accurate comparisons in the loop test - */ - i--; + /* validate ftype field if supported */ + if (xfs_sb_version_hasftype(&mp->m_sb)) { + __uint8_t dir_ftype; + __uint8_t ino_ftype; - *ino_dirty = 1; + dir_ftype = xfs_dir3_sfe_get_ftype(mp, sfp, sfep); + ino_ftype = get_inode_ftype(irec, ino_offset); - if (verbose) - do_warn(_("junking entry\n")); - else - do_warn("\n"); - } else { - do_warn(_("would junk entry\n")); + if (dir_ftype != ino_ftype) { + if (no_modify) { + do_warn( + _("would fix ftype mismatch (%d/%d) in directory/child inode %" PRIu64 "/%" PRIu64 "\n"), + dir_ftype, ino_ftype, + ino, lino); + } else { + do_warn( + _("fixing ftype mismatch (%d/%d) in directory/child inode %" PRIu64 "/%" PRIu64 "\n"), + dir_ftype, ino_ftype, + ino, lino); + xfs_dir3_sfe_put_ftype(mp, sfp, sfep, + ino_ftype); + dir_hash_update_ftype(hashtab, + (xfs_dir2_dataptr_t)(sfep - xfs_dir2_sf_firstentry(sfp)), + ino_ftype); + *ino_dirty = 1; + } } - } else if (lino > XFS_DIR2_MAX_SHORT_INUM) + } + + if (lino > XFS_DIR2_MAX_SHORT_INUM) i8++; /* - * go onto next entry unless we've just junked an - * entry in which the current entry pointer points - * to an unprocessed entry. have to take into entries - * with bad namelen into account in no modify mode since we - * calculate size based on next_sfep. + * go onto next entry - we have to take entries with bad namelen + * into account in no modify mode since we calculate size based + * on next_sfep. */ ASSERT(no_modify || bad_sfnamelen == 0); - - next_sfep = (tmp_sfep == NULL) - ? (xfs_dir2_sf_entry_t *) ((__psint_t) sfep - + ((!bad_sfnamelen) - ? xfs_dir3_sf_entsize(mp, sfp, sfep->namelen) - : xfs_dir3_sf_entsize(mp, sfp, namelen))) - : tmp_sfep; + next_sfep = (struct xfs_dir2_sf_entry *)((__psint_t)sfep + + (bad_sfnamelen + ? xfs_dir3_sf_entsize(mp, sfp, namelen) + : xfs_dir3_sf_entsize(mp, sfp, sfep->namelen))); } if (sfp->i8count != i8) { @@ -2501,6 +2602,8 @@ do_junkit: ino); } else { if (i8 == 0) { + struct xfs_dir2_sf_entry *tmp_sfep; + tmp_sfep = next_sfep; process_sf_dir2_fixi8(mp, sfp, &tmp_sfep); bytes_deleted += @@ -2518,8 +2621,7 @@ do_junkit: /* * sync up sizes if required */ - if (*ino_dirty) { - ASSERT(bytes_deleted > 0); + if (*ino_dirty && bytes_deleted > 0) { ASSERT(!no_modify); libxfs_idata_realloc(ip, -bytes_deleted, XFS_DATA_FORK); ip->i_d.di_size -= bytes_deleted; diff --git a/repair/scan.c b/repair/scan.c index 49ed194..73b4581 100644 --- a/repair/scan.c +++ b/repair/scan.c @@ -866,9 +866,9 @@ _("inode rec for ino %" PRIu64 " (%d/%d) overlaps existing rec (start %d/%d)\n") for (j = 0; j < XFS_INODES_PER_CHUNK; j++) { if (XFS_INOBT_IS_FREE_DISK(rp, j)) { nfree++; - add_aginode_uncertain(agno, ino + j, 1); + add_aginode_uncertain(mp, agno, ino + j, 1); } else { - add_aginode_uncertain(agno, ino + j, 0); + add_aginode_uncertain(mp, agno, ino + j, 0); } } } -- 1.8.4.rc3 _______________________________________________ xfs mailing list xfs@xxxxxxxxxxx http://oss.sgi.com/mailman/listinfo/xfs