On Wed, Jul 04, 2018 at 01:00:22PM +1000, Dave Chinner wrote: > On Sun, Jun 24, 2018 at 12:25:04PM -0700, Darrick J. Wong wrote: > > +#include "scrub/repair.h" > > + > > +/* Inode fork block mapping (BMBT) repair. */ > > + > > +struct xfs_repair_bmap_extent { > > + struct list_head list; > > + struct xfs_rmap_irec rmap; > > + xfs_agnumber_t agno; > > +}; > > + > > +struct xfs_repair_bmap { > > + struct list_head *extlist; > > + struct xfs_repair_extent_list *btlist; > > + struct xfs_scrub_context *sc; > > + xfs_ino_t ino; > > + xfs_rfsblock_t otherfork_blocks; > > + xfs_rfsblock_t bmbt_blocks; > > + xfs_extnum_t extents; > > + int whichfork; > > +}; > > + > > +/* Record extents that belong to this inode's fork. */ > > +STATIC int > > +xfs_repair_bmap_extent_fn( > > + struct xfs_btree_cur *cur, > > + struct xfs_rmap_irec *rec, > > + void *priv) > > +{ > > + struct xfs_repair_bmap *rb = priv; > > + struct xfs_repair_bmap_extent *rbe; > > + struct xfs_mount *mp = cur->bc_mp; > > + xfs_fsblock_t fsbno; > > + int error = 0; > > + > > + if (xfs_scrub_should_terminate(rb->sc, &error)) > > + return error; > > + > > + /* Skip extents which are not owned by this inode and fork. */ > > + if (rec->rm_owner != rb->ino) { > > + return 0; > > + } else if (rb->whichfork == XFS_DATA_FORK && > > + (rec->rm_flags & XFS_RMAP_ATTR_FORK)) { > > + rb->otherfork_blocks += rec->rm_blockcount; > > + return 0; > > + } else if (rb->whichfork == XFS_ATTR_FORK && > > + !(rec->rm_flags & XFS_RMAP_ATTR_FORK)) { > > + rb->otherfork_blocks += rec->rm_blockcount; > > + return 0; > > + } > > + > > + rb->extents++; > > Shouldn't this be incremented after we've checked for and processed > old BMBT blocks? Yes. > > + /* Delete the old bmbt blocks later. */ > > + if (rec->rm_flags & XFS_RMAP_BMBT_BLOCK) { > > + fsbno = XFS_AGB_TO_FSB(mp, cur->bc_private.a.agno, > > + rec->rm_startblock); > > + rb->bmbt_blocks += rec->rm_blockcount; > > + return xfs_repair_collect_btree_extent(rb->sc, rb->btlist, > > + fsbno, rec->rm_blockcount); > > + } > .... > > + > > +/* Check for garbage inputs. */ > > +STATIC int > > +xfs_repair_bmap_check_inputs( > > + struct xfs_scrub_context *sc, > > + int whichfork) > > +{ > > + ASSERT(whichfork == XFS_DATA_FORK || whichfork == XFS_ATTR_FORK); > > + > > + /* Don't know how to repair the other fork formats. */ > > + if (XFS_IFORK_FORMAT(sc->ip, whichfork) != XFS_DINODE_FMT_EXTENTS && > > + XFS_IFORK_FORMAT(sc->ip, whichfork) != XFS_DINODE_FMT_BTREE) > > + return -EOPNOTSUPP; > > + > > + /* Only files, symlinks, and directories get to have data forks. */ > > + if (whichfork == XFS_DATA_FORK && !S_ISREG(VFS_I(sc->ip)->i_mode) && > > + !S_ISDIR(VFS_I(sc->ip)->i_mode) && !S_ISLNK(VFS_I(sc->ip)->i_mode)) > > + return -EINVAL; > > That'd be nicer as a switch statement. Will fix. > > + > > + /* If we somehow have delalloc extents, forget it. */ > > + if (whichfork == XFS_DATA_FORK && sc->ip->i_delayed_blks) > > + return -EBUSY; > > and this can be rolled into the same if (datafork) branch. > > .... > > + if (!xfs_sb_version_hasrmapbt(&sc->mp->m_sb)) > > + return -EOPNOTSUPP; > > Do this first? It's redundant, see xfs_repair_bmap_check_inputs. Will remove this one. > Hmmm, and if you do the attr fork check second then the rest > of the code is all data fork. i.e. > > if (!rmap) > return -EOPNOTSUPP > if (attrfork) { > if (no attr fork) > return .... > return 0 > } > /* now do all data fork checks */ > > This becomes a lot easier to follow. Ok. > > +/* > > + * Collect block mappings for this fork of this inode and decide if we have > > + * enough space to rebuild. Caller is responsible for cleaning up the list if > > + * anything goes wrong. > > + */ > > +STATIC int > > +xfs_repair_bmap_find_mappings( > > + struct xfs_scrub_context *sc, > > + int whichfork, > > + struct list_head *mapping_records, > > + struct xfs_repair_extent_list *old_bmbt_blocks, > > + xfs_rfsblock_t *old_bmbt_block_count, > > + xfs_rfsblock_t *otherfork_blocks) > > +{ > > + struct xfs_repair_bmap rb; > > + xfs_agnumber_t agno; > > + unsigned int resblks; > > + int error; > > + > > + memset(&rb, 0, sizeof(rb)); > > + rb.extlist = mapping_records; > > + rb.btlist = old_bmbt_blocks; > > + rb.ino = sc->ip->i_ino; > > + rb.whichfork = whichfork; > > + rb.sc = sc; > > + > > + /* Iterate the rmaps for extents. */ > > + for (agno = 0; agno < sc->mp->m_sb.sb_agcount; agno++) { > > + error = xfs_repair_bmap_scan_ag(&rb, agno); > > + if (error) > > + return error; > > + } > > + > > + /* > > + * Guess how many blocks we're going to need to rebuild an entire bmap > > + * from the number of extents we found, and pump up our transaction to > > + * have sufficient block reservation. > > + */ > > + resblks = xfs_bmbt_calc_size(sc->mp, rb.extents); > > + error = xfs_trans_reserve_more(sc->tp, resblks, 0); > > + if (error) > > + return error; > > I don't really like this, but I can't think of a way around needing > it at the moment. Me neither. (That is to say, I can't think of a way around it that doesn't involve backing all the way out to the setup function, which would be pretty gruesome.) > > + > > + *otherfork_blocks = rb.otherfork_blocks; > > + *old_bmbt_block_count = rb.bmbt_blocks; > > + return 0; > > +} > > + > > +/* Update the inode counters. */ > > +STATIC int > > +xfs_repair_bmap_reset_counters( > > + struct xfs_scrub_context *sc, > > + xfs_rfsblock_t old_bmbt_block_count, > > + xfs_rfsblock_t otherfork_blocks, > > + int *log_flags) > > +{ > > + int error; > > + > > + xfs_trans_ijoin(sc->tp, sc->ip, 0); > > + > > + /* > > + * Drop the block counts associated with this fork since we'll re-add > > + * them with the bmap routines later. > > + */ > > + sc->ip->i_d.di_nblocks = otherfork_blocks; > > This needs a little more explanation. i.e. that the rmap walk we > just performed for this fork also counted all the data and bmbt > blocks for the other fork so this is really only zeroing the block > count for the fork we are about to rebuild. /* * We're going to use the bmap routines to reconstruct a fork from rmap * records. Those functions increment di_nblocks for us, so we need to * subtract out all the data and bmbt blocks from the fork we're about * to rebuild. otherfork_blocks reflects all the data and bmbt blocks * for the other fork, so this assignment effectively performs the * subtraction for us. */ > > > +/* Initialize a new fork and implant it in the inode. */ > > +STATIC void > > +xfs_repair_bmap_reset_fork( > > + struct xfs_scrub_context *sc, > > + int whichfork, > > + bool has_mappings, > > + int *log_flags) > > +{ > > + /* Set us back to extents format with zero records. */ > > + XFS_IFORK_FMT_SET(sc->ip, whichfork, XFS_DINODE_FMT_EXTENTS); > > + XFS_IFORK_NEXT_SET(sc->ip, whichfork, 0); > > + > > + /* Reinitialize the on-disk fork. */ > > I don't think this touches the on-disk fork - it's re-initialising > the in-memory fork. Will fix. > > + if (XFS_IFORK_PTR(sc->ip, whichfork) != NULL) > > + xfs_idestroy_fork(sc->ip, whichfork); > > + if (whichfork == XFS_DATA_FORK) { > > + memset(&sc->ip->i_df, 0, sizeof(struct xfs_ifork)); > > + sc->ip->i_df.if_flags |= XFS_IFEXTENTS; > > + } else if (whichfork == XFS_ATTR_FORK) { > > + if (has_mappings) { > > + sc->ip->i_afp = NULL; > > + } else { > > + sc->ip->i_afp = kmem_zone_zalloc(xfs_ifork_zone, > > + KM_SLEEP); > > + sc->ip->i_afp->if_flags |= XFS_IFEXTENTS; > > + } > > + } /* * Now that we've reinitialized the in-memory fork and set the inode * back to extents format with zero extents, any extents that we * subsequently map into the file will reinitialize the on-disk fork * area for us. All we have to do is log the inode core to preserve * the format and extent count fields. */ > > + *log_flags |= XFS_ILOG_CORE; > > +} > ...... > > > +/* Repair an inode fork. */ > > +STATIC int > > +xfs_repair_bmap( > > + struct xfs_scrub_context *sc, > > + int whichfork) > > +{ > > + struct list_head mapping_records; > > + struct xfs_repair_extent_list old_bmbt_blocks; > > + struct xfs_inode *ip = sc->ip; > > + xfs_rfsblock_t old_bmbt_block_count; > > + xfs_rfsblock_t otherfork_blocks; > > + int log_flags = 0; > > + int error = 0; > > + > > + error = xfs_repair_bmap_check_inputs(sc, whichfork); > > + if (error) > > + return error; > > + > > + /* > > + * If this is a file data fork, wait for all pending directio to > > + * complete, then tear everything out of the page cache. > > + */ > > + if (S_ISREG(VFS_I(ip)->i_mode) && whichfork == XFS_DATA_FORK) { > > + inode_dio_wait(VFS_I(ip)); > > + truncate_inode_pages(VFS_I(ip)->i_mapping, 0); > > + } > > Why would we be waiting only for DIO here? Haven't we already locked > up the inode, flushed dirty data, waited for dio and invalidated the > page cache when we called xfs_scrub_setup_inode_bmap() prior to > doing this work? Extra paranoia? IOWs I don't know why. :) Probably we should xfs_break_layouts here though. --D > Cheers, > > Dave. > -- > Dave Chinner > david@xxxxxxxxxxxxx > -- > To unsubscribe from this list: send the line "unsubscribe linux-xfs" in > the body of a message to majordomo@xxxxxxxxxxxxxxx > More majordomo info at http://vger.kernel.org/majordomo-info.html -- To unsubscribe from this list: send the line "unsubscribe linux-xfs" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html