On Tue, Apr 18, 2017 at 05:14:34PM -0700, Darrick J. Wong wrote: > Currently, the dir2 leaf block getdents function uses a complex state > tracking mechanism to create a shadow copy of the block mappings and > then uses the shadow copy to schedule readahead. Since the read and > readahead functions are perfectly capable of reading the mappings > themselves, we can tear all that out in favor of a simpler function that > simply keeps pushing the readahead window further out. > > Inspired-by: Dave Chinner <david@xxxxxxxxxxxxx> > Signed-off-by: Darrick J. Wong <darrick.wong@xxxxxxxxxx> > --- I attempted to take a look at this yesterday (email has been dead) but noticed it didn't apply to for-next (w/ or w/o Eric's fix)..? > fs/xfs/xfs_dir2_readdir.c | 324 ++++++++++++--------------------------------- > 1 file changed, 87 insertions(+), 237 deletions(-) > > diff --git a/fs/xfs/xfs_dir2_readdir.c b/fs/xfs/xfs_dir2_readdir.c > index 929e8b6..290c610 100644 > --- a/fs/xfs/xfs_dir2_readdir.c > +++ b/fs/xfs/xfs_dir2_readdir.c > @@ -243,215 +243,109 @@ xfs_dir2_block_getdents( ... > + while (ra_want > 0 && next_ra < last_da) { > + nmap = 1; > + error = xfs_bmapi_read(dp, next_ra, last_da - next_ra, > + &map, &nmap, 0); > + if (error || !nmap) > + break; > + next_ra = roundup((xfs_dablk_t)map.br_startoff, geo->fsbcount); > + while (map.br_startblock != HOLESTARTBLOCK && > + next_ra < map.br_startoff + map.br_blockcount) { > + xfs_dir3_data_readahead(dp, next_ra, -2); > + *ra_blk = next_ra; > + ra_want -= geo->fsbcount; > + next_ra += geo->fsbcount; > } FWIW and not having looked at the rest of the patch, it does look like the readahead window can stretch far beyond the expected size if you happen to have a large contiguous extent (IOW, the inner loop doesn't consider ra_want). Brian > + next_ra = map.br_startoff + map.br_blockcount; > } > blk_finish_plug(&plug); > > @@ -475,14 +369,14 @@ xfs_dir2_leaf_getdents( > xfs_dir2_data_hdr_t *hdr; /* data block header */ > xfs_dir2_data_entry_t *dep; /* data entry */ > xfs_dir2_data_unused_t *dup; /* unused entry */ > - int error = 0; /* error return value */ > - int length; /* temporary length value */ > - int byteoff; /* offset in current block */ > - xfs_dir2_off_t curoff; /* current overall offset */ > - xfs_dir2_off_t newoff; /* new curoff after new blk */ > char *ptr = NULL; /* pointer to current data */ > - struct xfs_dir2_leaf_map_info *map_info; > struct xfs_da_geometry *geo = args->geo; > + xfs_dablk_t rablk = 0; /* current readahead block */ > + xfs_dir2_off_t curoff; /* current overall offset */ > + int length; /* temporary length value */ > + int byteoff; /* offset in current block */ > + int lock_mode; > + int error = 0; /* error return value */ > > /* > * If the offset is at or past the largest allowed value, > @@ -492,30 +386,12 @@ xfs_dir2_leaf_getdents( > return 0; > > /* > - * Set up to bmap a number of blocks based on the caller's > - * buffer size, the directory block size, and the filesystem > - * block size. > - */ > - length = howmany(bufsize + geo->blksize, (1 << geo->fsblog)); > - map_info = kmem_zalloc(offsetof(struct xfs_dir2_leaf_map_info, map) + > - (length * sizeof(struct xfs_bmbt_irec)), > - KM_SLEEP | KM_NOFS); > - map_info->map_size = length; > - > - /* > * Inside the loop we keep the main offset value as a byte offset > * in the directory file. > */ > curoff = xfs_dir2_dataptr_to_byte(ctx->pos); > > /* > - * Force this conversion through db so we truncate the offset > - * down to get the start of the data block. > - */ > - map_info->map_off = xfs_dir2_db_to_da(geo, > - xfs_dir2_byte_to_db(geo, curoff)); > - > - /* > * Loop over directory entries until we reach the end offset. > * Get more blocks and readahead as necessary. > */ > @@ -527,38 +403,13 @@ xfs_dir2_leaf_getdents( > * current buffer, need to get another one. > */ > if (!bp || ptr >= (char *)bp->b_addr + geo->blksize) { > - int lock_mode; > - bool trim_map = false; > - > - if (bp) { > - xfs_trans_brelse(args->trans, bp); > - bp = NULL; > - trim_map = true; > - } > - > lock_mode = xfs_ilock_data_map_shared(dp); > - error = xfs_dir2_leaf_readbuf(args, bufsize, map_info, > - &curoff, &bp, trim_map); > + error = xfs_dir2_leaf_readbuf(args, bufsize, &curoff, > + &rablk, &bp); > xfs_iunlock(dp, lock_mode); > - if (error || !map_info->map_valid) > + if (error || !bp) > break; > > - /* > - * Having done a read, we need to set a new offset. > - */ > - newoff = xfs_dir2_db_off_to_byte(geo, > - map_info->curdb, 0); > - /* > - * Start of the current block. > - */ > - if (curoff < newoff) > - curoff = newoff; > - /* > - * Make sure we're in the right block. > - */ > - else if (curoff > newoff) > - ASSERT(xfs_dir2_byte_to_db(geo, curoff) == > - map_info->curdb); > hdr = bp->b_addr; > xfs_dir3_data_check(dp, bp); > /* > @@ -643,7 +494,6 @@ xfs_dir2_leaf_getdents( > ctx->pos = XFS_DIR2_MAX_DATAPTR & 0x7fffffff; > else > ctx->pos = xfs_dir2_byte_to_dataptr(curoff) & 0x7fffffff; > - kmem_free(map_info); > if (bp) > xfs_trans_brelse(args->trans, bp); > return error; > -- > To unsubscribe from this list: send the line "unsubscribe linux-xfs" in > the body of a message to majordomo@xxxxxxxxxxxxxxx > More majordomo info at http://vger.kernel.org/majordomo-info.html -- To unsubscribe from this list: send the line "unsubscribe linux-xfs" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html