On Tue, Mar 12, 2024 at 07:11:01PM -0700, Darrick J. Wong wrote:
> From: Darrick J. Wong <djwong@xxxxxxxxxx>
>
> Amortize the cost of indirect calls further by loading a batch of
> records into a new btree block instead of one record per ->get_record
> call. On a rmap btree with 3.9 million records, this reduces the
> runtime of xfs_btree_bload by 3% for xfsprogs. For the upcoming online
> repair functionality, this will reduce runtime by 6% when spectre
> mitigations are enabled in the kernel.
>
> Signed-off-by: Darrick J. Wong <djwong@xxxxxxxxxx>

Reviewed-by: Bill O'Donnell <bodonnel@xxxxxxxxxx>

> ---
>  repair/agbtree.c |  161 ++++++++++++++++++++++++++++++------------------------
>  1 file changed, 90 insertions(+), 71 deletions(-)
>
>
> diff --git a/repair/agbtree.c b/repair/agbtree.c
> index 981d8e340bf2..e014e216e0a5 100644
> --- a/repair/agbtree.c
> +++ b/repair/agbtree.c
> @@ -220,15 +220,19 @@ get_bnobt_records(
>  	struct bt_rebuild	*btr = priv;
>  	struct xfs_alloc_rec_incore	*arec = &cur->bc_rec.a;
>  	union xfs_btree_rec	*block_rec;
> +	unsigned int		loaded;
>
> -	btr->bno_rec = get_bno_rec(cur, btr->bno_rec);
> -	arec->ar_startblock = btr->bno_rec->ex_startblock;
> -	arec->ar_blockcount = btr->bno_rec->ex_blockcount;
> -	btr->freeblks += btr->bno_rec->ex_blockcount;
> +	for (loaded = 0; loaded < nr_wanted; loaded++, idx++) {
> +		btr->bno_rec = get_bno_rec(cur, btr->bno_rec);
> +		arec->ar_startblock = btr->bno_rec->ex_startblock;
> +		arec->ar_blockcount = btr->bno_rec->ex_blockcount;
> +		btr->freeblks += btr->bno_rec->ex_blockcount;
>
> -	block_rec = libxfs_btree_rec_addr(cur, idx, block);
> -	cur->bc_ops->init_rec_from_cur(cur, block_rec);
> -	return 1;
> +		block_rec = libxfs_btree_rec_addr(cur, idx, block);
> +		cur->bc_ops->init_rec_from_cur(cur, block_rec);
> +	}
> +
> +	return loaded;
>  }
>
>  void
> @@ -388,65 +392,72 @@ get_inobt_records(
>  {
>  	struct bt_rebuild	*btr = priv;
>  	struct xfs_inobt_rec_incore	*irec = &cur->bc_rec.i;
> -	struct ino_tree_node	*ino_rec;
> -	union xfs_btree_rec	*block_rec;
> -	int			inocnt = 0;
> -	int			finocnt = 0;
> -	int			k;
> -
> -	btr->ino_rec = ino_rec = get_ino_rec(cur, btr->ino_rec);
> -
> -	/* Transform the incore record into an on-disk record. */
> -	irec->ir_startino = ino_rec->ino_startnum;
> -	irec->ir_free = ino_rec->ir_free;
> -
> -	for (k = 0; k < sizeof(xfs_inofree_t) * NBBY; k++) {
> -		ASSERT(is_inode_confirmed(ino_rec, k));
> -
> -		if (is_inode_sparse(ino_rec, k))
> -			continue;
> -		if (is_inode_free(ino_rec, k))
> -			finocnt++;
> -		inocnt++;
> -	}
> +	unsigned int		loaded = 0;
> +
> +	while (loaded < nr_wanted) {
> +		struct ino_tree_node	*ino_rec;
> +		union xfs_btree_rec	*block_rec;
> +		int			inocnt = 0;
> +		int			finocnt = 0;
> +		int			k;
> +
> +		btr->ino_rec = ino_rec = get_ino_rec(cur, btr->ino_rec);
>
> -	irec->ir_count = inocnt;
> -	irec->ir_freecount = finocnt;
> -
> -	if (xfs_has_sparseinodes(cur->bc_mp)) {
> -		uint64_t	sparse;
> -		int		spmask;
> -		uint16_t	holemask;
> -
> -		/*
> -		 * Convert the 64-bit in-core sparse inode state to the
> -		 * 16-bit on-disk holemask.
> -		 */
> -		holemask = 0;
> -		spmask = (1 << XFS_INODES_PER_HOLEMASK_BIT) - 1;
> -		sparse = ino_rec->ir_sparse;
> -		for (k = 0; k < XFS_INOBT_HOLEMASK_BITS; k++) {
> -			if (sparse & spmask) {
> -				ASSERT((sparse & spmask) == spmask);
> -				holemask |= (1 << k);
> -			} else
> -				ASSERT((sparse & spmask) == 0);
> -			sparse >>= XFS_INODES_PER_HOLEMASK_BIT;
> +		/* Transform the incore record into an on-disk record. */
> +		irec->ir_startino = ino_rec->ino_startnum;
> +		irec->ir_free = ino_rec->ir_free;
> +
> +		for (k = 0; k < sizeof(xfs_inofree_t) * NBBY; k++) {
> +			ASSERT(is_inode_confirmed(ino_rec, k));
> +
> +			if (is_inode_sparse(ino_rec, k))
> +				continue;
> +			if (is_inode_free(ino_rec, k))
> +				finocnt++;
> +			inocnt++;
> +		}
>
> -		irec->ir_holemask = holemask;
> -	} else {
> -		irec->ir_holemask = 0;
> -	}
> +		irec->ir_count = inocnt;
> +		irec->ir_freecount = finocnt;
>
> -	if (btr->first_agino == NULLAGINO)
> -		btr->first_agino = ino_rec->ino_startnum;
> -	btr->freecount += finocnt;
> -	btr->count += inocnt;
> +		if (xfs_has_sparseinodes(cur->bc_mp)) {
> +			uint64_t	sparse;
> +			int		spmask;
> +			uint16_t	holemask;
> +
> +			/*
> +			 * Convert the 64-bit in-core sparse inode state to the
> +			 * 16-bit on-disk holemask.
> +			 */
> +			holemask = 0;
> +			spmask = (1 << XFS_INODES_PER_HOLEMASK_BIT) - 1;
> +			sparse = ino_rec->ir_sparse;
> +			for (k = 0; k < XFS_INOBT_HOLEMASK_BITS; k++) {
> +				if (sparse & spmask) {
> +					ASSERT((sparse & spmask) == spmask);
> +					holemask |= (1 << k);
> +				} else
> +					ASSERT((sparse & spmask) == 0);
> +				sparse >>= XFS_INODES_PER_HOLEMASK_BIT;
> +			}
> +
> +			irec->ir_holemask = holemask;
> +		} else {
> +			irec->ir_holemask = 0;
> +		}
> +
> +		if (btr->first_agino == NULLAGINO)
> +			btr->first_agino = ino_rec->ino_startnum;
> +		btr->freecount += finocnt;
> +		btr->count += inocnt;
> +
> +		block_rec = libxfs_btree_rec_addr(cur, idx, block);
> +		cur->bc_ops->init_rec_from_cur(cur, block_rec);
> +		loaded++;
> +		idx++;
> +	}
>
> -	block_rec = libxfs_btree_rec_addr(cur, idx, block);
> -	cur->bc_ops->init_rec_from_cur(cur, block_rec);
> -	return 1;
> +	return loaded;
>  }
>
>  /* Initialize both inode btree cursors as needed. */
> @@ -585,13 +596,17 @@ get_rmapbt_records(
>  	struct xfs_rmap_irec	*rec;
>  	struct bt_rebuild	*btr = priv;
>  	union xfs_btree_rec	*block_rec;
> +	unsigned int		loaded;
>
> -	rec = pop_slab_cursor(btr->slab_cursor);
> -	memcpy(&cur->bc_rec.r, rec, sizeof(struct xfs_rmap_irec));
> +	for (loaded = 0; loaded < nr_wanted; loaded++, idx++) {
> +		rec = pop_slab_cursor(btr->slab_cursor);
> +		memcpy(&cur->bc_rec.r, rec, sizeof(struct xfs_rmap_irec));
>
> -	block_rec = libxfs_btree_rec_addr(cur, idx, block);
> -	cur->bc_ops->init_rec_from_cur(cur, block_rec);
> -	return 1;
> +		block_rec = libxfs_btree_rec_addr(cur, idx, block);
> +		cur->bc_ops->init_rec_from_cur(cur, block_rec);
> +	}
> +
> +	return loaded;
>  }
>
>  /* Set up the rmap rebuild parameters. */
> @@ -663,13 +678,17 @@ get_refcountbt_records(
>  	struct xfs_refcount_irec	*rec;
>  	struct bt_rebuild	*btr = priv;
>  	union xfs_btree_rec	*block_rec;
> +	unsigned int		loaded;
>
> -	rec = pop_slab_cursor(btr->slab_cursor);
> -	memcpy(&cur->bc_rec.rc, rec, sizeof(struct xfs_refcount_irec));
> +	for (loaded = 0; loaded < nr_wanted; loaded++, idx++) {
> +		rec = pop_slab_cursor(btr->slab_cursor);
> +		memcpy(&cur->bc_rec.rc, rec, sizeof(struct xfs_refcount_irec));
>
> -	block_rec = libxfs_btree_rec_addr(cur, idx, block);
> -	cur->bc_ops->init_rec_from_cur(cur, block_rec);
> -	return 1;
> +		block_rec = libxfs_btree_rec_addr(cur, idx, block);
> +		cur->bc_ops->init_rec_from_cur(cur, block_rec);
> +	}
> +
> +	return loaded;
>  }
>
>  /* Set up the refcount rebuild parameters. */
>
>
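FWIW, for anyone reading along without the xfs sources handy, the shape of
the new callback contract is roughly the standalone sketch below. This is a
minimal illustration, not the real interface: struct rec, struct rec_source,
and get_records() here are hypothetical stand-ins for the xfs types, and only
the control flow mirrors the patch, i.e. one callback invocation fills up to
nr_wanted record slots starting at idx and returns how many it actually
loaded, where the old contract returned exactly one record per call.

/*
 * get_records_sketch.c: illustrative stand-in for the batched
 * ->get_records pattern above.  Build with: cc get_records_sketch.c
 */
#include <stdio.h>

struct rec {
	unsigned long	startblock;
	unsigned long	blockcount;
};

/* Hypothetical record source; stands in for get_bno_rec()/pop_slab_cursor(). */
struct rec_source {
	const struct rec	*recs;
	unsigned int		nr;
	unsigned int		next;
};

/*
 * Batched callback: fill block slots [idx, idx + nr_wanted) from priv and
 * return the count loaded.  The real callbacks never run short of records;
 * the early break here is just to keep the sketch self-contained.
 */
static unsigned int
get_records(
	struct rec		*block,
	unsigned int		idx,
	unsigned int		nr_wanted,
	void			*priv)
{
	struct rec_source	*src = priv;
	unsigned int		loaded;

	for (loaded = 0; loaded < nr_wanted; loaded++, idx++) {
		if (src->next >= src->nr)
			break;
		/* stands in for init_rec_from_cur() writing into the block */
		block[idx] = src->recs[src->next++];
	}

	return loaded;
}

int
main(void)
{
	static const struct rec	input[] = {
		{ 10, 4 }, { 20, 8 }, { 40, 2 },
	};
	struct rec_source	src = { input, 3, 0 };
	struct rec		block[8];
	unsigned int		i, loaded;

	/* One indirect call loads the whole batch into the new block. */
	loaded = get_records(block, 0, 8, &src);
	for (i = 0; i < loaded; i++)
		printf("rec %u: startblock %lu blockcount %lu\n",
				i, block[i].startblock, block[i].blockcount);
	return 0;
}

The point of the batching is that xfs_btree_bload now pays one indirect call
per block's worth of records rather than one per record, which is where the
savings come from once spectre mitigations make indirect calls expensive.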