On Mon, Jun 05, 2023 at 12:22:35 PM -0700, Darrick J. Wong wrote: > On Mon, Jun 05, 2023 at 02:49:49PM +0530, Chandan Babu R wrote: >> On Tue, May 23, 2023 at 09:48:07 AM -0700, Darrick J. Wong wrote: >> > On Tue, May 23, 2023 at 02:30:31PM +0530, Chandan Babu R wrote: >> >> This commit changes set_cur() to be able to read from external log >> >> devices. This is required by a future commit which will add the ability to >> >> dump metadata from external log devices. >> >> >> >> Signed-off-by: Chandan Babu R <chandan.babu@xxxxxxxxxx> >> >> --- >> >> db/io.c | 22 +++++++++++++++------- >> >> db/type.c | 2 ++ >> >> db/type.h | 2 +- >> >> 3 files changed, 18 insertions(+), 8 deletions(-) >> >> >> >> diff --git a/db/io.c b/db/io.c >> >> index 3d2572364..e8c8f57e2 100644 >> >> --- a/db/io.c >> >> +++ b/db/io.c >> >> @@ -516,12 +516,13 @@ set_cur( >> >> int ring_flag, >> >> bbmap_t *bbmap) >> >> { >> >> - struct xfs_buf *bp; >> >> - xfs_ino_t dirino; >> >> - xfs_ino_t ino; >> >> - uint16_t mode; >> >> + struct xfs_buftarg *btargp; >> >> + struct xfs_buf *bp; >> >> + xfs_ino_t dirino; >> >> + xfs_ino_t ino; >> >> + uint16_t mode; >> >> const struct xfs_buf_ops *ops = type ? type->bops : NULL; >> >> - int error; >> >> + int error; >> >> >> >> if (iocur_sp < 0) { >> >> dbprintf(_("set_cur no stack element to set\n")); >> >> @@ -534,7 +535,14 @@ set_cur( >> >> pop_cur(); >> >> push_cur(); >> >> >> >> + btargp = mp->m_ddev_targp; >> >> + if (type->typnm == TYP_ELOG) { >> > >> > This feels like a layering violation, see below... >> > >> >> + ASSERT(mp->m_ddev_targp != mp->m_logdev_targp); >> >> + btargp = mp->m_logdev_targp; >> >> + } >> >> + >> >> if (bbmap) { >> >> + ASSERT(btargp == mp->m_ddev_targp); >> >> #ifdef DEBUG_BBMAP >> >> int i; >> >> printf(_("xfs_db got a bbmap for %lld\n"), (long long)blknum); >> >> @@ -548,11 +556,11 @@ set_cur( >> >> if (!iocur_top->bbmap) >> >> return; >> >> memcpy(iocur_top->bbmap, bbmap, sizeof(struct bbmap)); >> >> - error = -libxfs_buf_read_map(mp->m_ddev_targp, bbmap->b, >> >> + error = -libxfs_buf_read_map(btargp, bbmap->b, >> >> bbmap->nmaps, LIBXFS_READBUF_SALVAGE, &bp, >> >> ops); >> >> } else { >> >> - error = -libxfs_buf_read(mp->m_ddev_targp, blknum, len, >> >> + error = -libxfs_buf_read(btargp, blknum, len, >> >> LIBXFS_READBUF_SALVAGE, &bp, ops); >> >> iocur_top->bbmap = NULL; >> >> } >> >> diff --git a/db/type.c b/db/type.c >> >> index efe704456..cc406ae4c 100644 >> >> --- a/db/type.c >> >> +++ b/db/type.c >> >> @@ -100,6 +100,7 @@ static const typ_t __typtab_crc[] = { >> >> { TYP_INODE, "inode", handle_struct, inode_crc_hfld, >> >> &xfs_inode_buf_ops, TYP_F_CRC_FUNC, xfs_inode_set_crc }, >> >> { TYP_LOG, "log", NULL, NULL, NULL, TYP_F_NO_CRC_OFF }, >> >> + { TYP_ELOG, "elog", NULL, NULL, NULL, TYP_F_NO_CRC_OFF }, >> > >> > It strikes me as a little odd to create a new /metadata type/ to >> > reference the external log. If we someday want to add a bunch of new >> > types to xfs_db to allow us to decode/fuzz the log contents, wouldn't we >> > have to add them twice -- once for decoding an internal log, and again >> > to decode the external log? And the only difference between the two >> > would be the buftarg, right? The set_cur caller needs to know the >> > daddr already, so I don't think it's unreasonable for the caller to have >> > to know which buftarg too. >> > >> > IOWs, I think set_cur ought to take the buftarg, the typ_t, and a daddr >> > as explicit arguments. But maybe others have opinions? >> > >> > e.g. rename set_cur to __set_cur and make it take a buftarg, and then: >> > >> > int >> > set_log_cur( >> > const typ_t *type, >> > xfs_daddr_t blknum, >> > int len, >> > int ring_flag, >> > bbmap_t *bbmap) >> > { >> > if (!mp->m_logdev_targp->bt_bdev || >> > mp->m_logdev_targp->bt_bdev == mp->m_ddev_targp->bt_bdev) { >> > printf(_("external log device not loaded, use -l.\n")); >> > return ENODEV; >> > } >> > >> > __set_cur(mp->m_logdev_targp, type, blknum, len, ring_flag, bbmap); >> > return 0; >> > } >> > >> > and then metadump can do something like .... >> > >> > error = set_log_cur(&typtab[TYP_LOG], 0, >> > mp->m_sb.sb_logblocks * blkbb, DB_RING_IGN, NULL); >> > >> >> Darrick, How about implementing the following instead, >> >> static void >> __set_cur( >> struct xfs_buftarg *btargp, >> const typ_t *type, >> xfs_daddr_t blknum, >> int len, >> int ring_flag, >> bbmap_t *bbmap) >> { >> struct xfs_buf *bp; >> xfs_ino_t dirino; >> xfs_ino_t ino; >> uint16_t mode; >> const struct xfs_buf_ops *ops = type ? type->bops : NULL; >> int error; >> >> if (iocur_sp < 0) { >> dbprintf(_("set_cur no stack element to set\n")); >> return; >> } >> >> ino = iocur_top->ino; >> dirino = iocur_top->dirino; >> mode = iocur_top->mode; >> pop_cur(); >> push_cur(); >> >> if (bbmap) { >> #ifdef DEBUG_BBMAP >> int i; >> printf(_("xfs_db got a bbmap for %lld\n"), (long long)blknum); >> printf(_("\tblock map")); >> for (i = 0; i < bbmap->nmaps; i++) >> printf(" %lld:%d", (long long)bbmap->b[i].bm_bn, >> bbmap->b[i].bm_len); >> printf("\n"); >> #endif >> iocur_top->bbmap = malloc(sizeof(struct bbmap)); >> if (!iocur_top->bbmap) >> return; >> memcpy(iocur_top->bbmap, bbmap, sizeof(struct bbmap)); >> error = -libxfs_buf_read_map(btargp, bbmap->b, >> bbmap->nmaps, LIBXFS_READBUF_SALVAGE, &bp, >> ops); >> } else { >> error = -libxfs_buf_read(btargp, blknum, len, >> LIBXFS_READBUF_SALVAGE, &bp, ops); >> iocur_top->bbmap = NULL; >> } >> >> /* >> * Salvage mode means that we still get a buffer even if the verifier >> * says the metadata is corrupt. Therefore, the only errors we should >> * get are for IO errors or runtime errors. >> */ >> if (error) >> return; >> iocur_top->buf = bp->b_addr; >> iocur_top->bp = bp; >> if (!ops) { >> bp->b_ops = NULL; >> bp->b_flags |= LIBXFS_B_UNCHECKED; >> } >> >> iocur_top->bb = blknum; >> iocur_top->blen = len; >> iocur_top->boff = 0; >> iocur_top->data = iocur_top->buf; >> iocur_top->len = BBTOB(len); >> iocur_top->off = blknum << BBSHIFT; >> iocur_top->typ = cur_typ = type; >> iocur_top->ino = ino; >> iocur_top->dirino = dirino; >> iocur_top->mode = mode; >> iocur_top->ino_buf = 0; >> iocur_top->dquot_buf = 0; >> >> /* store location in ring */ >> if (ring_flag) >> ring_add(); >> } >> >> void >> set_cur( >> const typ_t *type, >> xfs_daddr_t blknum, >> int len, >> int ring_flag, >> bbmap_t *bbmap) >> { >> struct xfs_buftarg *btargp = mp->m_ddev_targp; >> >> if (type->typnm == TYP_LOG && >> mp->m_logdev_targp->bt_bdev != mp->m_ddev_targp->bt_bdev) { >> ASSERT(mp->m_sb.sb_logstart == 0); >> btargp = mp->m_logdev_targp; >> } >> >> __set_cur(btargp, type, blknum, len, ring_flag, bbmap); >> } >> >> i.e. We continue to have just one type for the log and set_cur() will >> internally decide which buftarg to pass to __set_cur(). Please let me know >> your opinion on this approach. > > If I'm understanding this correctly, you're proposing to push the > buftarg decision down into set_cur instead of encoding it in the typ_t > information? > > I still don't like this, because that decision should be made by the > callers of set_*cur, not down in the io cursor handling code. > > Take a look at the users of set_log_cur and set_rt_cur in the 'dblock' > command as of djwong-wtf: > https://git.kernel.org/pub/scm/linux/kernel/git/djwong/xfsprogs-dev.git/tree/db/block.c?h=djwong-wtf_2023-06-05#n217 > > Notice this bit here: > > static inline bool > is_rtfile( > struct xfs_dinode *dip) > { > return dip->di_flags & cpu_to_be16(XFS_DIFLAG_REALTIME); > } > > static int > dblock_f(...) > { > ... > > if (is_rtfile(iocur_top->data)) > set_rt_cur(&typtab[type], (int64_t)XFS_FSB_TO_DADDR(mp, dfsbno), > nb * blkbb, DB_RING_ADD, > nex > 1 ? &bbmap : NULL); > else > set_cur(&typtab[type], (int64_t)XFS_FSB_TO_DADDR(mp, dfsbno), > nb * blkbb, DB_RING_ADD, > nex > 1 ? &bbmap : NULL); > > xfs_db can now access the data blocks of realtime files, because we have > the high level logic to decide which buftarg based on the di_flags set > in the inode core. TYP_DATA doesn't know anything at all about inodes > or data blocks or whatever -- down at the level of "data block" we don't > actually have the context we need to select a device. > Ok. Now I understand. Thanks for the explaination. -- chandan