On Tue, Feb 23, 2021 at 07:05:03PM +1100, Dave Chinner wrote: > From: Dave Chinner <dchinner@xxxxxxxxxx> > ... > > Signed-off-by: Dave Chinner <dchinner@xxxxxxxxxx> > --- > Version 2: > - repost manually without git/guilt mangling the patch author > - fix bug in XLOG_ICL_NEED_FUA definition that didn't manifest as an > ordering bug in generic/45[57] until testing the CIL pipelining > changes much later in the series. > > fs/xfs/xfs_log.c | 33 +++++++++++++++++++++++---------- > fs/xfs/xfs_log_cil.c | 7 ++++++- > fs/xfs/xfs_log_priv.h | 4 ++++ > 3 files changed, 33 insertions(+), 11 deletions(-) > > diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c > index 6c3fb6dcb505..08d68a6161ae 100644 > --- a/fs/xfs/xfs_log.c > +++ b/fs/xfs/xfs_log.c ... > @@ -1951,7 +1952,7 @@ xlog_sync( > unsigned int roundoff; /* roundoff to BB or stripe */ > uint64_t bno; > unsigned int size; > - bool need_flush = true, split = false; > + bool split = false; I find the mash of the logic change (i.e. when to flush) and rework of the bool to ->ic_flags changes in this patch unnecessarily convoluted. IMO, this should be two patches. > > ASSERT(atomic_read(&iclog->ic_refcnt) == 0); > > @@ -2009,13 +2010,14 @@ xlog_sync( > * synchronously here; for an internal log we can simply use the block > * layer state machine for preflushes. > */ > - if (log->l_targ != log->l_mp->m_ddev_targp || split) { > + if (log->l_targ != log->l_mp->m_ddev_targp || > + (split && (iclog->ic_flags & XLOG_ICL_NEED_FLUSH))) { > xfs_flush_bdev(log->l_mp->m_ddev_targp->bt_bdev); > - need_flush = false; > + iclog->ic_flags &= ~XLOG_ICL_NEED_FLUSH; > } > > xlog_verify_iclog(log, iclog, count); > - xlog_write_iclog(log, iclog, bno, count, need_flush); > + xlog_write_iclog(log, iclog, bno, count); > } > > /* > @@ -2469,10 +2471,21 @@ xlog_write( > ASSERT(log_offset <= iclog->ic_size - 1); > ptr = iclog->ic_datap + log_offset; > > - /* start_lsn is the first lsn written to. That's all we need. */ > + /* Start_lsn is the first lsn written to. */ > if (start_lsn && !*start_lsn) > *start_lsn = be64_to_cpu(iclog->ic_header.h_lsn); > > + /* > + * iclogs containing commit records or unmount records need > + * to issue ordering cache flushes and commit immediately > + * to stable storage to guarantee journal vs metadata ordering > + * is correctly maintained in the storage media. > + */ > + if (optype & (XLOG_COMMIT_TRANS | XLOG_UNMOUNT_TRANS)) { > + iclog->ic_flags |= (XLOG_ICL_NEED_FLUSH | > + XLOG_ICL_NEED_FUA); > + } > + > /* > * This loop writes out as many regions as can fit in the amount > * of space which was allocated by xlog_state_get_iclog_space(). > diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c > index 4093d2d0db7c..370da7c2bfc8 100644 > --- a/fs/xfs/xfs_log_cil.c > +++ b/fs/xfs/xfs_log_cil.c > @@ -894,10 +894,15 @@ xlog_cil_push_work( > > /* > * If the checkpoint spans multiple iclogs, wait for all previous > - * iclogs to complete before we submit the commit_iclog. > + * iclogs to complete before we submit the commit_iclog. If it is in the > + * same iclog as the start of the checkpoint, then we can skip the iclog > + * cache flush because there are no other iclogs we need to order > + * against. > */ > if (ctx->start_lsn != commit_lsn) > xlog_wait_on_iclog_lsn(commit_iclog, ctx->start_lsn); > + else > + commit_iclog->ic_flags &= ~XLOG_ICL_NEED_FLUSH; Is there a reason we're making these iclog changes (here and in xlog_write(), at least) outside of ->l_icloglock? I suspect this is safe atm due to the fact that the CIL push is single threaded via the workqueue, but the rest of the iclog management code is written with proper internal exclusion in mind. This seems like a bit of a landmine if the execution context is the only thing protecting us here... hm? Brian > > /* release the hounds! */ > xfs_log_release_iclog(commit_iclog); > diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h > index 10a41b1dd895..a77e00b7789a 100644 > --- a/fs/xfs/xfs_log_priv.h > +++ b/fs/xfs/xfs_log_priv.h > @@ -133,6 +133,9 @@ enum xlog_iclog_state { > > #define XLOG_COVER_OPS 5 > > +#define XLOG_ICL_NEED_FLUSH (1 << 0) /* iclog needs REQ_PREFLUSH */ > +#define XLOG_ICL_NEED_FUA (1 << 1) /* iclog needs REQ_FUA */ > + > /* Ticket reservation region accounting */ > #define XLOG_TIC_LEN_MAX 15 > > @@ -201,6 +204,7 @@ typedef struct xlog_in_core { > u32 ic_size; > u32 ic_offset; > enum xlog_iclog_state ic_state; > + unsigned int ic_flags; > char *ic_datap; /* pointer to iclog data */ > > /* Callback structures need their own cacheline */ >