When using fadump (firmware-assisted dump) mode on powerpc, a mismatch between the grub xfs driver and the kernel xfs driver has been observed. Note: fadump boots up in the following sequence: firmware -> grub reads kernel and initramfs -> kernel boots. The process to reproduce this mismatch: - On powerpc, boot the kernel with fadump=on and edit /etc/kdump.conf. - Replace "path /var/crash" with "path /var/crashnew", then run "kdumpctl restart" to rebuild the initramfs. In detail, the rebuild does the following: mkdumprd /boot/initramfs-`uname -r`.img.tmp; mv /boot/initramfs-`uname -r`.img.tmp /boot/initramfs-`uname -r`.img sync - "echo c >/proc/sysrq-trigger". The result: The dump image is not saved under /var/crashnew/* as expected, but is still saved under /var/crash. The root cause: As Eric pointed out, on xfs 'sync' ensures consistency by writing metadata back to the xlog, but not necessarily to fsblock. This causes a problem if grub cannot replay the xlog before accessing the xfs files. Since the above dir entry of the initramfs should be stored as inline data in the xfs_inode, xfs_fs_sync_fs() does not guarantee that it is written to fsblock. umount can be used to write metadata to fsblock, but the filesystem cannot be unmounted while it is still in use. There are two ways to fix this mismatch: in either grub or xfs. It may be easier to do this on the xfs side by introducing an interface to flush metadata to fsblock explicitly. With this patch, metadata can be written to fsblock by: # update AIL sync # newly introduced interface to flush metadata to fsblock mount -o remount,metasync mountpoint Signed-off-by: Pingfan Liu <kernelfans@xxxxxxxxx> Cc: "Darrick J. 
Wong" <darrick.wong@xxxxxxxxxx> Cc: Dave Chinner <dchinner@xxxxxxxxxx> Cc: Eric Sandeen <esandeen@xxxxxxxxxx> Cc: Hari Bathini <hbathini@xxxxxxxxxxxxx> Cc: linuxppc-dev@xxxxxxxxxxxxxxxx To: linux-xfs@xxxxxxxxxxxxxxx --- fs/xfs/xfs_mount.h | 1 + fs/xfs/xfs_super.c | 15 ++++++++++++++- fs/xfs/xfs_trans.h | 2 ++ fs/xfs/xfs_trans_ail.c | 26 +++++++++++++++++++++++++- fs/xfs/xfs_trans_priv.h | 1 + 5 files changed, 43 insertions(+), 2 deletions(-) diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index fdb60e0..85f32e6 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h @@ -243,6 +243,7 @@ typedef struct xfs_mount { #define XFS_MOUNT_FILESTREAMS (1ULL << 24) /* enable the filestreams allocator */ #define XFS_MOUNT_NOATTR2 (1ULL << 25) /* disable use of attr2 format */ +#define XFS_MOUNT_METASYNC (1ull << 26) /* write meta to fsblock */ #define XFS_MOUNT_DAX (1ULL << 62) /* TEST ONLY! */ diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index 8d1df9f..41df810 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -59,7 +59,7 @@ enum { Opt_filestreams, Opt_quota, Opt_noquota, Opt_usrquota, Opt_grpquota, Opt_prjquota, Opt_uquota, Opt_gquota, Opt_pquota, Opt_uqnoenforce, Opt_gqnoenforce, Opt_pqnoenforce, Opt_qnoenforce, - Opt_discard, Opt_nodiscard, Opt_dax, Opt_err, + Opt_discard, Opt_nodiscard, Opt_dax, Opt_metasync, Opt_err }; static const match_table_t tokens = { @@ -106,6 +106,7 @@ static const match_table_t tokens = { {Opt_discard, "discard"}, /* Discard unused blocks */ {Opt_nodiscard, "nodiscard"}, /* Do not discard unused blocks */ {Opt_dax, "dax"}, /* Enable direct access to bdev pages */ + {Opt_metasync, "metasync"}, /* one shot to write meta to fsblock */ {Opt_err, NULL}, }; @@ -338,6 +339,9 @@ xfs_parseargs( mp->m_flags |= XFS_MOUNT_DAX; break; #endif + case Opt_metasync: + mp->m_flags |= XFS_MOUNT_METASYNC; + break; default: xfs_warn(mp, "unknown mount option [%s].", p); return -EINVAL; @@ -1259,6 +1263,9 @@ xfs_fs_remount( mp->m_flags |= 
XFS_MOUNT_SMALL_INUMS; mp->m_maxagi = xfs_set_inode_alloc(mp, sbp->sb_agcount); break; + case Opt_metasync: + mp->m_flags |= XFS_MOUNT_METASYNC; + break; default: /* * Logically we would return an error here to prevent @@ -1286,6 +1293,12 @@ xfs_fs_remount( } } + if (mp->m_flags & XFS_MOUNT_METASYNC) { + xfs_ail_push_sync(mp->m_ail); + /* one shot flag */ + mp->m_flags &= ~XFS_MOUNT_METASYNC; + } + /* ro -> rw */ if ((mp->m_flags & XFS_MOUNT_RDONLY) && !(*flags & SB_RDONLY)) { if (mp->m_flags & XFS_MOUNT_NORECOVERY) { diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h index 64d7f17..fcdb902 100644 --- a/fs/xfs/xfs_trans.h +++ b/fs/xfs/xfs_trans.h @@ -242,6 +242,8 @@ void xfs_trans_buf_set_type(struct xfs_trans *, struct xfs_buf *, void xfs_trans_buf_copy_type(struct xfs_buf *dst_bp, struct xfs_buf *src_bp); +void xfs_ail_push_sync(struct xfs_ail *ailp); + extern kmem_zone_t *xfs_trans_zone; #endif /* __XFS_TRANS_H__ */ diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c index 6ccfd75..b8d8df1 100644 --- a/fs/xfs/xfs_trans_ail.c +++ b/fs/xfs/xfs_trans_ail.c @@ -488,7 +488,11 @@ xfsaild_push( xfs_trans_ail_cursor_done(&cur); spin_unlock(&ailp->ail_lock); - if (xfs_buf_delwri_submit_nowait(&ailp->ail_buf_list)) + if (unlikely(mp->m_flags & XFS_MOUNT_METASYNC)) { + xfs_buf_delwri_submit(&ailp->ail_buf_list); + ailp->ail_log_flush++; + wake_up_all(&ailp->pushed_que); + } else if (xfs_buf_delwri_submit_nowait(&ailp->ail_buf_list)) ailp->ail_log_flush++; if (!count || XFS_LSN_CMP(lsn, target) >= 0) { @@ -641,6 +645,25 @@ xfs_ail_push( wake_up_process(ailp->ail_task); } +void +xfs_ail_push_sync( + struct xfs_ail *ailp) +{ + xfs_lsn_t sync_lsn; + DEFINE_WAIT(wait); + + sync_lsn = xfs_ail_max_lsn(ailp); + for (;;) { + xfs_ail_push(ailp, sync_lsn); + prepare_to_wait(&ailp->pushed_que, &wait, TASK_INTERRUPTIBLE); + if (XFS_LSN_CMP(READ_ONCE(ailp->ail_target_prev), + sync_lsn) >= 0) + break; + schedule(); + } + finish_wait(&ailp->pushed_que, &wait); +} + /* * Push 
out all items in the AIL immediately */ @@ -834,6 +857,7 @@ xfs_trans_ail_init( spin_lock_init(&ailp->ail_lock); INIT_LIST_HEAD(&ailp->ail_buf_list); init_waitqueue_head(&ailp->ail_empty); + init_waitqueue_head(&ailp->pushed_que); ailp->ail_task = kthread_run(xfsaild, ailp, "xfsaild/%s", ailp->ail_mount->m_fsname); diff --git a/fs/xfs/xfs_trans_priv.h b/fs/xfs/xfs_trans_priv.h index 2e073c1..9fe3cc6 100644 --- a/fs/xfs/xfs_trans_priv.h +++ b/fs/xfs/xfs_trans_priv.h @@ -61,6 +61,7 @@ struct xfs_ail { int ail_log_flush; struct list_head ail_buf_list; wait_queue_head_t ail_empty; + wait_queue_head_t pushed_que; }; /* -- 2.7.5