On Thu, Feb 06, 2020 at 12:38:17PM -0700, Allison Collins wrote: > > > On 2/4/20 5:47 PM, Darrick J. Wong wrote: > > From: Darrick J. Wong <darrick.wong@xxxxxxxxxx> > > > > Add a new function that will ensure that everything we changed has > > landed on stable media, and report the results. Teach the individual > > programs to report when things go wrong. > > > > Signed-off-by: Darrick J. Wong <darrick.wong@xxxxxxxxxx> > Ok, looks functionally sane to me: Thanks for reviewing the three series! --D > Reviewed-by: Allison Collins <allison.henderson@xxxxxxxxxx> > > > --- > > db/init.c | 14 ++++++++++++++ > > include/xfs_mount.h | 3 +++ > > libxfs/init.c | 43 +++++++++++++++++++++++++++++++++++++++++++ > > libxfs/libxfs_io.h | 2 ++ > > libxfs/rdwr.c | 27 +++++++++++++++++++++++++-- > > mkfs/xfs_mkfs.c | 15 +++++++++++++++ > > repair/xfs_repair.c | 35 +++++++++++++++++++++++++++++++++++ > > 7 files changed, 137 insertions(+), 2 deletions(-) > > > > > > diff --git a/db/init.c b/db/init.c > > index 0ac37368..e92de232 100644 > > --- a/db/init.c > > +++ b/db/init.c > > @@ -184,6 +184,7 @@ main( > > char *input; > > char **v; > > int start_iocur_sp; > > + int d, l, r; > > init(argc, argv); > > start_iocur_sp = iocur_sp; > > @@ -216,6 +217,19 @@ main( > > */ > > while (iocur_sp > start_iocur_sp) > > pop_cur(); > > + > > + libxfs_flush_devices(mp, &d, &l, &r); > > + if (d) > > + fprintf(stderr, _("%s: cannot flush data device (%d).\n"), > > + progname, d); > > + if (l) > > + fprintf(stderr, _("%s: cannot flush log device (%d).\n"), > > + progname, l); > > + if (r) > > + fprintf(stderr, _("%s: cannot flush realtime device (%d).\n"), > > + progname, r); > > + > > + > > libxfs_umount(mp); > > if (x.ddev) > > libxfs_device_close(x.ddev); > > diff --git a/include/xfs_mount.h b/include/xfs_mount.h > > index 29b3cc1b..c80aaf69 100644 > > --- a/include/xfs_mount.h > > +++ b/include/xfs_mount.h > > @@ -187,4 +187,7 @@ extern xfs_mount_t *libxfs_mount (xfs_mount_t *, xfs_sb_t *, > > extern void libxfs_umount (xfs_mount_t *); > > extern void libxfs_rtmount_destroy (xfs_mount_t *); > > +void libxfs_flush_devices(struct xfs_mount *mp, int *datadev, int *logdev, > > + int *rtdev); > > + > > #endif /* __XFS_MOUNT_H__ */ > > diff --git a/libxfs/init.c b/libxfs/init.c > > index a0d4b7f4..d1d3f4df 100644 > > --- a/libxfs/init.c > > +++ b/libxfs/init.c > > @@ -569,6 +569,8 @@ libxfs_buftarg_alloc( > > } > > btp->bt_mount = mp; > > btp->dev = dev; > > + btp->lost_writes = false; > > + > > return btp; > > } > > @@ -791,6 +793,47 @@ libxfs_rtmount_destroy(xfs_mount_t *mp) > > mp->m_rsumip = mp->m_rbmip = NULL; > > } > > +static inline int > > +libxfs_flush_buftarg( > > + struct xfs_buftarg *btp) > > +{ > > + if (btp->lost_writes) > > + return -ENOTRECOVERABLE; > > + > > + return libxfs_blkdev_issue_flush(btp); > > +} > > + > > +/* > > + * Purge the buffer cache to write all dirty buffers to disk and free all > > + * incore buffers. Buffers that cannot be written will cause the lost_writes > > + * flag to be set in the buftarg. If there were no lost writes, flush the > > + * device to make sure the writes made it to stable storage. > > + * > > + * For each device, the return code will be set to -ENOTRECOVERABLE if we > > + * couldn't write something to disk; or the results of the block device flush > > + * operation. > > + */ > > +void > > +libxfs_flush_devices( > > + struct xfs_mount *mp, > > + int *datadev, > > + int *logdev, > > + int *rtdev) > > +{ > > + *datadev = *logdev = *rtdev = 0; > > + > > + libxfs_bcache_purge(); > > + > > + if (mp->m_ddev_targp) > > + *datadev = libxfs_flush_buftarg(mp->m_ddev_targp); > > + > > + if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp) > > + *logdev = libxfs_flush_buftarg(mp->m_logdev_targp); > > + > > + if (mp->m_rtdev_targp) > > + *rtdev = libxfs_flush_buftarg(mp->m_rtdev_targp); > > +} > > + > > /* > > * Release any resource obtained during a mount. > > */ > > diff --git a/libxfs/libxfs_io.h b/libxfs/libxfs_io.h > > index 579df52b..fc0fd060 100644 > > --- a/libxfs/libxfs_io.h > > +++ b/libxfs/libxfs_io.h > > @@ -23,10 +23,12 @@ struct xfs_perag; > > struct xfs_buftarg { > > struct xfs_mount *bt_mount; > > dev_t dev; > > + bool lost_writes; > > }; > > extern void libxfs_buftarg_init(struct xfs_mount *mp, dev_t ddev, > > dev_t logdev, dev_t rtdev); > > +int libxfs_blkdev_issue_flush(struct xfs_buftarg *btp); > > #define LIBXFS_BBTOOFF64(bbs) (((xfs_off_t)(bbs)) << BBSHIFT) > > diff --git a/libxfs/rdwr.c b/libxfs/rdwr.c > > index 8b47d438..92e497f9 100644 > > --- a/libxfs/rdwr.c > > +++ b/libxfs/rdwr.c > > @@ -17,6 +17,7 @@ > > #include "xfs_inode_fork.h" > > #include "xfs_inode.h" > > #include "xfs_trans.h" > > +#include "libfrog/platform.h" > > #include "libxfs.h" /* for LIBXFS_EXIT_ON_FAILURE */ > > @@ -1227,9 +1228,11 @@ libxfs_brelse( > > if (!bp) > > return; > > - if (bp->b_flags & LIBXFS_B_DIRTY) > > + if (bp->b_flags & LIBXFS_B_DIRTY) { > > fprintf(stderr, > > "releasing dirty buffer to free list!\n"); > > + bp->b_target->lost_writes = true; > > + } > > pthread_mutex_lock(&xfs_buf_freelist.cm_mutex); > > list_add(&bp->b_node.cn_mru, &xfs_buf_freelist.cm_list); > > @@ -1248,9 +1251,11 @@ libxfs_bulkrelse( > > return 0 ; > > list_for_each_entry(bp, list, b_node.cn_mru) { > > - if (bp->b_flags & LIBXFS_B_DIRTY) > > + if (bp->b_flags & LIBXFS_B_DIRTY) { > > fprintf(stderr, > > "releasing dirty buffer (bulk) to free list!\n"); > > + bp->b_target->lost_writes = true; > > + } > > count++; > > } > > @@ -1479,6 +1484,24 @@ libxfs_irele( > > kmem_cache_free(xfs_inode_zone, ip); > > } > > +/* > > + * Flush everything dirty in the kernel and disk write caches to stable media. > > + * Returns 0 for success or a negative error code. > > + */ > > +int > > +libxfs_blkdev_issue_flush( > > + struct xfs_buftarg *btp) > > +{ > > + int fd, ret; > > + > > + if (btp->dev == 0) > > + return 0; > > + > > + fd = libxfs_device_to_fd(btp->dev); > > + ret = platform_flush_device(fd, btp->dev); > > + return ret ? -errno : 0; > > +} > > + > > /* > > * Write out a buffer list synchronously. > > * > > diff --git a/mkfs/xfs_mkfs.c b/mkfs/xfs_mkfs.c > > index 1f5d2105..6b182264 100644 > > --- a/mkfs/xfs_mkfs.c > > +++ b/mkfs/xfs_mkfs.c > > @@ -3644,6 +3644,7 @@ main( > > char *protofile = NULL; > > char *protostring = NULL; > > int worst_freelist = 0; > > + int d, l, r; > > struct libxfs_xinit xi = { > > .isdirect = LIBXFS_DIRECT, > > @@ -3940,6 +3941,20 @@ main( > > (XFS_BUF_TO_SBP(buf))->sb_inprogress = 0; > > libxfs_writebuf(buf, LIBXFS_EXIT_ON_FAILURE); > > + /* Make sure our new fs made it to stable storage. */ > > + libxfs_flush_devices(mp, &d, &l, &r); > > + if (d) > > + fprintf(stderr, _("%s: cannot flush data device (%d).\n"), > > + progname, d); > > + if (l) > > + fprintf(stderr, _("%s: cannot flush log device (%d).\n"), > > + progname, l); > > + if (r) > > + fprintf(stderr, _("%s: cannot flush realtime device (%d).\n"), > > + progname, r); > > + if (d || l || r) > > + return 1; > > + > > libxfs_umount(mp); > > if (xi.rtdev) > > libxfs_device_close(xi.rtdev); > > diff --git a/repair/xfs_repair.c b/repair/xfs_repair.c > > index eb1ce546..c0a77cad 100644 > > --- a/repair/xfs_repair.c > > +++ b/repair/xfs_repair.c > > @@ -690,6 +690,36 @@ check_fs_vs_host_sectsize( > > } > > } > > +/* Flush the devices and complain if anything bad happened. */ > > +static bool > > +check_write_failed( > > + struct xfs_mount *mp) > > +{ > > + int d, l, r; > > + > > + libxfs_flush_devices(mp, &d, &l, &r); > > + > > + if (d == -ENOTRECOVERABLE) > > + do_warn(_("Lost writes to data device, please re-run.\n")); > > + else if (d) > > + do_warn(_("Error %d flushing data device, please re-run.\n"), > > + -d); > > + > > + if (l == -ENOTRECOVERABLE) > > + do_warn(_("Lost writes to log device, please re-run.\n")); > > + else if (l) > > + do_warn(_("Error %d flushing log device, please re-run.\n"), > > + -l); > > + > > + if (r == -ENOTRECOVERABLE) > > + do_warn(_("Lost writes to realtime device, please re-run.\n")); > > + else if (r) > > + do_warn(_("Error %d flushing realtime device, please re-run.\n"), > > + -r); > > + > > + return d || l || r; > > +} > > + > > int > > main(int argc, char **argv) > > { > > @@ -703,6 +733,7 @@ main(int argc, char **argv) > > struct xfs_sb psb; > > int rval; > > struct xfs_ino_geometry *igeo; > > + bool writes_failed; > > progname = basename(argv[0]); > > setlocale(LC_ALL, ""); > > @@ -1106,6 +1137,8 @@ _("Note - stripe unit (%d) and width (%d) were copied from a backup superblock.\ > > format_log_max_lsn(mp); > > libxfs_umount(mp); > > + writes_failed = check_write_failed(mp); > > + > > if (x.rtdev) > > libxfs_device_close(x.rtdev); > > if (x.logdev && x.logdev != x.ddev) > > @@ -1125,6 +1158,8 @@ _("Repair of readonly mount complete. Immediate reboot encouraged.\n")); > > free(msgbuf); > > + if (writes_failed) > > + return 1; > > if (fs_is_dirty && report_corrected) > > return (4); > > return (0); > >