From: Darrick J. Wong <djwong@xxxxxxxxxx> Allow the buffer cache to target in-memory files by connecting it to xfiles. Signed-off-by: Darrick J. Wong <djwong@xxxxxxxxxx> --- fs/xfs/Kconfig | 4 ++ fs/xfs/scrub/xfile.h | 15 +++++++++ fs/xfs/xfs_aops.c | 5 ++- fs/xfs/xfs_bmap_util.c | 8 ++--- fs/xfs/xfs_buf.c | 80 +++++++++++++++++++++++++++++++++++++++++++--- fs/xfs/xfs_buf.h | 71 +++++++++++++++++++++++++++++++++++++++-- fs/xfs/xfs_discard.c | 8 ++--- fs/xfs/xfs_file.c | 6 ++- fs/xfs/xfs_ioctl.c | 3 +- fs/xfs/xfs_iomap.c | 4 +- fs/xfs/xfs_log.c | 4 +- fs/xfs/xfs_log_cil.c | 3 +- fs/xfs/xfs_log_recover.c | 3 +- fs/xfs/xfs_super.c | 4 +- 14 files changed, 188 insertions(+), 30 deletions(-) diff --git a/fs/xfs/Kconfig b/fs/xfs/Kconfig index 54806c2b80d4..2373324be997 100644 --- a/fs/xfs/Kconfig +++ b/fs/xfs/Kconfig @@ -101,6 +101,9 @@ config XFS_LIVE_HOOKS bool select JUMP_LABEL if HAVE_ARCH_JUMP_LABEL +config XFS_IN_MEMORY_FILE + bool + config XFS_ONLINE_SCRUB bool "XFS online metadata check support" default n @@ -108,6 +111,7 @@ config XFS_ONLINE_SCRUB depends on TMPFS && SHMEM select XFS_LIVE_HOOKS select XFS_DRAIN_INTENTS + select XFS_IN_MEMORY_FILE help If you say Y here you will be able to check metadata on a mounted XFS filesystem. This feature is intended to reduce diff --git a/fs/xfs/scrub/xfile.h b/fs/xfs/scrub/xfile.h index b7f046016b1b..99b6db838612 100644 --- a/fs/xfs/scrub/xfile.h +++ b/fs/xfs/scrub/xfile.h @@ -6,6 +6,8 @@ #ifndef __XFS_SCRUB_XFILE_H__ #define __XFS_SCRUB_XFILE_H__ +#ifdef CONFIG_XFS_IN_MEMORY_FILE + struct xfile_page { struct page *page; void *fsdata; @@ -76,5 +78,18 @@ int xfile_get_page(struct xfile *xf, loff_t offset, unsigned int len, int xfile_put_page(struct xfile *xf, struct xfile_page *xbuf); int xfile_dump(struct xfile *xf); +#else +static inline int +xfile_obj_load(struct xfile *xf, void *buf, size_t count, loff_t offset) +{ + return -EIO; +} + +static inline int +xfile_obj_store(struct xfile *xf, const void *buf, size_t count, loff_t offset) +{ + return -EIO; +} +#endif /* CONFIG_XFS_IN_MEMORY_FILE */ #endif /* __XFS_SCRUB_XFILE_H__ */ diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index 41734202796f..c3a9df0c0eab 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -562,7 +562,10 @@ xfs_iomap_swapfile_activate( struct file *swap_file, sector_t *span) { - sis->bdev = xfs_inode_buftarg(XFS_I(file_inode(swap_file)))->bt_bdev; + struct xfs_inode *ip = XFS_I(file_inode(swap_file)); + struct xfs_buftarg *btp = xfs_inode_buftarg(ip); + + sis->bdev = xfs_buftarg_bdev(btp); return iomap_swapfile_activate(sis, swap_file, span, &xfs_read_iomap_ops); } diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c index 867645b74d88..e094932869f6 100644 --- a/fs/xfs/xfs_bmap_util.c +++ b/fs/xfs/xfs_bmap_util.c @@ -62,10 +62,10 @@ xfs_zero_extent( xfs_daddr_t sector = xfs_fsb_to_db(ip, start_fsb); sector_t block = XFS_BB_TO_FSBT(mp, sector); - return blkdev_issue_zeroout(target->bt_bdev, - block << (mp->m_super->s_blocksize_bits - 9), - count_fsb << (mp->m_super->s_blocksize_bits - 9), - GFP_NOFS, 0); + return xfs_buftarg_zeroout(target, + block << (mp->m_super->s_blocksize_bits - 9), + count_fsb << (mp->m_super->s_blocksize_bits - 9), + GFP_NOFS, 0); } #ifdef CONFIG_XFS_RT diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index 7dfc1db566fa..2ec8d39def9c 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -21,6 +21,7 @@ #include "xfs_errortag.h" #include "xfs_error.h" #include "xfs_ag.h" +#include "scrub/xfile.h" struct kmem_cache *xfs_buf_cache; @@ -1554,6 +1555,36 @@ xfs_buf_ioapply_map( } +static inline void +xfs_buf_ioapply_in_memory( + struct xfs_buf *bp) +{ + struct xfile *xfile = bp->b_target->bt_xfile; + loff_t pos = BBTOB(xfs_buf_daddr(bp)); + size_t size = BBTOB(bp->b_length); + int error; + + atomic_inc(&bp->b_io_remaining); + + if (bp->b_map_count > 1) { + /* We don't need or support multi-map buffers. */ + ASSERT(0); + error = -EIO; + } else if (bp->b_flags & XBF_WRITE) { + error = xfile_obj_store(xfile, bp->b_addr, size, pos); + } else { + error = xfile_obj_load(xfile, bp->b_addr, size, pos); + } + if (error) + cmpxchg(&bp->b_io_error, 0, error); + + if (!bp->b_error && xfs_buf_is_vmapped(bp) && (bp->b_flags & XBF_READ)) + invalidate_kernel_vmap_range(bp->b_addr, xfs_buf_vmap_len(bp)); + + if (atomic_dec_and_test(&bp->b_io_remaining) == 1) + xfs_buf_ioend(bp); +} + STATIC void _xfs_buf_ioapply( struct xfs_buf *bp) @@ -1611,6 +1642,11 @@ _xfs_buf_ioapply( /* we only use the buffer cache for meta-data */ op |= REQ_META; + if (bp->b_target->bt_flags & XFS_BUFTARG_IN_MEMORY) { + xfs_buf_ioapply_in_memory(bp); + return; + } + /* * Walk all the vectors issuing IO on them. Set up the initial offset * into the buffer and the desired IO size before we start - @@ -1978,9 +2014,11 @@ xfs_free_buftarg( if (btp->bt_flags & XFS_BUFTARG_SELF_CACHED) rhashtable_destroy(&btp->bt_bufhash); - blkdev_issue_flush(btp->bt_bdev); - invalidate_bdev(btp->bt_bdev); - fs_put_dax(btp->bt_daxdev, btp->bt_mount); + if (!(btp->bt_flags & XFS_BUFTARG_IN_MEMORY)) { + blkdev_issue_flush(btp->bt_bdev); + invalidate_bdev(btp->bt_bdev); + fs_put_dax(btp->bt_daxdev, btp->bt_mount); + } kmem_free(btp); } @@ -2024,12 +2062,13 @@ xfs_setsize_buftarg_early( static struct xfs_buftarg * __xfs_alloc_buftarg( struct xfs_mount *mp, - unsigned int flags) + unsigned int flags, + xfs_km_flags_t km_flags) { struct xfs_buftarg *btp; int error; - btp = kmem_zalloc(sizeof(*btp), KM_NOFS); + btp = kmem_zalloc(sizeof(*btp), KM_NOFS | km_flags); if (!btp) return NULL; @@ -2090,7 +2129,7 @@ xfs_alloc_buftarg( ops = &xfs_dax_holder_operations; #endif - btp = __xfs_alloc_buftarg(mp, 0); + btp = __xfs_alloc_buftarg(mp, 0, 0); if (!btp) return NULL; @@ -2109,6 +2148,35 @@ xfs_alloc_buftarg( return NULL; } +#ifdef CONFIG_XFS_IN_MEMORY_FILE +/* Allocate a buffer cache target for a memory-backed file. */ +int +xfs_alloc_memory_buftarg( + struct xfs_mount *mp, + struct xfile *xfile, + struct xfs_buftarg **btpp) +{ + struct xfs_buftarg *btp; + + btp = __xfs_alloc_buftarg(mp, + XFS_BUFTARG_SELF_CACHED | XFS_BUFTARG_IN_MEMORY, + KM_MAYFAIL); + if (!btp) + return -ENOMEM; + + btp->bt_xfile = xfile; + btp->bt_dev = (dev_t)-1U; + + btp->bt_meta_sectorsize = SECTOR_SIZE; + btp->bt_meta_sectormask = SECTOR_SIZE - 1; + btp->bt_logical_sectorsize = SECTOR_SIZE; + btp->bt_logical_sectormask = SECTOR_SIZE - 1; + + *btpp = btp; + return 0; +} +#endif /* CONFIG_XFS_IN_MEMORY_FILE */ + /* * Cancel a delayed write list. * diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h index d7bf7f657e99..dcae77dabdcc 100644 --- a/fs/xfs/xfs_buf.h +++ b/fs/xfs/xfs_buf.h @@ -21,6 +21,7 @@ extern struct kmem_cache *xfs_buf_cache; * Base types */ struct xfs_buf; +struct xfile; #define XFS_BUF_DADDR_NULL ((xfs_daddr_t) (-1LL)) @@ -99,7 +100,10 @@ typedef unsigned int xfs_buf_flags_t; */ typedef struct xfs_buftarg { dev_t bt_dev; - struct block_device *bt_bdev; + union { + struct block_device *bt_bdev; + struct xfile *bt_xfile; + }; struct dax_device *bt_daxdev; u64 bt_dax_part_off; struct xfs_mount *bt_mount; @@ -124,6 +128,20 @@ typedef struct xfs_buftarg { /* the xfs_buftarg indexes buffers via bt_buf_hash */ #define XFS_BUFTARG_SELF_CACHED (1U << 0) +/* in-memory buftarg via bt_xfile */ +#ifdef CONFIG_XFS_IN_MEMORY_FILE +# define XFS_BUFTARG_IN_MEMORY (1U << 1) +#else +# define XFS_BUFTARG_IN_MEMORY (0) +#endif + +static inline bool +xfs_buftarg_in_memory( + struct xfs_buftarg *btp) +{ + return btp->bt_flags & XFS_BUFTARG_IN_MEMORY; +} + #define XB_PAGES 2 struct xfs_buf_map { @@ -372,13 +390,60 @@ xfs_buf_update_cksum(struct xfs_buf *bp, unsigned long cksum_offset) */ struct xfs_buftarg *xfs_alloc_buftarg(struct xfs_mount *mp, struct block_device *bdev); +#ifdef CONFIG_XFS_IN_MEMORY_FILE +int xfs_alloc_memory_buftarg(struct xfs_mount *mp, struct xfile *xfile, + struct xfs_buftarg **btpp); +#endif extern void xfs_free_buftarg(struct xfs_buftarg *); extern void xfs_buftarg_wait(struct xfs_buftarg *); extern void xfs_buftarg_drain(struct xfs_buftarg *); extern int xfs_setsize_buftarg(struct xfs_buftarg *, unsigned int); -#define xfs_getsize_buftarg(buftarg) block_size((buftarg)->bt_bdev) -#define xfs_readonly_buftarg(buftarg) bdev_read_only((buftarg)->bt_bdev) +static inline struct block_device * +xfs_buftarg_bdev(struct xfs_buftarg *btp) +{ + if (btp->bt_flags & XFS_BUFTARG_IN_MEMORY) + return NULL; + return btp->bt_bdev; +} + +static inline unsigned int +xfs_getsize_buftarg(struct xfs_buftarg *btp) +{ + if (btp->bt_flags & XFS_BUFTARG_IN_MEMORY) + return SECTOR_SIZE; + return block_size(btp->bt_bdev); +} + +static inline bool +xfs_readonly_buftarg(struct xfs_buftarg *btp) +{ + if (btp->bt_flags & XFS_BUFTARG_IN_MEMORY) + return false; + return bdev_read_only(btp->bt_bdev); +} + +static inline int +xfs_buftarg_flush(struct xfs_buftarg *btp) +{ + if (btp->bt_flags & XFS_BUFTARG_IN_MEMORY) + return 0; + return blkdev_issue_flush(btp->bt_bdev); +} + +static inline int +xfs_buftarg_zeroout( + struct xfs_buftarg *btp, + sector_t sector, + sector_t nr_sects, + gfp_t gfp_mask, + unsigned flags) +{ + if (btp->bt_flags & XFS_BUFTARG_IN_MEMORY) + return -EOPNOTSUPP; + return blkdev_issue_zeroout(btp->bt_bdev, sector, nr_sects, gfp_mask, + flags); +} int xfs_buf_reverify(struct xfs_buf *bp, const struct xfs_buf_ops *ops); bool xfs_verify_magic(struct xfs_buf *bp, __be32 dmagic); diff --git a/fs/xfs/xfs_discard.c b/fs/xfs/xfs_discard.c index 3fa6b0ab9ed6..44658cc7d3f2 100644 --- a/fs/xfs/xfs_discard.c +++ b/fs/xfs/xfs_discard.c @@ -29,7 +29,7 @@ xfs_trim_extents( xfs_daddr_t minlen, uint64_t *blocks_trimmed) { - struct block_device *bdev = mp->m_ddev_targp->bt_bdev; + struct block_device *bdev = xfs_buftarg_bdev(mp->m_ddev_targp); struct xfs_btree_cur *cur; struct xfs_buf *agbp; struct xfs_agf *agf; @@ -154,8 +154,8 @@ xfs_ioc_trim( struct xfs_mount *mp, struct fstrim_range __user *urange) { - unsigned int granularity = - bdev_discard_granularity(mp->m_ddev_targp->bt_bdev); + struct block_device *bdev = xfs_buftarg_bdev(mp->m_ddev_targp); + unsigned int granularity = bdev_discard_granularity(bdev); struct fstrim_range range; xfs_daddr_t start, end, minlen; xfs_agnumber_t start_agno, end_agno, agno; @@ -164,7 +164,7 @@ xfs_ioc_trim( if (!capable(CAP_SYS_ADMIN)) return -EPERM; - if (!bdev_max_discard_sectors(mp->m_ddev_targp->bt_bdev)) + if (!bdev_max_discard_sectors(bdev)) return -EOPNOTSUPP; /* diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index 595a5bcf46b9..c4bdadd8fa71 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -164,9 +164,9 @@ xfs_file_fsync( * inode size in case of an extending write. */ if (XFS_IS_REALTIME_INODE(ip)) - error = blkdev_issue_flush(mp->m_rtdev_targp->bt_bdev); + error = xfs_buftarg_flush(mp->m_rtdev_targp); else if (mp->m_logdev_targp != mp->m_ddev_targp) - error = blkdev_issue_flush(mp->m_ddev_targp->bt_bdev); + error = xfs_buftarg_flush(mp->m_ddev_targp); /* * Any inode that has dirty modifications in the log is pinned. The @@ -189,7 +189,7 @@ xfs_file_fsync( */ if (!log_flushed && !XFS_IS_REALTIME_INODE(ip) && mp->m_logdev_targp == mp->m_ddev_targp) { - err2 = blkdev_issue_flush(mp->m_ddev_targp->bt_bdev); + err2 = xfs_buftarg_flush(mp->m_ddev_targp); if (err2 && !error) error = err2; } diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c index 020111f0f2a2..4b2a02a08dfa 100644 --- a/fs/xfs/xfs_ioctl.c +++ b/fs/xfs/xfs_ioctl.c @@ -1762,6 +1762,7 @@ xfs_ioc_setlabel( char __user *newlabel) { struct xfs_sb *sbp = &mp->m_sb; + struct block_device *bdev = xfs_buftarg_bdev(mp->m_ddev_targp); char label[XFSLABEL_MAX + 1]; size_t len; int error; @@ -1808,7 +1809,7 @@ xfs_ioc_setlabel( error = xfs_update_secondary_sbs(mp); mutex_unlock(&mp->m_growlock); - invalidate_bdev(mp->m_ddev_targp->bt_bdev); + invalidate_bdev(bdev); out: mnt_drop_write_file(filp); diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index c2ba03281daf..99a7c271c353 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -129,7 +129,7 @@ xfs_bmbt_to_iomap( if (mapping_flags & IOMAP_DAX) iomap->dax_dev = target->bt_daxdev; else - iomap->bdev = target->bt_bdev; + iomap->bdev = xfs_buftarg_bdev(target); iomap->flags = iomap_flags; if (xfs_ipincount(ip) && @@ -154,7 +154,7 @@ xfs_hole_to_iomap( iomap->type = IOMAP_HOLE; iomap->offset = XFS_FSB_TO_B(ip->i_mount, offset_fsb); iomap->length = XFS_FSB_TO_B(ip->i_mount, end_fsb - offset_fsb); - iomap->bdev = target->bt_bdev; + iomap->bdev = xfs_buftarg_bdev(target); iomap->dax_dev = target->bt_daxdev; } diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index fc61cc024023..b32a8e57f576 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -1938,7 +1938,7 @@ xlog_write_iclog( * writeback throttle from throttling log writes behind background * metadata writeback and causing priority inversions. */ - bio_init(&iclog->ic_bio, log->l_targ->bt_bdev, iclog->ic_bvec, + bio_init(&iclog->ic_bio, xfs_buftarg_bdev(log->l_targ), iclog->ic_bvec, howmany(count, PAGE_SIZE), REQ_OP_WRITE | REQ_META | REQ_SYNC | REQ_IDLE); iclog->ic_bio.bi_iter.bi_sector = log->l_logBBstart + bno; @@ -1959,7 +1959,7 @@ xlog_write_iclog( * avoid shutdown re-entering this path and erroring out again. */ if (log->l_targ != log->l_mp->m_ddev_targp && - blkdev_issue_flush(log->l_mp->m_ddev_targp->bt_bdev)) { + xfs_buftarg_flush(log->l_mp->m_ddev_targp)) { xlog_force_shutdown(log, SHUTDOWN_LOG_IO_ERROR); return; } diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c index eccbfb99e894..12cd2874048f 100644 --- a/fs/xfs/xfs_log_cil.c +++ b/fs/xfs/xfs_log_cil.c @@ -742,7 +742,8 @@ xlog_discard_busy_extents( trace_xfs_discard_extent(mp, busyp->agno, busyp->bno, busyp->length); - error = __blkdev_issue_discard(mp->m_ddev_targp->bt_bdev, + error = __blkdev_issue_discard( + xfs_buftarg_bdev(mp->m_ddev_targp), XFS_AGB_TO_DADDR(mp, busyp->agno, busyp->bno), XFS_FSB_TO_BB(mp, busyp->length), GFP_NOFS, &bio); diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index 322eb2ee6c55..6b1f37bc3e95 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -137,7 +137,8 @@ xlog_do_io( nbblks = round_up(nbblks, log->l_sectBBsize); ASSERT(nbblks > 0); - error = xfs_rw_bdev(log->l_targ->bt_bdev, log->l_logBBstart + blk_no, + error = xfs_rw_bdev(xfs_buftarg_bdev(log->l_targ), + log->l_logBBstart + blk_no, BBTOB(nbblks), data, op); if (error && !xlog_is_shutdown(log)) { xfs_alert(log->l_mp, diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index 020ff2d93f23..8841947bdce7 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -397,13 +397,13 @@ xfs_close_devices( struct xfs_mount *mp) { if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp) { - struct block_device *logdev = mp->m_logdev_targp->bt_bdev; + struct block_device *logdev = xfs_buftarg_bdev(mp->m_logdev_targp); xfs_free_buftarg(mp->m_logdev_targp); xfs_blkdev_put(logdev); } if (mp->m_rtdev_targp) { - struct block_device *rtdev = mp->m_rtdev_targp->bt_bdev; + struct block_device *rtdev = xfs_buftarg_bdev(mp->m_rtdev_targp); xfs_free_buftarg(mp->m_rtdev_targp); xfs_blkdev_put(rtdev);